|
A library for working with phylogenetic and population genetic data.
v0.27.0
|
|
Go to the documentation of this file.
61 size_t target_taxonomy_size,
72 <<
" leaf Taxa. Pruning with " +
std::to_string( target_taxonomy_size )
73 <<
" leaves thus includes the whole taxonomy.";
76 for(
auto it :
preorder( taxonomy ) ) {
78 if( it.taxon().size() == 0 ) {
103 std::multimap< double, Taxon* > border_candidates;
106 size_t border_taxa_count = 0;
115 std::function< void (
Taxon& ) > add_taxon_to_border = [&] (
Taxon& taxon ) {
118 LOG_WARN <<
"Taxon added to border not outside ("
136 for(
auto it :
preorder( taxon ) ) {
138 if( it.taxon().size() == 0 ) {
146 }
else if( taxon.size() == 1 ) {
156 add_taxon_to_border( taxon.at(0) );
166 if( taxon.size() > 0 ) {
173 std::function< void (
Taxon& ) > expand_taxon = [&] (
179 LOG_WARN <<
"Expanding Taxon with status "
189 for(
auto& child : taxon ) {
190 add_taxon_to_border( child );
204 auto include_min_level_taxa = [&] (
Taxon& taxon ) {
210 if( taxon.size() > 0 ) {
220 add_taxon_to_border( taxon );
244 std::function< void (
Taxon& ) > resolve_big_subtaxa = [&] (
266 auto cand = std::find_if(
267 border_candidates.begin(),
268 border_candidates.end(),
269 [ &taxon ] ( std::pair< const double, Taxon* >& entry ) {
270 return &taxon == entry.second;
274 if( cand != border_candidates.end() ) {
277 assert( taxon.size() > 0 );
280 border_candidates.erase( cand );
286 for(
auto& child : taxon ) {
287 resolve_big_subtaxa( child );
298 add_taxon_to_border( taxon );
310 for(
auto& taxon : taxonomy ) {
311 resolve_big_subtaxa( taxon );
331 for(
auto& child : taxonomy ) {
332 add_taxon_to_border( child );
341 while( border_taxa_count < target_taxonomy_size ) {
345 assert( border_candidates.size() > 0 );
351 auto cur_front = *border_candidates.rbegin();
352 assert( *std::prev(border_candidates.end()) == *border_candidates.rbegin() );
357 border_candidates.erase( --border_candidates.rbegin().base() );
358 assert( cur_front != *border_candidates.rbegin() );
368 assert( cur_front.second->size() > 0 );
373 utils::abs_diff( border_taxa_count + cur_front.second->size(), target_taxonomy_size )
385 expand_taxon( *cur_front.second );
414 size_t min_subtaxonomy_size
416 for(
auto& taxon : taxonomy ) {
437 for(
auto it :
preorder( taxon ) ) {
439 if( it.taxon().size() == 0 ) {
454 auto do_count = [&] (
Taxon const& taxon ) {
465 auto do_removal = [&] (
Taxon& taxon ) {
467 taxon.clear_children();
475 auto print_taxon = [&] (
Taxon const& taxon ) {
476 result += std::string(
taxon_level(taxon) * 4,
' ' );
480 result += taxon.name();
497 auto check_parents = [&] (
Taxon const& taxon ) {
504 LOG_INFO <<
"Taxon with incorrect data type (not EntropyTaxonData): " << name;
517 LOG_INFO <<
"Taxon is a leaf but has status 'kInside': " << name;
521 auto cur_ptr = taxon.parent();
522 while( cur_ptr !=
nullptr ) {
539 child_status = cur_status;
544 child_status = cur_status;
547 LOG_INFO <<
"Taxon and child with wrong pruning status ("
556 cur_ptr = cur_ptr->parent();
#define LOG_INFO
Log an info message. See genesis::utils::LoggingLevel.
constexpr T abs_diff(T const &lhs, T const &rhs)
Calculate the absolute difference between two values.
size_t taxon_level(Taxon const &taxon)
Return the level of depth of a given Taxon.
static std::string status_abbreviation(PruneStatus stat)
size_t max_subtaxonomy_size
Maximal size of a sub-taxonomy of the pruned Taxonomy. Default is 0.
Store a Taxon, i.e., an element in a Taxonomy, with its name, rank, ID and sub-taxa.
size_t min_subtaxonomy_size
Minimal size of a sub-taxonomy of the pruned Taxonomy. Default is 0.
void levelorder_for_each(Taxonomy &tax, std::function< void(Taxon &)> fn, bool include_inner_taxa=true)
Apply a function to all taxa of the Taxonomy, traversing it in levelorder.
Helper class to generate a taxonomic path string from a Taxopath object or a Taxon.
utils::Range< IteratorPreorder< Taxonomy const, Taxon const > > preorder(TaxonomyType const &taxonomy)
std::string to_string(GenomeLocus const &locus)
size_t taxa_count_lowest_levels(Taxonomy const &tax)
Return the number of lowest level Taxa (i.e., taxa without sub-taxa) in the Taxonomy.
#define LOG_WARN
Log a warning. See genesis::utils::LoggingLevel.
Provides easy and fast logging functionality.
size_t count_taxa_with_prune_status(Taxonomy const &taxonomy, EntropyTaxonData::PruneStatus status)
Return the number of Taxa that have a certain prune status.
void expand_small_subtaxonomies(Taxonomy &taxonomy, size_t min_subtaxonomy_size)
Expand the leaves of a pruned Taxonomy if their sub-taxonomies are smaller than the given threshold.
Store settings for the Taxonomy pruning algorithm prune_by_entropy().
void prune_by_entropy(Taxonomy &taxonomy, size_t target_taxonomy_size, PruneByEntropySettings settings)
Prune a Taxonomy so that the result (approximately) contains a desired number of "leaf" Taxa,...
void preorder_for_each(Taxonomy &tax, std::function< void(Taxon &)> fn, bool include_inner_taxa=true)
Apply a function to all taxa of the Taxonomy, traversing it in preorder.
Simple text style class for colorized and bold output to a terminal.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
void remove_pruned_taxonomy_children(Taxonomy &taxonomy)
Remove the children of all Taxa that are pruned, i.e., that have prune status == kOutside.
Store a Taxonomy, i.e., a nested hierarchy of Taxa.
bool validate_pruned_taxonomy(Taxonomy const &taxonomy)
Validate that the pruning status of a Taxonomy is valid.
BaseCountsStatus status(BaseCounts const &sample, size_t min_coverage, size_t max_coverage, size_t min_count, bool tolerate_deletions)
Compute a simple status with useful properties from the counts of a BaseCounts.
std::string print_pruned_taxonomy(Taxonomy const &taxonomy)
Print a Taxonomy, highlighting those Taxa that are the pruning border, i.e., where we cut off the sub...
size_t min_border_level
Minimum level of the Taxa that are considered inside for pruning. Default is 0.
bool allow_approximation
Allow some approximation in order to get closer to the target pruning size.