61 size_t target_taxonomy_size,
72 <<
" leaf Taxa. Pruning with " +
std::to_string( target_taxonomy_size )
73 <<
" leaves thus includes the whole taxonomy.";
76 for(
auto it :
preorder( taxonomy ) ) {
78 if( it.taxon().size() == 0 ) {
103 std::multimap< double, Taxon* > border_candidates;
106 size_t border_taxa_count = 0;
115 std::function< void (
Taxon& ) > add_taxon_to_border = [&] (
Taxon& taxon ) {
118 LOG_WARN <<
"Taxon added to border not outside ("
136 for(
auto it :
preorder( taxon ) ) {
138 if( it.taxon().size() == 0 ) {
146 }
else if( taxon.size() == 1 ) {
156 add_taxon_to_border( taxon.at(0) );
166 if( taxon.size() > 0 ) {
173 std::function< void (
Taxon& ) > expand_taxon = [&] (
179 LOG_WARN <<
"Expanding Taxon with status "
189 for(
auto& child : taxon ) {
190 add_taxon_to_border( child );
204 auto include_min_level_taxa = [&] (
Taxon& taxon ) {
210 if( taxon.size() > 0 ) {
220 add_taxon_to_border( taxon );
244 std::function< void (
Taxon& ) > resolve_big_subtaxa = [&] (
266 auto cand = std::find_if(
267 border_candidates.begin(),
268 border_candidates.end(),
269 [ &taxon ] ( std::pair< const double, Taxon* >& entry ) {
270 return &taxon == entry.second;
274 if( cand != border_candidates.end() ) {
277 assert( taxon.size() > 0 );
280 border_candidates.erase( cand );
286 for(
auto& child : taxon ) {
287 resolve_big_subtaxa( child );
298 add_taxon_to_border( taxon );
310 for(
auto& taxon : taxonomy ) {
311 resolve_big_subtaxa( taxon );
331 for(
auto& child : taxonomy ) {
332 add_taxon_to_border( child );
341 while( border_taxa_count < target_taxonomy_size ) {
345 assert( border_candidates.size() > 0 );
351 auto cur_front = *border_candidates.rbegin();
352 assert( *std::prev(border_candidates.end()) == *border_candidates.rbegin() );
357 border_candidates.erase( --border_candidates.rbegin().base() );
358 assert( cur_front != *border_candidates.rbegin() );
368 assert( cur_front.second->size() > 0 );
373 utils::abs_diff( border_taxa_count + cur_front.second->size(), target_taxonomy_size )
385 expand_taxon( *cur_front.second );
414 size_t min_subtaxonomy_size
416 for(
auto& taxon : taxonomy ) {
437 for(
auto it :
preorder( taxon ) ) {
439 if( it.taxon().size() == 0 ) {
454 auto do_count = [&] (
Taxon const& taxon ) {
465 auto do_removal = [&] (
Taxon& taxon ) {
467 taxon.clear_children();
475 auto print_taxon = [&] (
Taxon const& taxon ) {
476 result += std::string(
taxon_level(taxon) * 4,
' ' );
480 result += taxon.name();
497 auto check_parents = [&] (
Taxon const& taxon ) {
504 LOG_INFO <<
"Taxon with incorrect data type (not EntropyTaxonData): " << name;
517 LOG_INFO <<
"Taxon is a leaf but has status 'kInside': " << name;
521 auto cur_ptr = taxon.parent();
522 while( cur_ptr !=
nullptr ) {
539 child_status = cur_status;
544 child_status = cur_status;
547 LOG_INFO <<
"Taxon and child with wrong pruning status ("
556 cur_ptr = cur_ptr->parent();