49 #include <unordered_map>
50 #include <unordered_set>
66 for(
size_t i = 0; i < pquery.
name_size(); ++i ) {
76 for(
auto const& pqry : smp.
pqueries() ) {
88 for(
auto const& pqry : smp.
pqueries() ) {
100 for(
auto& pqry : smp.
pqueries() ) {
110 std::unordered_set<std::string> result;
111 for(
auto const& pquery : sample ) {
112 for(
auto const& pname : pquery.names() ) {
113 result.insert( pname.name );
126 for(
auto const& place : pquery.
placements() ) {
127 sum += place.like_weight_ratio;
130 throw std::overflow_error(
"Cannot normalize weight ratios if all of them are zero." );
133 place.like_weight_ratio /=
sum;
139 for(
auto pqry_it = smp.
begin(); pqry_it != smp.
end(); ++pqry_it ) {
156 return lhs.like_weight_ratio > rhs.like_weight_ratio;
163 for(
auto pqry_it = smp.
begin(); pqry_it != smp.
end(); ++pqry_it ) {
171 for(
auto& pqry : smp.
pqueries() ) {
172 for(
auto& place : pqry.placements() ) {
173 place.proximal_length *= factor;
182 throw std::runtime_error(
"Sizes of the Sample::tree() and source tree differ." );
187 #pragma omp parallel for
188 for(
size_t i = 0; i < sample.
size(); ++i ) {
189 auto& pquery = sample.
at(i);
191 for(
auto& placement : pquery.placements() ) {
192 auto const edge_idx = placement.edge().index();
196 placement.proximal_length *= new_bl / old_bl;
201 #pragma omp parallel for
202 for(
size_t edge_idx = 0; edge_idx < source.
edge_count(); ++edge_idx ) {
219 double weight_sum = 0.0;
229 placements.erase( placements.begin() + i, placements.end() );
241 for(
auto& pquery : smp ) {
259 placements.erase( placements.begin() + n, placements.end() );
270 for(
auto& pquery : smp ) {
298 for(
auto& pquery : smp ) {
313 for(
auto& pquery : sample ) {
328 for(
auto& pquery : sample ) {
336 auto new_end_pqueries = std::remove_if(
340 cnt += static_cast<int>( pqry.placement_size() == 0 );
341 return pqry.placement_size() == 0;
344 assert( cnt ==
static_cast<size_t>( sample.
end() - new_end_pqueries ));
345 sample.
remove( new_end_pqueries, sample.
end() );
399 bool remove_empty_pqueries
401 for(
auto& pquery : smp ) {
403 auto& name_container = pquery.expose_names();
404 auto new_end_names = std::remove_if(
405 name_container.begin(),
406 name_container.end(),
408 return ! predicate( nm.name );
411 name_container.erase( new_end_names, name_container.end() );
413 if( remove_empty_pqueries ) {
420 std::string
const& regex,
421 bool remove_empty_pqueries
423 std::regex pattern( regex );
425 return regex_match( name, pattern );
426 }, remove_empty_pqueries );
431 std::unordered_set<std::string>
const& keep_list,
432 bool remove_empty_pqueries
435 return keep_list.count( name ) > 0;
436 }, remove_empty_pqueries );
441 std::string
const& regex,
442 bool remove_empty_pqueries
444 std::regex pattern( regex );
446 return ! regex_match( name, pattern );
447 }, remove_empty_pqueries );
452 std::unordered_set<std::string>
const& remove_list,
453 bool remove_empty_pqueries
456 return remove_list.count( name ) == 0;
457 }, remove_empty_pqueries );
463 bool remove_empty_pqueries
477 bool remove_empty_pqueries
483 auto names_1 = std::vector< std::string >( names_1_u.begin(), names_1_u.end() );
484 std::sort( names_1.begin(), names_1.end() );
489 auto names_2 = std::vector< std::string >( names_2_u.begin(), names_2_u.end() );
490 std::sort( names_2.begin(), names_2.end() );
494 std::unordered_set< std::string > symdiff;
495 std::set_intersection(
496 names_1.begin(), names_1.end(),
497 names_2.begin(), names_2.end(),
498 std::inserter( symdiff, symdiff.end() )
509 auto new_end_pqueries = std::remove_if(
513 cnt += static_cast<int>( pqry.name_size() == 0 );
514 return pqry.name_size() == 0;
517 assert( cnt ==
static_cast<size_t>( sample.
end() - new_end_pqueries ));
518 sample.
remove( new_end_pqueries, sample.
end() );
544 throw std::runtime_error(
"Cannot join Samples, because their PlacementTrees differ.");
551 for(
const auto& opqry : source.
pqueries() ) {
553 for(
auto opit = opqry.begin_placements(); opit != opqry.end_placements(); ++opit ) {
558 assert( en_map.count( opit->edge_num() ) > 0 );
560 npqry.add_placement( *en_map[ opit->edge_num() ], *opit );
562 for(
auto name_it = opqry.begin_names(); name_it != opqry.end_names(); ++name_it ) {
563 npqry.add_name( *name_it );
591 bool need_iteration =
true;
592 while (need_iteration) {
593 need_iteration =
false;
598 std::unordered_map<std::string, Pquery*> hash;
603 std::vector<size_t> del;
605 for(
size_t i = 0; i < smp.
size(); ++i ) {
606 auto& pqry = smp.
at(i);
610 std::unordered_set<Pquery*> merges;
611 for(
auto name_it = pqry.begin_names(); name_it != pqry.end_names(); ++name_it ) {
612 auto& name = *name_it;
614 if (hash.count(name.name) > 0) {
615 merges.insert(hash[name.name]);
619 if (merges.size() == 0) {
622 for(
auto name_it = pqry.begin_names(); name_it != pqry.end_names(); ++name_it ) {
623 auto& name = *name_it;
627 assert( hash.count(name.name) == 0 );
630 hash[name.name] = &pqry;
638 Pquery* merge_into = *merges.begin();
643 for(
auto const& place : pqry.placements() ) {
651 for(
auto const& name : pqry.names() ) {
659 hash[name.name] = merge_into;
669 if (merges.size() > 1) {
670 need_iteration =
true;
677 for(
auto it = del.rbegin(); it != del.rend(); ++it ) {
692 std::unordered_map< size_t, std::pair< PqueryPlacement, size_t >> merge_units;
695 auto const& place = *pit;
696 auto edge_idx = place.edge().index();
699 if( merge_units.count( edge_idx ) == 0 ) {
700 merge_units[ edge_idx ] = { place, 0 };
704 auto& merge_into = merge_units[ edge_idx ].first;
705 ++merge_units[ edge_idx ].second;
707 merge_into.likelihood += place.likelihood;
708 merge_into.like_weight_ratio += place.like_weight_ratio;
709 merge_into.proximal_length += place.proximal_length;
710 merge_into.pendant_length += place.pendant_length;
716 for(
auto& merge_unit : merge_units ) {
717 auto& place = merge_unit.second.first;
719 if( merge_unit.second.second > 1 ) {
720 double denom =
static_cast<double>( merge_unit.second.second );
722 place.likelihood /= denom;
723 place.like_weight_ratio /= denom;
724 place.proximal_length /= denom;
725 place.pendant_length /= denom;
734 for(
auto pquery_it = smp.
begin(); pquery_it != smp.
end(); ++pquery_it ) {
742 std::unordered_map<std::string, PqueryName> result;
744 auto const& name = *name_it;
746 if( result.count( name.name ) == 0 ) {
747 result[ name.name ] = name;
749 result[ name.name ].multiplicity += name.multiplicity;
755 for(
auto const& n : result ) {
762 for(
auto pquery_it = smp.
begin(); pquery_it != smp.
end(); ++pquery_it ) {
774 for(
auto const& pqry : smp.
pqueries() ) {
775 count += pqry.name_size();
783 for(
auto const& pqry : smp.
pqueries() ) {
784 count += pqry.placement_size();
796 for(
size_t edge_i = 0; edge_i < place_map.size(); ++edge_i ) {
797 if( place_map[ edge_i ].size() > max ) {
799 max = place_map[ edge_i ].size();
803 return std::make_pair(edge, max);
813 for(
size_t edge_i = 0; edge_i < place_map.size(); ++edge_i ) {
815 for(
auto const& place : place_map[ edge_i ]) {
816 sum += place->like_weight_ratio;
823 return std::make_pair(edge, max);
832 std::vector<double> distrib;
837 for(
auto const& pquery : sample.
pqueries() ) {
838 for(
auto& place : pquery.placements() ) {
841 auto dp = depths[ place.edge().primary_node().index() ].second;
842 auto ds = depths[ place.edge().secondary_node().index() ].second;
843 auto ld = std::min(dp, ds);
846 if( distrib.size() < ld + 1 ) {
847 distrib.resize( ld + 1, 0.0 );
849 distrib[ld] += place.like_weight_ratio;
860 std::vector<int> hist;
865 for(
auto const& pquery : smp.
pqueries() ) {
866 for(
auto& place : pquery.placements() ) {
869 auto dp = depths[ place.edge().primary_node().index() ].second;
870 auto ds = depths[ place.edge().secondary_node().index() ].second;
871 auto ld = std::min(dp, ds);
874 if (ld + 1 > hist.size()) {
875 hist.resize(ld + 1, 0);
885 Sample const& smp,
const double min,
const double max,
const int bins
887 std::vector<int> hist(bins, 0);
888 double bin_size = (max - min) / bins;
893 for (
const auto& pquery : smp.
pqueries()) {
894 for(
auto& place : pquery.placements() ) {
897 double dp = place.pendant_length
898 + place.proximal_length
899 + dists[ place.edge().primary_node().index() ].second;
900 double ds = place.pendant_length
902 - place.proximal_length
903 + dists[ place.edge().secondary_node().index() ].second;
904 double ld = std::min(dp, ds);
908 int bin =
static_cast <int> (std::floor( (ld - min) / bin_size ));
923 Sample const& smp,
double& min,
double& max,
const int bins
925 std::vector<int> hist(bins, 0);
929 std::vector<double> distrib;
937 for (
const auto& pquery : smp.
pqueries()) {
938 for(
auto& place : pquery.placements() ) {
941 double dp = place.pendant_length
942 + place.proximal_length
943 + dists[ place.edge().primary_node().index() ].second;
944 double ds = place.pendant_length
946 - place.proximal_length
947 + dists[ place.edge().secondary_node().index() ].second;
948 double ld = std::min(dp, ds);
949 distrib.push_back(ld);
953 if (distrib.size() == 1 || ld < min_d) {
956 if (distrib.size() == 1 || ld > max_d) {
957 max_d = std::nextafter(ld, ld + 1);
963 double bin_size = (max_d - min_d) / bins;
964 for (
double ld : distrib) {
965 int bin =
static_cast <int> (std::floor( (ld - min_d) / bin_size ));
966 assert(bin >= 0 && bin < bins);