A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
placement/function/functions.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_PLACEMENT_FUNCTION_FUNCTIONS_H_
2 #define GENESIS_PLACEMENT_FUNCTION_FUNCTIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <string>
37 #include <unordered_map>
38 #include <unordered_set>
39 #include <utility>
40 #include <vector>
41 
42 namespace genesis {
43 namespace placement {
44 
45 // =================================================================================================
46 // Pquery Names
47 // =================================================================================================
48 
52 bool has_name( Pquery const& pquery, std::string const& name );
53 
58 bool has_name( Sample const& smp, std::string const& name );
59 
63 Pquery const* find_pquery( Sample const& smp, std::string const& name );
64 
68 Pquery* find_pquery( Sample& smp, std::string const& name );
69 
76 std::unordered_set<std::string> all_pquery_names( Sample const& sample );
77 
78 // =================================================================================================
79 // Normalization and Sorting
80 // =================================================================================================
81 
86 void normalize_weight_ratios( Pquery& pquery );
87 
95 void normalize_weight_ratios( Sample& smp );
96 
97 // void sort_placements_by_proximal_length( PlacementTreeEdge& edge );
98 // void sort_placements_by_proximal_length( Sample& smp );
99 
104 void sort_placements_by_weight( Pquery& pquery );
105 
110 void sort_placements_by_weight( Sample& smp );
111 
119 void scale_all_branch_lengths( Sample& smp, double factor = 1.0 );
120 
133 void adjust_branch_lengths( Sample& sample, tree::Tree const& source );
134 
135 // =================================================================================================
136 // Filtering
137 // =================================================================================================
138 
147 void filter_min_accumulated_weight( Pquery& pquery, double threshold = 0.99 );
148 
156 void filter_min_accumulated_weight( Sample& smp, double threshold = 0.99 );
157 
170 void filter_n_max_weight_placements( Pquery& pquery, size_t n = 1 );
171 
179 void filter_n_max_weight_placements( Sample& smp, size_t n = 1 );
180 
184 void filter_min_weight_threshold( Pquery& pquery, double threshold = 0.01 );
185 
190 void filter_min_weight_threshold( Sample& smp, double threshold = 0.01 );
191 
199 void filter_pqueries_keeping_names( Sample& smp, std::string const& regex );
200 
211 void filter_pqueries_keeping_names( Sample& smp, std::unordered_set<std::string> keep_list );
212 
220 void filter_pqueries_removing_names( Sample& smp, std::string const& regex );
221 
232 void filter_pqueries_removing_names( Sample& smp, std::unordered_set<std::string> remove_list );
233 
241 void filter_pqueries_intersecting_names( Sample& sample_1, Sample& sample_2 );
242 
253 void filter_pqueries_differing_names( Sample& sample_1, Sample& sample_2 );
254 
263 size_t remove_empty_pqueries( Sample& sample );
264 
265 // =================================================================================================
266 // Joining and Merging
267 // =================================================================================================
268 
280 void copy_pqueries( Sample const& source, Sample& target );
281 
293 void merge_duplicates( Sample& smp );
294 
309 void collect_duplicate_pqueries( Sample& smp );
310 
318 void merge_duplicate_placements( Pquery& pquery );
319 
323 void merge_duplicate_placements( Sample& smp );
324 
329 void merge_duplicate_names( Pquery& pquery );
330 
334 void merge_duplicate_names( Sample& smp );
335 
336 // =================================================================================================
337 // Placement Mass
338 // =================================================================================================
339 
344 double total_multiplicity( Pquery const& pqry );
345 
350 double total_multiplicity( Sample const& sample );
351 
356 size_t total_name_count( Sample const& smp );
357 
362 size_t total_placement_count( Sample const& smp );
363 
373 double total_placement_mass( Sample const& smp );
374 
388 double total_placement_mass_with_multiplicities( Sample const& smp );
389 
394 std::pair<PlacementTreeEdge const*, size_t> placement_count_max_edge( Sample const& smp );
395 
400 std::pair<PlacementTreeEdge const*, double> placement_mass_max_edge( Sample const& smp );
401 
402 // =================================================================================================
403 // Histograms
404 // =================================================================================================
405 
406 std::vector<double> closest_leaf_weight_distribution( Sample const& sample );
407 
433 std::vector<int> closest_leaf_depth_histogram( Sample const& smp );
434 
460 std::vector<int> closest_leaf_distance_histogram (
461  Sample const& smp, const double min, const double max, const int bins = 10
462 );
463 
494 std::vector<int> closest_leaf_distance_histogram_auto (
495  Sample const& smp, double& min, double& max, const int bins = 10
496 );
497 
498 } // namespace placement
499 } // namespace genesis
500 
501 #endif // include guard
std::vector< double > closest_leaf_weight_distribution(Sample const &sample)
size_t total_name_count(Sample const &smp)
Get the total number of PqueryNames in all Pqueries of the given Sample.
void sort_placements_by_weight(Pquery &pquery)
Sort the PqueryPlacements of a Pquery by their like_weight_ratio, in descending order (most likely fi...
std::unordered_set< std::string > all_pquery_names(Sample const &sample)
Return a set of all unique PqueryNames of the Pqueries of the given sample.
void filter_n_max_weight_placements(Pquery &pquery, size_t n)
Remove all PqueryPlacements but the n most likely ones from the Pquery.
bool has_name(Pquery const &pquery, std::string const &name)
Return true iff the given Pquery contains a particular name.
void scale_all_branch_lengths(Sample &smp, double factor)
Scale all branch lengths of the Tree and the position of the PqueryPlacements by a given factor...
void filter_pqueries_differing_names(Sample &sample_1, Sample &sample_2)
Remove from both Samples all Pqueries that have a name in common.
void normalize_weight_ratios(Pquery &pquery)
Recalculate the like_weight_ratio of the PqueryPlacements of a Pquery, so that their sum is 1...
std::pair< PlacementTreeEdge const *, size_t > placement_count_max_edge(Sample const &smp)
Get the number of placements on the edge with the most placements, and a pointer to this edge...
void filter_pqueries_keeping_names(Sample &smp, std::string const &regex)
Remove all Pqueries which do not have at least one name that matches the given regex.
void collect_duplicate_pqueries(Sample &smp)
Find all Pqueries that share a common name and combine them into a single Pquery containing all thei...
std::vector< int > closest_leaf_distance_histogram(Sample const &smp, const double min, const double max, const int bins)
Returns a histogram counting the number of placements that have a certain distance to their closest l...
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
std::vector< int > closest_leaf_distance_histogram_auto(Sample const &smp, double &min, double &max, const int bins)
Returns the same type of histogram as closest_leaf_distance_histogram(), but automatically determines...
std::pair< PlacementTreeEdge const *, double > placement_mass_max_edge(Sample const &smp)
Get the summed mass of the placements on the heaviest edge, measured by their like_weight_ratio, and a pointer to this edge.
void copy_pqueries(Sample const &source, Sample &target)
Copy all Pqueries from the source Sample (left parameter) to the target Sample (right parameter)...
void merge_duplicate_names(Pquery &pquery)
Merge all PqueryNames that have the same name property into one, while adding up their multiplicity...
void filter_min_accumulated_weight(Pquery &pquery, double threshold)
Remove the PqueryPlacements with the lowest like_weight_ratio, while keeping the accumulated weight (...
void filter_min_weight_threshold(Pquery &pquery, double threshold)
Remove all PqueryPlacements that have a like_weight_ratio below the given threshold.
std::vector< int > closest_leaf_depth_histogram(Sample const &smp)
Return a histogram representing how many placements have which depth with respect to their closest le...
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
void adjust_branch_lengths(Sample &sample, tree::Tree const &source)
Take the branch lengths of the source Tree and use them as the new branch lengths of the sample...
double total_placement_mass(Sample const &smp)
Get the summed mass of all PqueryPlacements in all Pqueries of the given Sample, where mass is measu...
void merge_duplicate_placements(Pquery &pquery)
Merge all PqueryPlacements of a Pquery that are on the same TreeEdge into one averaged PqueryPlacemen...
void filter_pqueries_intersecting_names(Sample &sample_1, Sample &sample_2)
Remove all Pqueries from the two Samples except the ones that have names in common.
size_t remove_empty_pqueries(Sample &sample)
Remove all Pqueries from the Sample that have no PqueryPlacements.
Pquery const * find_pquery(Sample const &smp, std::string const &name)
Return the first Pquery that has a particular name, or nullptr if none has.
size_t total_placement_count(Sample const &smp)
Get the total number of PqueryPlacements in all Pqueries of the given Sample.
void merge_duplicates(Sample &smp)
Look for Pqueries with the same name and merge them.
void filter_pqueries_removing_names(Sample &smp, std::string const &regex)
Remove all Pqueries which have at least one name that matches the given regex.