A library for working with phylogenetic and population genetic data.
v0.32.0
placement/function/functions.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_PLACEMENT_FUNCTION_FUNCTIONS_H_
2 #define GENESIS_PLACEMENT_FUNCTION_FUNCTIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
37 
38 #include <string>
39 #include <unordered_map>
40 #include <unordered_set>
41 #include <utility>
42 #include <vector>
43 
44 namespace genesis {
45 namespace placement {
46 
47 // =================================================================================================
48 // Pquery Names
49 // =================================================================================================
50 
54 bool has_name( Pquery const& pquery, std::string const& name );
55 
60 bool has_name( Sample const& smp, std::string const& name );
61 
65 Pquery const* find_pquery( Sample const& smp, std::string const& name );
66 
70 Pquery* find_pquery( Sample& smp, std::string const& name );
71 
78 std::unordered_set<std::string> all_pquery_names( Sample const& sample );
79 
80 // =================================================================================================
81 // Normalization and Sorting
82 // =================================================================================================
83 
88 void normalize_weight_ratios( Pquery& pquery );
89 
97 void normalize_weight_ratios( Sample& smp );
98 
99 // void sort_placements_by_proximal_length( PlacementTreeEdge& edge );
100 // void sort_placements_by_proximal_length( Sample& smp );
101 
106 void sort_placements_by_weight( Pquery& pquery );
107 
112 void sort_placements_by_weight( Sample& smp );
113 
121 void scale_all_branch_lengths( Sample& smp, double factor = 1.0 );
122 
135 void adjust_branch_lengths( Sample& sample, tree::Tree const& source );
136 
137 // =================================================================================================
138 // Filtering Placements
139 // =================================================================================================
140 
149 void filter_min_accumulated_weight( Pquery& pquery, double threshold = 0.99 );
150 
158 void filter_min_accumulated_weight( Sample& smp, double threshold = 0.99 );
159 
172 void filter_n_max_weight_placements( Pquery& pquery, size_t n = 1 );
173 
181 void filter_n_max_weight_placements( Sample& smp, size_t n = 1 );
182 
186 void filter_min_weight_threshold( Pquery& pquery, double threshold = 0.01 );
187 
192 void filter_min_weight_threshold( Sample& smp, double threshold = 0.01 );
193 
197 void filter_min_pendant_length( Pquery& pquery, double threshold );
198 
203 void filter_min_pendant_length( Sample& sample, double threshold );
204 
208 void filter_max_pendant_length( Pquery& pquery, double threshold );
209 
214 void filter_max_pendant_length( Sample& sample, double threshold );
215 
224 size_t remove_empty_placement_pqueries( Sample& sample );
225 
226 // =================================================================================================
227 // Filtering Names
228 // =================================================================================================
229 
236 void filter_pqueries_keeping_names(
237  Sample& smp,
238  std::string const& regex,
239  bool remove_empty_name_pqueries = true
240 );
241 
248 void filter_pqueries_keeping_names(
249  Sample& smp,
250  std::unordered_set<std::string> const& keep_list,
251  bool remove_empty_name_pqueries = true
252 );
253 
260 void filter_pqueries_removing_names(
261  Sample& smp,
262  std::string const& regex,
263  bool remove_empty_name_pqueries = true
264 );
265 
272 void filter_pqueries_removing_names(
273  Sample& smp,
274  std::unordered_set<std::string> const& remove_list,
275  bool remove_empty_name_pqueries = true
276 );
277 
287 void filter_pqueries_intersecting_names(
288  Sample& sample_1,
289  Sample& sample_2,
290  bool remove_empty_name_pqueries = true
291 );
292 
302 void filter_pqueries_differing_names(
303  Sample& sample_1,
304  Sample& sample_2,
305  bool remove_empty_name_pqueries = true
306 );
307 
316 size_t remove_empty_name_pqueries( Sample& sample );
317 
318 // =================================================================================================
319 // Joining and Merging
320 // =================================================================================================
321 
333 void copy_pqueries( Sample const& source, Sample& target );
334 
346 void merge_duplicates( Sample& smp );
347 
362 void collect_duplicate_pqueries( Sample& smp );
363 
371 void merge_duplicate_placements( Pquery& pquery );
372 
376 void merge_duplicate_placements( Sample& smp );
377 
382 void merge_duplicate_names( Pquery& pquery );
383 
387 void merge_duplicate_names( Sample& smp );
388 
389 // =================================================================================================
390 // Placement Mass
391 // =================================================================================================
392 
397 size_t total_name_count( Sample const& smp );
398 
403 size_t total_placement_count( Sample const& smp );
404 
409 std::pair<PlacementTreeEdge const*, size_t> placement_count_max_edge( Sample const& smp );
410 
415 std::pair<PlacementTreeEdge const*, double> placement_mass_max_edge( Sample const& smp );
416 
417 // =================================================================================================
418 // Histograms
419 // =================================================================================================
420 
421 std::vector<double> closest_leaf_weight_distribution( Sample const& sample );
422 
448 std::vector<int> closest_leaf_depth_histogram( Sample const& smp );
449 
475 std::vector<int> closest_leaf_distance_histogram (
476  Sample const& smp, const double min, const double max, const int bins = 10
477 );
478 
509 std::vector<int> closest_leaf_distance_histogram_auto (
510  Sample const& smp, double& min, double& max, const int bins = 10
511 );
512 
513 } // namespace placement
514 } // namespace genesis
515 
516 #endif // include guard
genesis::placement::filter_pqueries_keeping_names
void filter_pqueries_keeping_names(Sample &smp, std::string const &regex, bool remove_empty_pqueries)
Remove all PqueryNames which do not match the given regex.
Definition: placement/function/functions.cpp:418
genesis::placement::copy_pqueries
void copy_pqueries(Sample const &source, Sample &target)
Copy all Pqueries from the source Sample (left parameter) to the target Sample (right parameter).
Definition: placement/function/functions.cpp:539
genesis::placement::filter_pqueries_removing_names
void filter_pqueries_removing_names(Sample &smp, std::string const &regex, bool remove_empty_pqueries)
Remove all PqueryNames which match the given regex.
Definition: placement/function/functions.cpp:439
genesis::placement::placement_count_max_edge
std::pair< PlacementTreeEdge const *, size_t > placement_count_max_edge(Sample const &smp)
Get the number of placements on the edge with the most placements, and a pointer to this edge.
Definition: placement/function/functions.cpp:789
genesis::placement::total_name_count
size_t total_name_count(Sample const &smp)
Get the total number of PqueryNames in all Pqueries of the given Sample.
Definition: placement/function/functions.cpp:771
genesis::placement::find_pquery
Pquery const * find_pquery(Sample const &smp, std::string const &name)
Return the first Pquery that has a particular name, or nullptr if none has.
Definition: placement/function/functions.cpp:84
sample_set.hpp
genesis::placement::filter_min_weight_threshold
void filter_min_weight_threshold(Pquery &pquery, double threshold)
Remove all PqueryPlacements that have a like_weight_ratio below the given threshold.
Definition: placement/function/functions.cpp:275
genesis::placement::has_name
bool has_name(Pquery const &pquery, std::string const &name)
Return true iff the given Pquery contains a particular name.
Definition: placement/function/functions.cpp:64
genesis::placement::placement_mass_max_edge
std::pair< PlacementTreeEdge const *, double > placement_mass_max_edge(Sample const &smp)
Get the summed mass of the placements on the heaviest edge, measured by their like_weight_ratio,...
Definition: placement/function/functions.cpp:806
genesis::placement::filter_max_pendant_length
void filter_max_pendant_length(Pquery &pquery, double threshold)
Remove all PqueryPlacements that have a pendant_length above the given threshold.
Definition: placement/function/functions.cpp:318
genesis::placement::filter_pqueries_differing_names
void filter_pqueries_differing_names(Sample &sample_1, Sample &sample_2, bool remove_empty_pqueries)
Remove all PqueryNames from the two Samples that occur in both of them.
Definition: placement/function/functions.cpp:474
genesis::placement::closest_leaf_distance_histogram_auto
std::vector< int > closest_leaf_distance_histogram_auto(Sample const &smp, double &min, double &max, const int bins)
Returns the same type of histogram as closest_leaf_distance_histogram(), but automatically determines...
Definition: placement/function/functions.cpp:922
genesis::placement::sort_placements_by_weight
void sort_placements_by_weight(Pquery &pquery)
Sort the PqueryPlacements of a Pquery by their like_weight_ratio, in descending order (most likely fi...
Definition: placement/function/functions.cpp:147
genesis::placement::filter_pqueries_intersecting_names
void filter_pqueries_intersecting_names(Sample &sample_1, Sample &sample_2, bool remove_empty_pqueries)
Remove all PqueryNames from the two Samples that are unique to each of them.
Definition: placement/function/functions.cpp:460
genesis::placement::scale_all_branch_lengths
void scale_all_branch_lengths(Sample &smp, double factor)
Scale all branch lengths of the Tree and the position of the PqueryPlacements by a given factor.
Definition: placement/function/functions.cpp:168
genesis::placement::merge_duplicate_placements
void merge_duplicate_placements(Pquery &pquery)
Merge all PqueryPlacements of a Pquery that are on the same TreeEdge into one averaged PqueryPlacemen...
Definition: placement/function/functions.cpp:688
matrix.hpp
genesis::placement::all_pquery_names
std::unordered_set< std::string > all_pquery_names(Sample const &sample)
Return a set of all unique PqueryNames of the Pqueries of the given sample.
Definition: placement/function/functions.cpp:108
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::placement::merge_duplicates
void merge_duplicates(Sample &smp)
Look for Pqueries with the same name and merge them.
Definition: placement/function/functions.cpp:569
genesis::placement::normalize_weight_ratios
void normalize_weight_ratios(Pquery &pquery)
Recalculate the like_weight_ratio of the PqueryPlacements of a Pquery, so that their sum is 1....
Definition: placement/function/functions.cpp:123
genesis::placement::remove_empty_name_pqueries
size_t remove_empty_name_pqueries(Sample &sample)
Remove all Pqueries from the Sample that have no PqueryNames.
Definition: placement/function/functions.cpp:506
genesis::placement::closest_leaf_depth_histogram
std::vector< int > closest_leaf_depth_histogram(Sample const &smp)
Return a histogram representing how many placements have which depth with respect to their closest le...
Definition: placement/function/functions.cpp:858
genesis::placement::closest_leaf_weight_distribution
std::vector< double > closest_leaf_weight_distribution(Sample const &sample)
Definition: placement/function/functions.cpp:830
genesis::placement::collect_duplicate_pqueries
void collect_duplicate_pqueries(Sample &smp)
Find all Pqueries that share a common name and combine them into a single Pquery containing all thei...
Definition: placement/function/functions.cpp:576
genesis::placement::merge_duplicate_names
void merge_duplicate_names(Pquery &pquery)
Merge all PqueryNames that have the same name property into one, while adding up their multiplicity.
Definition: placement/function/functions.cpp:739
genesis::placement::total_placement_count
size_t total_placement_count(Sample const &smp)
Get the total number of PqueryPlacements in all Pqueries of the given Sample.
Definition: placement/function/functions.cpp:780
sample.hpp
genesis::placement::filter_min_accumulated_weight
void filter_min_accumulated_weight(Pquery &pquery, double threshold)
Remove the PqueryPlacements with the lowest like_weight_ratio, while keeping the accumulated weight (...
Definition: placement/function/functions.cpp:212
genesis::placement::adjust_branch_lengths
void adjust_branch_lengths(Sample &sample, tree::Tree const &source)
Take the branch lengths of the source Tree and use them as the new branch lengths of the sample.
Definition: placement/function/functions.cpp:178
genesis::placement::closest_leaf_distance_histogram
std::vector< int > closest_leaf_distance_histogram(Sample const &smp, const double min, const double max, const int bins)
Returns a histogram counting the number of placements that have a certain distance to their closest l...
Definition: placement/function/functions.cpp:884
genesis::placement::filter_n_max_weight_placements
void filter_n_max_weight_placements(Pquery &pquery, size_t n)
Remove all PqueryPlacements but the n most likely ones from the Pquery.
Definition: placement/function/functions.cpp:246
genesis::placement::remove_empty_placement_pqueries
size_t remove_empty_placement_pqueries(Sample &sample)
Remove all Pqueries from the Sample that have no PqueryPlacements.
Definition: placement/function/functions.cpp:333
genesis::placement::filter_min_pendant_length
void filter_min_pendant_length(Pquery &pquery, double threshold)
Remove all PqueryPlacements that have a pendant_length below the given threshold.
Definition: placement/function/functions.cpp:303