A library for working with phylogenetic and population genetic data.
v0.32.0
masses.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
42 
43 #include <algorithm>
44 #include <cassert>
45 #include <cmath>
46 #include <exception>
47 #include <regex>
48 #include <string>
49 #include <unordered_map>
50 #include <unordered_set>
51 #include <vector>
52 
53 
54 #ifdef GENESIS_OPENMP
55 # include <omp.h>
56 #endif
57 
58 namespace genesis {
59 namespace placement {
60 
61 // =================================================================================================
62 // Mulitplicities
63 // =================================================================================================
64 
65 double total_multiplicity( Pquery const& pqry )
66 {
67  double mult = 0.0;
68  for( auto const& name : pqry.names() ) {
69  mult += name.multiplicity;
70  }
71  return mult;
72 }
73 
74 double total_multiplicity( Sample const& sample )
75 {
76  double mult = 0.0;
77  for( auto const& pqry : sample.pqueries() ) {
78  mult += total_multiplicity( pqry );
79  }
80  return mult;
81 }
82 
83 // =================================================================================================
84 // Masses
85 // =================================================================================================
86 
87 std::vector<double> placement_mass_per_edges_with_multiplicities( Sample const& sample )
88 {
89  auto result = std::vector<double>( sample.tree().edge_count(), 0.0 );
90 
91  for( auto const& pqry : sample.pqueries() ) {
92  auto const mult = total_multiplicity( pqry );
93  for( auto const& place : pqry.placements() ) {
94  result[ place.edge().index() ] += place.like_weight_ratio * mult;
95  }
96  }
97 
98  return result;
99 }
100 
102 {
103  auto result = utils::Matrix<double>();
104 
105  // Edge case.
106  if( sample_set.size() == 0 ) {
107  return result;
108  }
109 
110  // Init matrix.
111  auto const set_size = sample_set.size();
112  result = utils::Matrix<double>( set_size, sample_set[ 0 ].tree().edge_count(), 0.0 );
113 
114  // Return completely empty matrix in edge cases.
115  if( result.rows() == 0 || result.cols() == 0 ) {
116  return result;
117  }
118 
119  // Fill matrix.
120  #pragma omp parallel for schedule(dynamic)
121  for( size_t i = 0; i < set_size; ++i ) {
122  auto const& smp = sample_set[ i ];
123 
124  if( smp.tree().edge_count() != result.cols() ) {
125  throw std::runtime_error(
126  "Cannot calculate placement weights per edge matrix "
127  "for Samples with Trees of different size."
128  );
129  }
130 
131  for( auto const& pqry : smp.pqueries() ) {
132  auto const mult = total_multiplicity( pqry );
133  for( auto const& place : pqry.placements() ) {
134  result( i, place.edge().index() ) += place.like_weight_ratio * mult;
135  }
136  }
137  }
138 
139  return result;
140 }
141 
143 {
144  double sum = 0.0;
145  for( const auto& pqry : smp.pqueries() ) {
146  double mult = 0.0;
147  for( auto const& name : pqry.names() ) {
148  mult += name.multiplicity;
149  }
150 
151  double lwr_sum = 0.0;
152  for( auto const& place : pqry.placements() ) {
153  lwr_sum += place.like_weight_ratio;
154  }
155  sum += lwr_sum * mult;
156  }
157  return sum;
158 }
159 
160 // =================================================================================================
161 // Masses without Multiplicities
162 // =================================================================================================
163 
164 std::vector<double> placement_mass_per_edge_without_multiplicities( Sample const& sample )
165 {
166  auto result = std::vector<double>( sample.tree().edge_count(), 0.0 );
167 
168  for( auto const& pqry : sample.pqueries() ) {
169  for( auto const& place : pqry.placements() ) {
170  result[ place.edge().index() ] += place.like_weight_ratio;
171  }
172  }
173 
174  return result;
175 }
176 
178 {
179  auto result = utils::Matrix<double>();
180 
181  // Edge case.
182  if( sample_set.size() == 0 ) {
183  return result;
184  }
185 
186  // Init matrix.
187  auto const set_size = sample_set.size();
188  result = utils::Matrix<double>( set_size, sample_set[ 0 ].tree().edge_count(), 0.0 );
189 
190  // Return completely empty matrix in edge cases.
191  if( result.rows() == 0 || result.cols() == 0 ) {
192  return result;
193  }
194 
195  // Fill matrix.
196  #pragma omp parallel for schedule(dynamic)
197  for( size_t i = 0; i < set_size; ++i ) {
198  auto const& smp = sample_set[ i ];
199 
200  if( smp.tree().edge_count() != result.cols() ) {
201  throw std::runtime_error(
202  "Cannot calculate placement weights per edge matrix "
203  "for Samples with Trees of different size."
204  );
205  }
206 
207  for( auto const& pqry : smp.pqueries() ) {
208  for( auto const& place : pqry.placements() ) {
209  result( i, place.edge().index() ) += place.like_weight_ratio;
210  }
211  }
212  }
213 
214  return result;
215 }
216 
218 {
219  double sum = 0.0;
220  for( const auto& pqry : smp.pqueries() ) {
221  for( auto& place : pqry.placements() ) {
222  sum += place.like_weight_ratio;
223  }
224  }
225  return sum;
226 }
227 
228 } // namespace placement
229 } // namespace genesis
genesis::placement::total_placement_mass_with_multiplicities
double total_placement_mass_with_multiplicities(Sample const &smp)
Get the mass of all PqueryPlacements of the Sample, using the multiplicities as factors.
Definition: masses.cpp:142
algorithm.hpp
Provides some valuable algorithms that are not part of the C++ 11 STL.
genesis::placement::Sample::tree
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:124
functions.hpp
CommonTree functions.
genesis::utils::sum
double sum(const Histogram &h)
Definition: utils/math/histogram/stats.cpp:140
genesis::placement::SampleSet
Store a set of Samples with associated names.
Definition: sample_set.hpp:54
genesis::placement::Sample
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on.
Definition: sample.hpp:68
functions.hpp
std.hpp
Provides some valuable additions to STD.
genesis::utils::Matrix< double >
genesis::placement::placement_mass_per_edge_without_multiplicities
std::vector< double > placement_mass_per_edge_without_multiplicities(Sample const &sample)
Return a vector that contains the sum of the masses of the PqueryPlacements per edge of the tree of t...
Definition: masses.cpp:164
distances.hpp
Header of CommonTree distance methods.
genesis::placement::Pquery
A pquery holds a set of PqueryPlacements and a set of PqueryNames.
Definition: pquery.hpp:82
masses.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::placement::total_placement_mass_without_multiplicities
double total_placement_mass_without_multiplicities(Sample const &smp)
Get the summed mass of all PqueryPlacements in all Pqueries of the given Sample, where mass is measu...
Definition: masses.cpp:217
genesis::placement::Pquery::names
utils::Range< iterator_names > names()
Return a Range iterator to the PqueryNames.
Definition: pquery.cpp:162
operators.hpp
Tree operator functions.
operators.hpp
genesis::placement::SampleSet::size
size_t size() const
Return the size of the SampleSet, i.e., the number of Samples.
Definition: sample_set.hpp:234
genesis::placement::placement_mass_per_edges_with_multiplicities
std::vector< double > placement_mass_per_edges_with_multiplicities(Sample const &sample)
Return a vector that contains the sum of the masses of the PqueryPlacements per edge of the tree of t...
Definition: masses.cpp:87
distances.hpp
Header of Tree distance methods.
genesis::placement::Sample::pqueries
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:264
genesis::placement::total_multiplicity
double total_multiplicity(Pquery const &pqry)
Return the sum of all multiplicities of the Pquery.
Definition: masses.cpp:65
genesis::tree::edge_count
size_t edge_count(Tree const &tree)
Return the number of Edges of a Tree. Same as Tree::edge_count().
Definition: tree/function/functions.cpp:212
helper.hpp
genesis::tree::Tree::edge_count
size_t edge_count() const
Return the number of TreeEdges of the Tree.
Definition: tree/tree.hpp:272