A library for working with phylogenetic and population genetic data.
v0.32.0
placement/function/distances.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 
36 #include <algorithm>
37 #include <cassert>
38 #include <cmath>
39 
40 #ifdef GENESIS_OPENMP
41 # include <omp.h>
42 #endif
43 
44 namespace genesis {
45 namespace placement {
46 
47 // =================================================================================================
48 // Pquery to Pquery Distances
49 // =================================================================================================
50 
52  PqueryPlain const& pquery_a,
53  PqueryPlain const& pquery_b,
54  utils::Matrix<double> const& node_distances,
55  bool with_pendant_length
56 ) {
57  double sum = 0.0;
58 
59  #pragma omp parallel for
60  for( size_t pppai = 0; pppai < pquery_a.placements.size(); ++pppai ) {
61  auto const& place_a = pquery_a.placements[ pppai ];
62 
63  #pragma omp parallel for
64  for( size_t pppbi = 0; pppbi < pquery_b.placements.size(); ++pppbi ) {
65  auto const& place_b = pquery_b.placements[ pppbi ];
66  double dist;
67 
68  if (place_a.edge_index == place_b.edge_index) {
69  // same branch case
70  dist = std::abs(place_a.proximal_length - place_b.proximal_length);
71 
72  } else {
73  double pp, pd, dp;
74 
75  // proximal-proximal case
76  pp = place_a.proximal_length
77  + node_distances(place_a.primary_node_index, place_b.primary_node_index)
78  + place_b.proximal_length;
79 
80  // proximal-distal case
81  pd = place_a.proximal_length
82  + node_distances(place_a.primary_node_index, place_b.secondary_node_index)
83  + place_b.branch_length - place_b.proximal_length;
84 
85  // distal-proximal case
86  dp = place_a.branch_length - place_a.proximal_length
87  + node_distances(place_a.secondary_node_index, place_b.primary_node_index)
88  + place_b.proximal_length;
89 
90  // find min of the three cases and
91  dist = std::min({ pp, pd, dp });
92  }
93 
94  // If needed, use pendant length; normalize it to the weight ratios.
95  if (with_pendant_length) {
96  dist += place_a.pendant_length + place_b.pendant_length;
97  }
98  dist *= place_a.like_weight_ratio * place_b.like_weight_ratio;
99 
100  #pragma omp atomic
101  sum += dist;
102  }
103  }
104 
105  return sum;
106 }
107 
111 template<typename DistanceFunction>
113  Pquery const& pquery_a,
114  Pquery const& pquery_b,
115  DistanceFunction distance_function
116 ) {
117  double sum = 0.0;
118 
119  #pragma omp parallel for
120  for( size_t pai = 0; pai < pquery_a.placement_size(); ++pai ) {
121  auto const& place_a = pquery_a.placement_at( pai );
122 
123  #pragma omp parallel for
124  for( size_t pab = 0; pab < pquery_b.placement_size(); ++pab ) {
125  auto const& place_b = pquery_b.placement_at( pab );
126 
127  double dist = distance_function( place_a, place_b );
128  dist *= place_a.like_weight_ratio * place_b.like_weight_ratio;
129 
130  #pragma omp atomic
131  sum += dist;
132  }
133  }
134 
135  return sum;
136 }
137 
139  Pquery const& pquery_a,
140  Pquery const& pquery_b,
141  utils::Matrix<double> const& node_distances,
142  bool with_pendant_length
143 ) {
144  return pquery_distance(
145  pquery_a,
146  pquery_b,
147  [&]( PqueryPlacement const& place_a, PqueryPlacement const& place_b ){
148  double dist = placement_distance( place_a, place_b, node_distances );
149  if( with_pendant_length ) {
150  dist += place_a.pendant_length + place_b.pendant_length;
151  }
152  return dist;
153  }
154  );
155 }
156 
158  PqueryPlacement const& place_a,
159  PqueryPlacement const& place_b,
160  utils::Matrix<double> const& node_distances
161 ) {
162  double dist;
163 
164  if( place_a.edge().index() == place_b.edge().index() ) {
165  // same branch case
166  dist = std::abs( place_a.proximal_length - place_b.proximal_length );
167 
168  } else {
169  // proximal-proximal case
170  double pp = place_a.proximal_length
171  + node_distances(
172  place_a.edge().primary_node().index(),
173  place_b.edge().primary_node().index()
174  )
175  + place_b.proximal_length;
176 
177  // proximal-distal case
178  double pd = place_a.proximal_length
179  + node_distances(
180  place_a.edge().primary_node().index(),
181  place_b.edge().secondary_node().index()
182  )
183  + place_b.edge().data<tree::CommonEdgeData>().branch_length
184  - place_b.proximal_length;
185 
186  // distal-proximal case
187  double dp = place_a.edge().data<tree::CommonEdgeData>().branch_length
188  - place_a.proximal_length
189  + node_distances(
190  place_a.edge().secondary_node().index(),
191  place_b.edge().primary_node().index()
192  )
193  + place_b.proximal_length;
194 
195  // find min of the three cases
196  dist = std::min({ pp, pd, dp });
197  }
198 
199  return dist;
200 }
201 
203  Pquery const& pquery_a,
204  Pquery const& pquery_b,
205  utils::Matrix<size_t> const& node_path_lengths
206 ) {
207  return pquery_distance(
208  pquery_a,
209  pquery_b,
210  [&]( PqueryPlacement const& place_a, PqueryPlacement const& place_b ){
211  return placement_path_length_distance( place_a, place_b, node_path_lengths );
212  }
213  );
214 }
215 
217  PqueryPlacement const& place_a,
218  PqueryPlacement const& place_b,
219  utils::Matrix<size_t> const& node_path_lengths
220 ) {
221  // special case
222  if( place_a.edge().index() == place_b.edge().index() ) {
223  return 0;
224  }
225 
226  // primary primary
227  size_t const pp = node_path_lengths(
228  place_a.edge().primary_node().index(),
229  place_b.edge().primary_node().index()
230  );
231 
232  // primary secondary
233  size_t const ps = node_path_lengths(
234  place_a.edge().primary_node().index(),
235  place_b.edge().secondary_node().index()
236  );
237 
238  // secondary primary
239  size_t const sp = node_path_lengths(
240  place_a.edge().secondary_node().index(),
241  place_b.edge().primary_node().index()
242  );
243 
244  return std::min({ pp, ps, sp }) + 1;
245 }
246 
247 // =================================================================================================
248 // Pquery to Tree Element Distances
249 // =================================================================================================
250 
254 template<typename DistanceFunction>
255 double pquery_distance( Pquery const& pquery, DistanceFunction distance_function )
256 {
257  double sum = 0.0;
258 
259  // Calculate the weighted distance of the pquery, using the specified function.
260  #pragma omp parallel for
261  for( size_t pai = 0; pai < pquery.placement_size(); ++pai ) {
262  auto const& placement = pquery.placement_at( pai );
263 
264  double dist = distance_function( placement );
265  dist *= placement.like_weight_ratio;
266 
267  #pragma omp atomic
268  sum += dist;
269  }
270 
271  return sum;
272 }
273 
275  Pquery const& pquery,
276  tree::TreeNode const& node,
277  utils::Matrix<double> const& node_distances
278 ) {
279  return pquery_distance(
280  pquery,
281  [&]( PqueryPlacement const& placement ){
282  return placement_distance( placement, node, node_distances );
283  }
284  );
285 }
286 
288  PqueryPlacement const& placement,
289  tree::TreeNode const& node,
290  utils::Matrix<double> const& node_distances
291 ) {
292  // proximal
293  double const pd = placement.proximal_length
294  + node_distances( node.index(), placement.edge().primary_node().index() )
295  ;
296 
297  // distal
298  double const dd = placement.edge().data<tree::CommonEdgeData>().branch_length
299  - placement.proximal_length
300  + node_distances( node.index(), placement.edge().secondary_node().index() )
301  ;
302 
303  return std::min( pd, dd );
304 }
305 
306 // double pquery_path_length_distance(
307 // Pquery const& pquery,
308 // tree::TreeNode const& node,
309 // utils::Matrix<size_t> const& node_path_lengths
310 // ) {
311 // return pquery_distance(
312 // pquery,
313 // [&]( PqueryPlacement const& placement ){
314 // return placement_path_length_distance( placement, node, node_path_lengths );
315 // }
316 // );
317 // }
318 //
319 // size_t placement_path_length_distance(
320 // PqueryPlacement const& placement,
321 // tree::TreeNode const& node,
322 // utils::Matrix<size_t> const& node_path_lengths
323 // ){
324 // size_t const pd = node_path_lengths( placement.edge().primary_node().index(), node.index() );
325 // size_t const dd = node_path_lengths( placement.edge().secondary_node().index(), node.index() );
326 // return std::min( pd, dd );
327 // }
328 
330  Pquery const& pquery,
331  tree::TreeEdge const& edge,
332  utils::Matrix<size_t> const& edge_path_lengths
333 ){
334  return pquery_distance(
335  pquery,
336  [&]( PqueryPlacement const& placement ){
337  return placement_path_length_distance( placement, edge, edge_path_lengths );
338  }
339  );
340 }
341 
343  PqueryPlacement const& placement,
344  tree::TreeEdge const& edge,
345  utils::Matrix<size_t> const& edge_path_lengths
346 ){
347  return edge_path_lengths( placement.edge().index(), edge.index() );
348 }
349 
350 } // namespace placement
351 } // namespace genesis
genesis::utils::sum
double sum(const Histogram &h)
Definition: utils/math/histogram/stats.cpp:140
genesis::placement::PqueryPlacement::edge
PlacementTreeEdge const & edge() const
Get the PlacementTreeEdge where this PqueryPlacement is placed.
Definition: placement/pquery/placement.hpp:194
genesis::placement::PqueryPlain
Simple POD struct that stores the information of a Pquery in a simple format for speeding up some cal...
Definition: plain.hpp:74
genesis::placement::pquery_distance
double pquery_distance(Pquery const &pquery, tree::TreeNode const &node, utils::Matrix< double > const &node_distances)
Calculate the weighted distance between the PqueryPlacements of a Pquery and a tree::TreeNode,...
Definition: placement/function/distances.cpp:274
genesis::placement::pquery_distance
double pquery_distance(PqueryPlain const &pquery_a, PqueryPlain const &pquery_b, utils::Matrix< double > const &node_distances, bool with_pendant_length)
Calculate the weighted distance between two plain pqueries. It is mainly a helper method for distance...
Definition: placement/function/distances.cpp:51
pquery.hpp
genesis::tree::TreeEdge::index
size_t index() const
Return the index of this Edge.
Definition: edge.hpp:106
distances.hpp
genesis::tree::TreeEdge::secondary_node
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:162
genesis::utils::Matrix< double >
genesis::placement::placement_path_length_distance
size_t placement_path_length_distance(PqueryPlacement const &placement, tree::TreeEdge const &edge, utils::Matrix< size_t > const &edge_path_lengths)
Calculate the discrete distance from a PqueryPlacement to an edge, measured as the number of nodes be...
Definition: placement/function/distances.cpp:342
genesis::placement::Pquery::placement_at
PqueryPlacement & placement_at(size_t index)
Return the PqueryPlacement at a certain index.
Definition: pquery.cpp:118
genesis::placement::Pquery
A pquery holds a set of PqueryPlacements and a set of PqueryNames.
Definition: pquery.hpp:82
genesis::placement::placement_distance
double placement_distance(PqueryPlacement const &place_a, PqueryPlacement const &place_b, utils::Matrix< double > const &node_distances)
Calculate the distance between two PqueryPlacements, using their position on the tree::TreeEdges,...
Definition: placement/function/distances.cpp:157
genesis::tree::TreeEdge
Definition: edge.hpp:60
genesis::tree::TreeNode
Definition: tree/tree/node.hpp:58
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::TreeEdge::primary_node
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:146
genesis::placement::pquery_path_length_distance
double pquery_path_length_distance(Pquery const &pquery, tree::TreeEdge const &edge, utils::Matrix< size_t > const &edge_path_lengths)
Calculate the weighted discrete distance between the PqueryPlacements of a Pquery and a tree::TreeEdg...
Definition: placement/function/distances.cpp:329
genesis::placement::Pquery::placement_size
size_t placement_size() const
Return the number of PqueryPlacements stored in this Pquery.
Definition: pquery.cpp:113
genesis::placement::placement_distance
double placement_distance(PqueryPlacement const &placement, tree::TreeNode const &node, utils::Matrix< double > const &node_distances)
Calculate the distance in branch length units between a PqueryPlacement and a tree::TreeNode.
Definition: placement/function/distances.cpp:287
plain.hpp
Header of PqueryPlain class.
genesis::tree::CommonEdgeData
Common class containing the commonly needed data for tree edges.
Definition: tree/common_tree/tree.hpp:144
genesis::placement::PqueryPlacement
One placement position of a Pquery on a Tree.
Definition: placement/pquery/placement.hpp:75
genesis::tree::TreeNode::index
size_t index() const
Return the index of this Node.
Definition: tree/tree/node.hpp:102
genesis::tree::TreeEdge::data
EdgeDataType & data()
Definition: edge.hpp:217
genesis::placement::PqueryPlain::placements
std::vector< PqueryPlacementPlain > placements
Definition: plain.hpp:78
genesis::placement::PqueryPlacement::proximal_length
double proximal_length
Distance of this placement to the next node towards the root.
Definition: placement/pquery/placement.hpp:155
genesis::placement::PqueryPlacement::pendant_length
double pendant_length
Length of the attached branch of this placement.
Definition: placement/pquery/placement.hpp:165