A library for working with phylogenetic and population genetic data.
v0.32.0
cog.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 
43 #include "genesis/tree/tree.hpp"
44 
46 
47 #include <algorithm>
48 #include <cassert>
49 
50 namespace genesis {
51 namespace placement {
52 
53 // =================================================================================================
54 // Center of Gravity
55 // =================================================================================================
56 
57 std::pair<PlacementTreeEdge const*, double> center_of_gravity (
58  Sample const& smp,
59  bool const with_pendant_length
60 ) {
61  // This struct stores the torque that acts on a certain point (called the fulcrum) from a
62  // specific direction. It also stores the mass that created that torque, in order to be able to
63  // calculate the new torque when moving around the tree.
64  // In physics, torque is distance times force. However, we consider the force to be constant in
65  // the case of finding the center of gravity, so we neglect it and calculate torque as distance
66  // times mass.
67  struct Fulcrum {
68  Fulcrum() : mass(0.0), torque(0.0) {}
69 
70  double mass;
71  double torque;
72  };
73 
74  // Disable debug messages while code is not in review.
76 
77  // Store a balance value per link, so that each element contains the mass and its torque that
78  // lies downwards the tree in the direction of this link.
79  std::unordered_map<PlacementTreeLink const*, Fulcrum> balance;
80 
81  // Prepare a map from edges to placements on those edges.
82  auto place_map = placements_per_edge( smp );
83 
84  // -------------------------------------------------------------------------
85  // Collect All Masses, Calculate the Torque
86  // -------------------------------------------------------------------------
87 
88  // Do a postorder traversal. Collect all placement masses and push them towards the root in
89  // order to calculate the torque that acts on each node.
90  for( auto it : postorder( smp.tree() ) ) {
91  // Skip the last iteration, as we would assign an unneeded value to the first child
92  // of the root.
93  if( it.is_last_iteration() ) {
94  continue;
95  }
96 
97  // Collect the torque and mass that lies further down in the tree and act on the current
98  // iterators link.
99  Fulcrum curr_fulcrum;
100 
101  // Add up the masses from the current node's children.
102  PlacementTreeLink const* link = &it.link().next();
103  while( link != &it.link() ) {
104  // We do postorder traversal, so we have seen the child links of the current node,
105  // which means, they should be in the balance list already.
106  assert(balance.count(link));
107 
108  auto const& edge_data = it.edge().data<PlacementEdgeData>();
109  curr_fulcrum.mass += balance[link].mass;
110  curr_fulcrum.torque += balance[link].mass * edge_data.branch_length;
111  curr_fulcrum.torque += balance[link].torque;
112  link = &link->next();
113  }
114 
115  // Add up the masses of placements on the current edge.
116  for (PqueryPlacement const* place : place_map[ it.edge().index() ]) {
117  double place_dist = place->proximal_length;
118  if (with_pendant_length) {
119  place_dist += place->pendant_length;
120  }
121  curr_fulcrum.mass += place->like_weight_ratio;
122  curr_fulcrum.torque += place->like_weight_ratio * place_dist;
123  }
124 
125  assert( balance.count( &it.link().outer() ) == 0 );
126  balance[ &it.link().outer() ] = curr_fulcrum;
127  }
128 
129  // LOG_DBG << "current balance:";
130  // for (auto& v : balance) {
131  // LOG_DBG1 << "node " << v.first->node()->name << ", mass " << v.second.mass
132  // << ", torque " << v.second.torque;
133  // }
134 
135  // Now we have calculated all massed that lie down the tree as seen from the root and the torque
136  // they create. We can now start finding the edge where the center of gravity lies. This is done
137  // by going down the tree in the direction where the most torque comes from and at the same time
138  // pulling with us all the masses that come from the other nodes. Once we have more torque from
139  // behind us (speak: up in the tree) that lies ahead of us (speak: down the tree), we have found
140  // the center edge.
141 
142  // -------------------------------------------------------------------------
143  // Find Central Edge
144  // -------------------------------------------------------------------------
145 
146  // Keep track of the link whose edge we are currently examining, as well as the one that we
147  // examined previously (on iteration of the loop earlier). We start at the root.
148  PlacementTreeLink const* curr_link = &smp.tree().root_link();
149  PlacementTreeLink const* prev_link = nullptr;
150 
151  // For asserting purposes, we keep track of the number of loop iterations we do.
152  // This can never be more than the tree height (in number of nodes from root to deepest leaf)
153  // plus one last iteration for going back towards the root.
154  size_t num_iterations = 0;
155  auto depth_vector = node_path_length_vector(smp.tree());
156  size_t max_iterations = 1 + static_cast<size_t>(
157  *std::max_element(depth_vector.begin(), depth_vector.end())
158  );
159  // TODO turn this thing into a function similar to tree.height(), and name it aptly (find some
160  // better convention to distinguish between depth [number of nodes on a path] and distance [sum
161  // of branch lengths]. maybe distance and lengths instead?!).
162  // TODO once the center of gravity method is established, this assertion might be removed.
163 
164  LOG_DBG << "max it " << max_iterations;
165 
166  // Loop until the balancing edge is found.
167  while (true) {
168  assert (num_iterations <= max_iterations);
169  ++num_iterations;
170 
171  LOG_DBG << "iteration " << num_iterations;
172  LOG_DBG1 << "find max at " << curr_link->node().data<PlacementNodeData>().name;
173 
174  // Find the direction away from the current node that has the highest torque.
175  // At the same time, collect the sum of masses and torques at the node, in order to push
176  // them towards the node with highest torque later (so that the next iteration will have
177  // values to work on).
178  PlacementTreeLink const* max_link = nullptr;
179  double max_torque = -1.0;
180  Fulcrum sum;
181 
182  for( auto it_l : node_links( curr_link->node() )) {
183  // Make sure that we actually have a useable value.
184  assert(balance.count( &it_l.link() ) > 0);
185 
186  LOG_DBG2 << "at " << it_l.link().outer().node().data<PlacementNodeData>().name
187  << " with mass " << balance[ &it_l.link() ].mass
188  << " and torque " << balance[ &it_l.link() ].torque;
189  if (balance[ &it_l.link() ].torque > max_torque) {
190  max_link = &it_l.link();
191  max_torque = balance[ &it_l.link() ].torque;
192  }
193  sum.mass += balance[ &it_l.link() ].mass;
194  sum.torque += balance[ &it_l.link() ].torque;
195  }
196  assert(max_link);
197 
198  // Check if we found the edge where the center of gravity lies. This is the case when the
199  // the highest torque is coming from the direction where we came just from in the last
200  // iteration.
201  LOG_DBG1 << "moving to " << max_link->outer().node().data<PlacementNodeData>().name;
202  if( &max_link->outer() == prev_link ) {
203  LOG_DBG << "found between "
204  << curr_link->node().data<PlacementNodeData>().name
205  << " and " << prev_link->node().data<PlacementNodeData>().name;
206  break;
207  }
208 
209  // If we are not done yet, move down the edge.
210  prev_link = max_link;
211  curr_link = &max_link->outer();
212 
213  LOG_DBG1 << "mass sum " << sum.mass << ", torque sum " << sum.torque;
214 
215  // Now we are at a node where we have calculated only the masses and torques coming from
216  // further down in the tree so far, but not the values coming from the direction of the root
217  // (from where we just came). So we need to calculate these now:
218 
219  // Subtract the mass and torque of the direction where we found the most torque again,
220  // so that all that is left are the sums of all the other (not maximum) nodes. Then push it
221  // towards to the end of the edge.
222  auto const& max_edge_data = max_link->edge().data<PlacementEdgeData>();
223  sum.mass -= balance[max_link].mass;
224  sum.torque -= balance[max_link].torque;
225  sum.torque += sum.mass * max_edge_data.branch_length;
226 
227  // Add masses of the placements on this edge.
228  for( PqueryPlacement const* place : place_map[ max_link->edge().index() ]) {
229  double p_dist = max_edge_data.branch_length - place->proximal_length;
230  if (with_pendant_length) {
231  p_dist += place->pendant_length;
232  }
233  sum.mass += place->like_weight_ratio;
234  sum.torque += place->like_weight_ratio * p_dist;
235  }
236 
237  // Store the values at the corresponding link.
238  balance[curr_link] = sum;
239  LOG_DBG1 << "stored mass " << sum.mass << " and torque " << sum.torque << " at "
240  << max_link->outer().node().data<PlacementNodeData>().name;
241 
242  LOG_DBG << "end of iteration " << num_iterations << "\n";
243  }
244 
245  // Assert that the two links are actually the two ends of the same edge and that their nodes
246  // are the correct ones in terms of direction to the root.
247  assert( &curr_link->edge() == &prev_link->edge() );
248  assert( &prev_link->node() == &prev_link->edge().primary_node() );
249  assert( &curr_link->node() == &curr_link->edge().secondary_node() );
250 
251  // LOG_DBG << "current balance:";
252  // for (auto& v : balance) {
253  // LOG_DBG1 << "node " << v.first->node()->name << ", mass " << v.second.mass
254  // << ", torque " << v.second.torque;
255  // }
256 
257  LOG_DBG << "cur " << curr_link->node().data<PlacementNodeData>().name
258  << " with mass " << balance[curr_link].mass << " and torque " << balance[curr_link].torque;
259  LOG_DBG << "prev " << prev_link->node().data<PlacementNodeData>().name
260  << " with mass " << balance[prev_link].mass << " and torque " << balance[prev_link].torque;
261 
262  // At this point, we have found the central edge that balances the placement masses on the tree.
263  // curr_link is at the downwards (away from the root) end of this edge, while prev_link at its
264  // top (towards the root).
265  // All that is left now is to find the correct position on this edge. For this, we need to
266  // consider the masses and torques that come from both ends of the edge, as well as the
267  // placements on the edge itself.
268 
269  PlacementTreeEdge const* const central_edge = &curr_link->edge();
270 
271  // -------------------------------------------------------------------------
272  // Calculate Torques at Central Edge
273  // -------------------------------------------------------------------------
274 
275  // Define the masses and torques at both ends of the edge: proximal and distal mass/torque.
276  // Calculate them as the sums of the values from the subtree that lies behind the edge.
277  Fulcrum prox_fulcrum;
278  Fulcrum dist_fulcrum;
279 
280  assert(balance.count(curr_link) > 0);
281  assert(balance.count(prev_link) > 0);
282 
283  PlacementTreeLink const* link;
284  link = &prev_link->next();
285  while (link != prev_link) {
286  assert(balance.count(link));
287  prox_fulcrum.mass += balance[link].mass;
288  prox_fulcrum.torque += balance[link].torque;
289  link = &link->next();
290  }
291 
292  link = &curr_link->next();
293  while (link != curr_link) {
294  assert(balance.count(link));
295  dist_fulcrum.mass += balance[link].mass;
296  dist_fulcrum.torque += balance[link].torque;
297  link = &link->next();
298  }
299 
300  LOG_DBG << "prox_mass " << prox_fulcrum.mass << ", prox_torque " << prox_fulcrum.torque;
301  LOG_DBG << "dist_mass " << dist_fulcrum.mass << ", dist_torque " << dist_fulcrum.torque;
302 
303  // A simple approximation of the solution is to calculate the balancing point on the edge
304  // without considering the influence of the placements on the edge:
305  // Let x the solution, measured as length from the proximal node.
306  // Then we are looking for an x where the weights on both sides of it are in equilibrium:
307  // prox_torque + (prox_mass * x) = dist_torque + (dist_mass * (branch_length - x))
308  // <=> x = (dist_torque - prox_torque + (dist_mass * branch_length)) / (dist_mass + prox_mass)
309  // This however does not work when the mass that produced the most torque lies on the edge
310  // itself. The approximation breaks then and gives a length outside of the edge as result. Thus,
311  // we cannot ignore it in such a case. For example, this happens when all mass is on one edge.
312  // So, we need to check for those cases and if so, use the center of the edge as the
313  // approximation.
314  // In code:
315  double approx_proximal_length;
316  auto const& central_edge_data = central_edge->data<PlacementEdgeData>();
317  if (prox_fulcrum.mass == 0.0 || dist_fulcrum.mass == 0.0) {
318  approx_proximal_length = central_edge_data.branch_length / 2.0;
319  } else {
320  approx_proximal_length = (dist_fulcrum.torque - prox_fulcrum.torque
321  + (dist_fulcrum.mass * central_edge_data.branch_length))
322  / (dist_fulcrum.mass + prox_fulcrum.mass);
323  }
324 
325  LOG_DBG << "approx_proximal_length " << approx_proximal_length;
326  // return std::make_pair(central_edge, approx_proximal_length);
327 
328  // We will do an iteration that moves along the edge, balancing the torques on both sides until
329  // equilibrium is found. For this, we need to keep track of the masses and torques on the two
330  // sides: the variables hold those values as seen from the point that we are trying to find.
331  // At first, the prox_sum contains just prox_fulcrum, while dist_sum contains all values on the
332  // other side. During the iteration later, those two variables will change while moving along
333  // the edge, until equilibrium.
334  Fulcrum prox_sum = prox_fulcrum;
335  Fulcrum dist_sum = balance[prev_link];
336 
337  LOG_DBG << "prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
338  LOG_DBG << "dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
339 
340  // At this point, the torque on the proximal end of the edge cannot exceed the one on the other
341  // side, as this would mean that we have chosen the wrong edge as central edge.
342 
343  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
344  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
345  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
346 
347  // TODO the assertion is wrong. there are valid cases where it does not hold.
348  LOG_DBG1 << "assert " << dist_sum.torque << ">=" << prox_sum.torque;
349  // assert(dist_sum.torque >= prox_sum.torque);
350 
351  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
352  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
353  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
354 
355  // -------------------------------------------------------------------------
356  // Find Center of the Edge
357  // -------------------------------------------------------------------------
358 
359  // We store the influence that each placement on the edge has.
360  struct BalancePoint
361  {
362  BalancePoint() : proximal_length(0.0), mass(0.0), pendant_torque(0.0) {}
363  BalancePoint(double prox_len) : proximal_length(prox_len), mass(0.0), pendant_torque(0.0) {}
364 
365  double proximal_length;
366  double mass;
367  double pendant_torque;
368  };
369 
370  // Make a list of all placements on the edge, sorted by their position on it.
371  // Also, as first and last element of the array, we store dummy elements for the proximal_length.
372  // This makes it obsolete to check them as boundary cases.
373  // We use a hand-sorted vector here (as opposed to a map, which does the ordering for us),
374  // because it provides element access "[]", which comes in handy later.
375  std::vector<BalancePoint> edge_balance;
376  edge_balance.reserve( place_map[ central_edge->index() ].size() + 2);
377  edge_balance.push_back(BalancePoint(0.0));
378 
379  double tqs = 0.0;
380  double mss = 0.0;
381 
382  // Sorts the placements on the central edge by their distance from the root, ascending.
383  auto sort_by_pos = [] ( PqueryPlacement const* lhs, PqueryPlacement const* rhs ) {
384  return lhs->proximal_length < rhs->proximal_length;
385  };
386  std::sort(
387  place_map[ central_edge->index() ].begin(),
388  place_map[ central_edge->index() ].end(),
389  sort_by_pos
390  );
391 
392  // Now add all placements on the edge to the balance variable, sorted by their proximal length.
393  for( PqueryPlacement const* place : place_map[ central_edge->index() ]) {
394  double place_prox = place->proximal_length;
395 
396  // Some sanity checks for wrong data. We do it here because otherwise the algorithm might
397  // produce weird results. However, usually this task is up to the validate() method.
398  if (place_prox > central_edge_data.branch_length) {
399  LOG_WARN << "Placement found that has proximal_length > branch_length.";
400  place_prox = central_edge_data.branch_length;
401  }
402  if (place_prox < 0.0) {
403  LOG_WARN << "Placement found that has proximal_length < 0.0.";
404  place_prox = 0.0;
405  }
406 
407  double place_pendant_torque = 0.0;
408  if (with_pendant_length) {
409  place_pendant_torque = place->like_weight_ratio * place->pendant_length;
410  }
411 
412  tqs += place_prox * place->like_weight_ratio;
413  mss += place->like_weight_ratio;
414 
415  BalancePoint place_balance;
416  place_balance.proximal_length = place_prox;
417  place_balance.mass = place->like_weight_ratio;
418  place_balance.pendant_torque = place_pendant_torque;
419 
420  edge_balance.push_back(place_balance);
421  }
422 
423  tqs += dist_fulcrum.torque - prox_fulcrum.torque
424  + (dist_fulcrum.mass * central_edge_data.branch_length);
425  mss += dist_fulcrum.mass + prox_fulcrum.mass;
426  double solution_without_pendant_length = tqs / mss;
427  LOG_DBG << "tqs " << tqs << ", mss " << mss;
428  LOG_DBG << "solution_without_pendant_length " << solution_without_pendant_length;
429 
430  return std::make_pair(central_edge, solution_without_pendant_length);
431 
432  // The rest code in this function tries to find a solution when pendant length shall be
433  // considered. However, it might happen that there is no exact solution in this case,
434  // so we decided to ignore pendant lengths on the central branch and return the result
435  // above instead.
436 
437  // -------------------------------------------------------------------------
438  // Experimental Extension
439  // -------------------------------------------------------------------------
440 
441  /*
442 
443  // Finally, story the dummy for the end of the edge.
444  edge_balance.push_back(BalancePoint( central_edge_data.branch_length ));
445 
446  LOG_DBG << "edge_balance:";
447  for (auto& e : edge_balance) {
448  LOG_DBG1 << "at " << e.proximal_length << " with mass " << e.mass
449  << " and pen torque " << e.pendant_torque;
450  }
451 
452  LOG_DBG << "prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
453  LOG_DBG << "dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
454 
455  // This is the loop where we find the center of the edge.
456  size_t pos = 1;
457  double dist_diff = 0.0;
458  for (; pos < edge_balance.size(); ++pos) {
459  auto& curr_point = edge_balance[pos];
460 
461  // Get the distance that we travelled from the last point on the edge. This is important to
462  // know how much to change the torques.
463  dist_diff = curr_point.proximal_length - edge_balance[pos-1].proximal_length;
464 
465  LOG_DBG1 << "iteration " << pos;
466 
467  LOG_DBG1 << "at " << curr_point.proximal_length << " with mass " << curr_point.mass
468  << " and pen torque " << curr_point.pendant_torque;
469 
470  LOG_DBG2 << "dist diff " << dist_diff;
471 
472  LOG_DBG2 << "prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
473  LOG_DBG2 << "dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
474 
475  // double pendant_torque_sum = 0.0;
476  // for (size_t i = 1; i < edge_balance.size(); ++i) {
477  // if (edge_balance[i].proximal_length == curr_point.proximal_length) {
478  // pendant_torque_sum += edge_balance[i].pendant_torque;
479  // } else if (i > pos) {
480  // break;
481  // }
482  // }
483  // LOG_DBG2 << "pendant_torque_sum " << pendant_torque_sum;
484 
485  if (
486  prox_sum.torque + prox_sum.mass * dist_diff >=
487  dist_sum.torque - dist_sum.mass * dist_diff
488  ) {
489  break;
490  }
491 
492  // Adjust the torques to the new point.
493  prox_sum.torque += prox_sum.mass * dist_diff + curr_point.pendant_torque;
494  dist_sum.torque -= dist_sum.mass * dist_diff + curr_point.pendant_torque;
495 
496  // Also the masses: the mass of the current point moves from the distal fulcrum to the
497  // proximal one.
498  prox_sum.mass += curr_point.mass;
499  dist_sum.mass -= curr_point.mass;
500 
501  LOG_DBG2 << "new prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
502  LOG_DBG2 << "new dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
503  }
504 
505  LOG_DBG << "final prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
506  LOG_DBG << "final dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
507 
508  LOG_DBG << "pos " << pos << " size " << edge_balance.size();
509 
510  // If the algorithm is correct, we will never finish the last iteration of the loop above,
511  // because that would imply that we still did not find our central part of the edge. We might
512  // leave the loop (via break) not until the last iteration (which means, that this is where the
513  // center lies), but we will never finish the loop via its default exit condition.
514  // So let's assert that we actually didn't.
515  assert(pos < edge_balance.size());
516 
517  // if (pos == edge_balance.size() - 1) {
518  // // code...
519  // }
520 
521  dist_sum.torque -= dist_sum.mass * dist_diff;
522  double result_proximal_length = (dist_sum.torque - prox_sum.torque
523  + (dist_sum.mass * dist_diff))
524  / (dist_sum.mass + prox_sum.mass);
525  LOG_DBG << "result_proximal_length " << result_proximal_length;
526  result_proximal_length += edge_balance[pos-1].proximal_length;
527  LOG_DBG << "result_proximal_length " << result_proximal_length;
528 
529  return std::make_pair(central_edge, result_proximal_length);
530 
531  */
532 }
533 
534 // =================================================================================================
535 // Center of Gravity Variance
536 // =================================================================================================
537 
539  Sample const& smp,
540  bool const with_pendant_length
541 ) {
542  double variance = 0.0;
543  double mass = 0.0;
544 
545  auto const cog = center_of_gravity(smp, with_pendant_length);
546  auto const central_edge = cog.first;
547  auto const& central_edge_data = central_edge->data<PlacementEdgeData>();
548  double const proximal_length = cog.second;
549 
550  LOG_DBG << "edge " << central_edge->primary_node().data<PlacementNodeData>().name
551  << " " << central_edge->secondary_node().data<PlacementNodeData>().name;
552  LOG_DBG << "prox " << proximal_length;
553 
554  auto node_dist_prox = node_branch_length_distance_vector(
555  smp.tree(), &central_edge->primary_node()
556  );
557  auto node_dist_dist = node_branch_length_distance_vector(
558  smp.tree(), &central_edge->secondary_node()
559  );
560 
561  for (const auto& pqry : smp.pqueries()) {
562  for( auto const& place : pqry.placements() ) {
563  auto const& place_edge_data = place.edge().data<PlacementEdgeData>();
564  double distance;
565 
566  if (place.edge().index() == central_edge->index()) {
567  distance = std::abs(place.proximal_length - proximal_length);
568  } else{
569  double pp, pd, dp;
570 
571  // proximal-proximal case
572  pp = proximal_length
573  + node_dist_prox[ place.edge().primary_node().index() ]
574  + place.proximal_length;
575 
576  // proximal-distal case
577  pd = proximal_length
578  + node_dist_prox[ place.edge().secondary_node().index() ]
579  + place_edge_data.branch_length - place.proximal_length;
580 
581  // distal-proximal case
582  dp = central_edge_data.branch_length - proximal_length
583  + node_dist_dist[ place.edge().primary_node().index() ]
584  + place.proximal_length;
585 
586  // find min of the three cases
587  distance = std::min(pp, std::min(pd, dp));
588  }
589 
590  if (with_pendant_length) {
591  distance += place.pendant_length;
592  }
593  variance += distance * distance * place.like_weight_ratio;
594  mass += place.like_weight_ratio;
595  }
596  }
597 
598  return variance / mass;
599 }
600 
601 // =================================================================================================
602 // Center of Gravity Distance
603 // =================================================================================================
604 
606  Sample const& smp_a,
607  Sample const& smp_b,
608  bool const with_pendant_length
609 ) {
610  if (!compatible_trees(smp_a, smp_b)) {
611  throw std::invalid_argument("__FUNCTION__: Incompatible trees.");
612  }
613 
614  // Disable debug messages while code is not in review.
616 
617  auto cog_a = center_of_gravity(smp_a, with_pendant_length);
618  auto cog_b = center_of_gravity(smp_b, with_pendant_length);
619 
620  auto edge_a = cog_a.first;
621  auto edge_b = cog_b.first;
622 
623  double prox_a = cog_a.second;
624  double prox_b = cog_b.second;
625 
626  // TODO this is for testing purposes only
627  if (prox_a < 0.0) {
628  LOG_INFO << "map a COG proximal_length < 0: " << prox_a;
629  }
630 
631  LOG_DBG << "cog a edge " << edge_a->index() << " prox " << prox_a;
632  LOG_DBG << "cog b edge " << edge_b->index() << " prox " << prox_b;
633 
634  // TODO turn the result of cog method into a class TreePoint or so, and write functions for this
635  // like distance to another point, which uses the code below. then replace the code by calling
636  // this function.
637 
638  double dist = -1.0;
639  if (edge_a->index() == edge_b->index()) {
640  // same branch case
641  dist = std::abs(prox_a - prox_b);
642  } else {
643  // TODO instead of the whole vector, we need a distnace between nodes function in the future,
644  // which probably uses the path iterator.
645  auto node_dist_a_prox = node_branch_length_distance_vector(
646  smp_a.tree(), &edge_a->primary_node()
647  );
648  auto node_dist_a_dist = node_branch_length_distance_vector(
649  smp_a.tree(), &edge_a->secondary_node()
650  );
651 
652  double pp, pd, dp;
653 
654  // proximal-proximal case
655  pp = prox_a
656  + node_dist_a_prox[edge_b->primary_node().index()]
657  + prox_b;
658 
659  // proximal-distal case
660  pd = prox_a
661  + node_dist_a_prox[edge_b->secondary_node().index()]
662  + edge_b->data<PlacementEdgeData>().branch_length - prox_b;
663 
664  // distal-proximal case
665  dp = edge_a->data<PlacementEdgeData>().branch_length - prox_a
666  + node_dist_a_dist[edge_b->primary_node().index()]
667  + prox_b;
668 
669  // find min of the three cases and
670  dist = std::min(pp, std::min(pd, dp));
671  }
672 
673  return dist;
674 }
675 
676 } // namespace placement
677 } // namespace genesis
LOG_INFO
#define LOG_INFO
Log an info message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:100
genesis::placement::Sample::tree
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:124
genesis::utils::sum
double sum(const Histogram &h)
Definition: utils/math/histogram/stats.cpp:140
genesis::tree::node_branch_length_distance_vector
std::vector< double > node_branch_length_distance_vector(Tree const &tree, TreeNode const *node)
Return a vector containing the distance of all nodes with respect to the given start node,...
Definition: tree/common_tree/distances.cpp:121
genesis::placement::Sample
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on.
Definition: sample.hpp:68
genesis::utils::Logging::kInfo
@ kInfo
Infos, for example when a file was written. See LOG_INFO.
Definition: logging.hpp:416
tree.hpp
Header of Tree class.
genesis::tree::node_links
utils::Range< IteratorNodeLinks< true > > node_links(ElementType const &element)
Definition: node_links.hpp:186
functions.hpp
Provides functions for working with Placements and Pqueries.
LOG_DBG2
#define LOG_DBG2
Log a debug message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:124
genesis::tree::TreeEdge::secondary_node
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:162
genesis::placement::center_of_gravity_variance
double center_of_gravity_variance(Sample const &smp, bool const with_pendant_length)
Calculate the variance of the PqueryPlacements of a Sample around its Center of Gravity.
Definition: cog.cpp:538
distances.hpp
Header of CommonTree distance methods.
LOG_WARN
#define LOG_WARN
Log a warning. See genesis::utils::LoggingLevel.
Definition: logging.hpp:97
genesis::placement::PlacementEdgeData
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num,...
Definition: placement_tree.hpp:139
logging.hpp
Provides easy and fast logging functionality.
genesis::placement::PlacementNodeData
Data class for PlacementTreeNodes. Stores a node name.
Definition: placement_tree.hpp:87
genesis::tree::CommonEdgeData::branch_length
double branch_length
Branch length of the edge.
Definition: tree/common_tree/tree.hpp:193
genesis::tree::Tree::root_link
TreeLink & root_link()
Return the TreeLink at the current root of the Tree.
Definition: tree/tree.hpp:284
genesis::placement::center_of_gravity_distance
double center_of_gravity_distance(Sample const &smp_a, Sample const &smp_b, bool const with_pendant_length)
Calculate the distance between the two Centers of Gravity of two Samples.
Definition: cog.cpp:605
genesis::tree::TreeEdge
Definition: edge.hpp:60
genesis::placement::variance
double variance(const Sample &smp, bool with_pendant_length)
Calculate the variance of the placements on a tree.
Definition: measures.cpp:195
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::TreeEdge::primary_node
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:146
operators.hpp
Tree operator functions.
LOG_DBG1
#define LOG_DBG1
Log a debug message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:121
operators.hpp
LOG_SCOPE_LEVEL
#define LOG_SCOPE_LEVEL(level)
This macro sets the logging level to a certain value for this scope and sets it back to the original ...
Definition: logging.hpp:165
cog.hpp
genesis::placement::PqueryPlacement
One placement position of a Pquery on a Tree.
Definition: placement/pquery/placement.hpp:75
genesis::placement::placements_per_edge
std::vector< std::vector< PqueryPlacement const * > > placements_per_edge(Sample const &smp, bool only_max_lwr_placements)
Return a mapping from each PlacementTreeEdges to the PqueryPlacements that are placed on that edge.
Definition: placement/function/helper.cpp:106
sample.hpp
genesis::tree::TreeNode::data
NodeDataType & data()
Definition: tree/tree/node.hpp:203
postorder.hpp
genesis::placement::center_of_gravity
std::pair< PlacementTreeEdge const *, double > center_of_gravity(Sample const &smp, bool const with_pendant_length)
Calculate the Center of Gravity of the placements on a tree.
Definition: cog.cpp:57
LOG_DBG
#define LOG_DBG
Log a debug message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:118
distances.hpp
Header of Tree distance methods.
genesis::placement::Sample::pqueries
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:264
genesis::tree::node_path_length_vector
std::vector< size_t > node_path_length_vector(Tree const &tree, TreeNode const &node)
Return a vector containing the depth of all nodes with respect to the given start node.
Definition: tree/function/distances.cpp:98
genesis::placement::PqueryPlacement::proximal_length
double proximal_length
Distance of this placement to the next node towards the root.
Definition: placement/pquery/placement.hpp:155
genesis::placement::compatible_trees
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
Definition: placement/function/operators.cpp:63
helper.hpp
genesis::tree::postorder
utils::Range< IteratorPostorder< true > > postorder(ElementType const &element)
Definition: tree/iterator/postorder.hpp:321