A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cog.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 
43 #include "genesis/tree/tree.hpp"
44 
46 
47 #include <algorithm>
48 #include <cassert>
49 
50 namespace genesis {
51 namespace placement {
52 
53 // =================================================================================================
54 // Center of Gravity
55 // =================================================================================================
56 
57 std::pair<PlacementTreeEdge const*, double> center_of_gravity (
58  Sample const& smp,
59  bool const with_pendant_length
60 ) {
61  // This struct stores the torque that acts on a certain point (called the fulcrum) from a
62  // specific direction. It also stores the mass that created that torque, in order to be able to
63  // calculate the new torque when moving around the tree.
64  // In physics, torque is distance times force. However, we consider the force to be constant in
65  // the case of finding the center of gravity, so we neglect it and calculate torque as distance
66  // times mass.
67  struct Fulcrum {
68  Fulcrum() : mass(0.0), torque(0.0) {}
69 
70  double mass;
71  double torque;
72  };
73 
74  // Disable debug messages while code is not in review.
76 
77  // Store a balance value per link, so that each element contains the mass and its torque that
78  // lies downwards the tree in the direction of this link.
79  std::unordered_map<PlacementTreeLink const*, Fulcrum> balance;
80 
81  // Prepare a map from edges to placements on those edges.
82  auto place_map = placements_per_edge( smp );
83 
84  // -------------------------------------------------------------------------
85  // Collect All Masses, Calculate the Torque
86  // -------------------------------------------------------------------------
87 
88  // Do a postorder traversal. Collect all placement masses and push them towards the root in
89  // order to calculate the torque that acts on each node.
90  for( auto it : postorder( smp.tree() ) ) {
91  // Skip the last iteration, as we would assign an unneeded value to the first child
92  // of the root.
93  if( it.is_last_iteration() ) {
94  continue;
95  }
96 
97  // Collect the torque and mass that lies further down in the tree and act on the current
98  // iterators link.
99  Fulcrum curr_fulcrum;
100 
101  // Add up the masses from the current node's children.
102  PlacementTreeLink const* link = &it.link().next();
103  while( link != &it.link() ) {
104  // We do postorder traversal, so we have seen the child links of the current node,
105  // which means, they should be in the balance list already.
106  assert(balance.count(link));
107 
108  auto const& edge_data = it.edge().data<PlacementEdgeData>();
109  curr_fulcrum.mass += balance[link].mass;
110  curr_fulcrum.torque += balance[link].mass * edge_data.branch_length;
111  curr_fulcrum.torque += balance[link].torque;
112  link = &link->next();
113  }
114 
115  // Add up the masses of placements on the current edge.
116  for (PqueryPlacement const* place : place_map[ it.edge().index() ]) {
117  double place_dist = place->proximal_length;
118  if (with_pendant_length) {
119  place_dist += place->pendant_length;
120  }
121  curr_fulcrum.mass += place->like_weight_ratio;
122  curr_fulcrum.torque += place->like_weight_ratio * place_dist;
123  }
124 
125  assert( balance.count( &it.link().outer() ) == 0 );
126  balance[ &it.link().outer() ] = curr_fulcrum;
127  }
128 
129  // LOG_DBG << "current balance:";
130  // for (auto& v : balance) {
131  // LOG_DBG1 << "node " << v.first->node()->name << ", mass " << v.second.mass
132  // << ", torque " << v.second.torque;
133  // }
134 
135  // Now we have calculated all massed that lie down the tree as seen from the root and the torque
136  // they create. We can now start finding the edge where the center of gravity lies. This is done
137  // by going down the tree in the direction where the most torque comes from and at the same time
138  // pulling with us all the masses that come from the other nodes. Once we have more torque from
139  // behind us (speak: up in the tree) that lies ahead of us (speak: down the tree), we have found
140  // the center edge.
141 
142  // -------------------------------------------------------------------------
143  // Find Central Edge
144  // -------------------------------------------------------------------------
145 
146  // Keep track of the link whose edge we are currently examining, as well as the one that we
147  // examined previously (on iteration of the loop earlier). We start at the root.
148  PlacementTreeLink const* curr_link = &smp.tree().root_link();
149  PlacementTreeLink const* prev_link = nullptr;
150 
151  // For asserting purposes, we keep track of the number of loop iterations we do.
152  // This can never be more than the tree height (in number of nodes from root to deepest leaf)
153  // plus one last iteration for going back towards the root.
154  size_t num_iterations = 0;
155  auto depth_vector = node_path_length_vector(smp.tree());
156  size_t max_iterations = 1 + static_cast<size_t>(
157  *std::max_element(depth_vector.begin(), depth_vector.end())
158  );
159  // TODO turn this thing into a function similar to tree.height(), and name it aptly (find some
160  // better convention to distinguish between depth [number of nodes on a path] and distance [sum
161  // of branch lengths]. maybe distance and lengths instead?!).
162  // TODO once the center of gravity method is established, this assertion might be removed.
163 
164  LOG_DBG << "max it " << max_iterations;
165 
166  // Loop until the balancing edge is found.
167  while (true) {
168  assert (num_iterations <= max_iterations);
169  ++num_iterations;
170 
171  LOG_DBG << "iteration " << num_iterations;
172  LOG_DBG1 << "find max at " << curr_link->node().data<PlacementNodeData>().name;
173 
174  // Find the direction away from the current node that has the highest torque.
175  // At the same time, collect the sum of masses and torques at the node, in order to push
176  // them towards the node with highest torque later (so that the next iteration will have
177  // values to work on).
178  PlacementTreeLink const* max_link = nullptr;
179  double max_torque = -1.0;
180  Fulcrum sum;
181 
182  for( auto it_l : node_links( curr_link->node() )) {
183  // Make sure that we actually have a useable value.
184  assert(balance.count( &it_l.link() ) > 0);
185 
186  LOG_DBG2 << "at " << it_l.link().outer().node().data<PlacementNodeData>().name
187  << " with mass " << balance[ &it_l.link() ].mass
188  << " and torque " << balance[ &it_l.link() ].torque;
189  if (balance[ &it_l.link() ].torque > max_torque) {
190  max_link = &it_l.link();
191  max_torque = balance[ &it_l.link() ].torque;
192  }
193  sum.mass += balance[ &it_l.link() ].mass;
194  sum.torque += balance[ &it_l.link() ].torque;
195  }
196  assert(max_link);
197 
198  // Check if we found the edge where the center of gravity lies. This is the case when the
199  // the highest torque is coming from the direction where we came just from in the last
200  // iteration.
201  LOG_DBG1 << "moving to " << max_link->outer().node().data<PlacementNodeData>().name;
202  if( &max_link->outer() == prev_link ) {
203  LOG_DBG << "found between "
204  << curr_link->node().data<PlacementNodeData>().name
205  << " and " << prev_link->node().data<PlacementNodeData>().name;
206  break;
207  }
208 
209  // If we are not done yet, move down the edge.
210  prev_link = max_link;
211  curr_link = &max_link->outer();
212 
213  LOG_DBG1 << "mass sum " << sum.mass << ", torque sum " << sum.torque;
214 
215  // Now we are at a node where we have calculated only the masses and torques coming from
216  // further down in the tree so far, but not the values coming from the direction of the root
217  // (from where we just came). So we need to calculate these now:
218 
219  // Subtract the mass and torque of the direction where we found the most torque again,
220  // so that all that is left are the sums of all the other (not maximum) nodes. Then push it
221  // towards to the end of the edge.
222  auto const& max_edge_data = max_link->edge().data<PlacementEdgeData>();
223  sum.mass -= balance[max_link].mass;
224  sum.torque -= balance[max_link].torque;
225  sum.torque += sum.mass * max_edge_data.branch_length;
226 
227  // Add masses of the placements on this edge.
228  for( PqueryPlacement const* place : place_map[ max_link->edge().index() ]) {
229  double p_dist = max_edge_data.branch_length - place->proximal_length;
230  if (with_pendant_length) {
231  p_dist += place->pendant_length;
232  }
233  sum.mass += place->like_weight_ratio;
234  sum.torque += place->like_weight_ratio * p_dist;
235  }
236 
237  // Store the values at the corresponding link.
238  balance[curr_link] = sum;
239  LOG_DBG1 << "stored mass " << sum.mass << " and torque " << sum.torque << " at "
240  << max_link->outer().node().data<PlacementNodeData>().name;
241 
242  LOG_DBG << "end of iteration " << num_iterations << "\n";
243  }
244 
245  // Assert that the two links are actually the two ends of the same edge and that their nodes
246  // are the correct ones in terms of direction to the root.
247  assert( &curr_link->edge() == &prev_link->edge() );
248  assert( &prev_link->node() == &prev_link->edge().primary_node() );
249  assert( &curr_link->node() == &curr_link->edge().secondary_node() );
250 
251  // LOG_DBG << "current balance:";
252  // for (auto& v : balance) {
253  // LOG_DBG1 << "node " << v.first->node()->name << ", mass " << v.second.mass
254  // << ", torque " << v.second.torque;
255  // }
256 
257  LOG_DBG << "cur " << curr_link->node().data<PlacementNodeData>().name
258  << " with mass " << balance[curr_link].mass << " and torque " << balance[curr_link].torque;
259  LOG_DBG << "prev " << prev_link->node().data<PlacementNodeData>().name
260  << " with mass " << balance[prev_link].mass << " and torque " << balance[prev_link].torque;
261 
262  // At this point, we have found the central edge that balances the placement masses on the tree.
263  // curr_link is at the downwards (away from the root) end of this edge, while prev_link at its
264  // top (towards the root).
265  // All that is left now is to find the correct position on this edge. For this, we need to
266  // consider the masses and torques that come from both ends of the edge, as well as the
267  // placements on the edge itself.
268 
269  PlacementTreeEdge const* const central_edge = &curr_link->edge();
270 
271  // -------------------------------------------------------------------------
272  // Calculate Torques at Central Edge
273  // -------------------------------------------------------------------------
274 
275  // Define the masses and torques at both ends of the edge: proximal and distal mass/torque.
276  // Calculate them as the sums of the values from the subtree that lies behind the edge.
277  Fulcrum prox_fulcrum;
278  Fulcrum dist_fulcrum;
279 
280  assert(balance.count(curr_link) > 0);
281  assert(balance.count(prev_link) > 0);
282 
283  PlacementTreeLink const* link;
284  link = &prev_link->next();
285  while (link != prev_link) {
286  assert(balance.count(link));
287  prox_fulcrum.mass += balance[link].mass;
288  prox_fulcrum.torque += balance[link].torque;
289  link = &link->next();
290  }
291 
292  link = &curr_link->next();
293  while (link != curr_link) {
294  assert(balance.count(link));
295  dist_fulcrum.mass += balance[link].mass;
296  dist_fulcrum.torque += balance[link].torque;
297  link = &link->next();
298  }
299 
300  LOG_DBG << "prox_mass " << prox_fulcrum.mass << ", prox_torque " << prox_fulcrum.torque;
301  LOG_DBG << "dist_mass " << dist_fulcrum.mass << ", dist_torque " << dist_fulcrum.torque;
302 
303  // A simple approximation of the solution is to calculate the balancing point on the edge
304  // without considering the influence of the placements on the edge:
305  // Let x the solution, measured as length from the proximal node.
306  // Then we are looking for an x where the weights on both sides of it are in equilibrium:
307  // prox_torque + (prox_mass * x) = dist_torque + (dist_mass * (branch_length - x))
308  // <=> x = (dist_torque - prox_torque + (dist_mass * branch_length)) / (dist_mass + prox_mass)
309  // This however does not work when the mass that produced the most torque lies on the edge
310  // itself. The approximation breaks then and gives a length outside of the edge as result. Thus,
311  // we cannot ignore it in such a case. For example, this happens when all mass is on one edge.
312  // So, we need to check for those cases and if so, use the center of the edge as the
313  // approximation.
314  // In code:
315  double approx_proximal_length;
316  auto const& central_edge_data = central_edge->data<PlacementEdgeData>();
317  if (prox_fulcrum.mass == 0.0 || dist_fulcrum.mass == 0.0) {
318  approx_proximal_length = central_edge_data.branch_length / 2.0;
319  } else {
320  approx_proximal_length = (dist_fulcrum.torque - prox_fulcrum.torque
321  + (dist_fulcrum.mass * central_edge_data.branch_length))
322  / (dist_fulcrum.mass + prox_fulcrum.mass);
323  }
324 
325  LOG_DBG << "approx_proximal_length " << approx_proximal_length;
326  // return std::make_pair(central_edge, approx_proximal_length);
327 
328  // We will do an iteration that moves along the edge, balancing the torques on both sides until
329  // equilibrium is found. For this, we need to keep track of the masses and torques on the two
330  // sides: the variables hold those values as seen from the point that we are trying to find.
331  // At first, the prox_sum contains just prox_fulcrum, while dist_sum contains all values on the
332  // other side. During the iteration later, those two variables will change while moving along
333  // the edge, until equilibrium.
334  Fulcrum prox_sum = prox_fulcrum;
335  Fulcrum dist_sum = balance[prev_link];
336 
337  LOG_DBG << "prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
338  LOG_DBG << "dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
339 
340  // At this point, the torque on the proximal end of the edge cannot exceed the one on the other
341  // side, as this would mean that we have chosen the wrong edge as central edge.
342 
343  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
344  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
345  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
346 
347  // TODO the assertion is wrong. there are valid cases where it does not hold.
348  LOG_DBG1 << "assert " << dist_sum.torque << ">=" << prox_sum.torque;
349  // assert(dist_sum.torque >= prox_sum.torque);
350 
351  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
352  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
353  // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
354 
355  // -------------------------------------------------------------------------
356  // Find Center of the Edge
357  // -------------------------------------------------------------------------
358 
359  // We store the influence that each placement on the edge has.
360  struct BalancePoint
361  {
362  BalancePoint() : proximal_length(0.0), mass(0.0), pendant_torque(0.0) {};
363  BalancePoint(double prox_len) : proximal_length(prox_len), mass(0.0), pendant_torque(0.0) {};
364 
365  double proximal_length;
366  double mass;
367  double pendant_torque;
368  };
369 
370  // Make a list of all placements on the edge, sorted by their position on it.
371  // Also, as first and last element of the array, we store dummy elements for the proximal_length.
372  // This makes it obsolete to check them as boundary cases.
373  // We use a hand-sorted vector here (as opposed to a map, which does the ordering for us),
374  // because it provides element access "[]", which comes in handy later.
375  std::vector<BalancePoint> edge_balance;
376  edge_balance.reserve( place_map[ central_edge->index() ].size() + 2);
377  edge_balance.push_back(BalancePoint(0.0));
378 
379  double tqs = 0.0;
380  double mss = 0.0;
381 
382  // Sorts the placements on the central edge by their distance from the root, ascending.
383  auto sort_by_pos = [] ( PqueryPlacement const* lhs, PqueryPlacement const* rhs ) {
384  return lhs->proximal_length < rhs->proximal_length;
385  };
386  std::sort(
387  place_map[ central_edge->index() ].begin(),
388  place_map[ central_edge->index() ].end(),
389  sort_by_pos
390  );
391 
392  // Now add all placements on the edge to the balance variable, sorted by their proximal length.
393  for( PqueryPlacement const* place : place_map[ central_edge->index() ]) {
394  double place_prox = place->proximal_length;
395 
396  // Some sanity checks for wrong data. We do it here because otherwise the algorithm might
397  // produce weird results. However, usually this task is up to the validate() method.
398  if (place_prox > central_edge_data.branch_length) {
399  LOG_WARN << "Placement found that has proximal_length > branch_length.";
400  place_prox = central_edge_data.branch_length;
401  }
402  if (place_prox < 0.0) {
403  LOG_WARN << "Placement found that has proximal_length < 0.0.";
404  place_prox = 0.0;
405  }
406 
407  double place_pendant_torque = 0.0;
408  if (with_pendant_length) {
409  place_pendant_torque = place->like_weight_ratio * place->pendant_length;
410  }
411 
412  tqs += place_prox * place->like_weight_ratio;
413  mss += place->like_weight_ratio;
414 
415  BalancePoint place_balance;
416  place_balance.proximal_length = place_prox;
417  place_balance.mass = place->like_weight_ratio;
418  place_balance.pendant_torque = place_pendant_torque;
419 
420  edge_balance.push_back(place_balance);
421  }
422 
423  tqs += dist_fulcrum.torque - prox_fulcrum.torque
424  + (dist_fulcrum.mass * central_edge_data.branch_length);
425  mss += dist_fulcrum.mass + prox_fulcrum.mass;
426  double solution_without_pendant_length = tqs / mss;
427  LOG_DBG << "tqs " << tqs << ", mss " << mss;
428  LOG_DBG << "solution_without_pendant_length " << solution_without_pendant_length;
429 
430  return std::make_pair(central_edge, solution_without_pendant_length);
431 
432  // The rest code in this function tries to find a solution when pendant length shall be
433  // considered. However, it might happen that there is no exact solution in this case,
434  // so we decided to ignore pendant lengths on the central branch and return the result
435  // above instead.
436 
437  // -------------------------------------------------------------------------
438  // Experimental Extension
439  // -------------------------------------------------------------------------
440 
441  // Finally, story the dummy for the end of the edge.
442  edge_balance.push_back(BalancePoint( central_edge_data.branch_length ));
443 
444  LOG_DBG << "edge_balance:";
445  for (auto& e : edge_balance) {
446  LOG_DBG1 << "at " << e.proximal_length << " with mass " << e.mass
447  << " and pen torque " << e.pendant_torque;
448  }
449 
450  LOG_DBG << "prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
451  LOG_DBG << "dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
452 
453  // This is the loop where we find the center of the edge.
454  size_t pos = 1;
455  double dist_diff = 0.0;
456  for (; pos < edge_balance.size(); ++pos) {
457  auto& curr_point = edge_balance[pos];
458 
459  // Get the distance that we travelled from the last point on the edge. This is important to
460  // know how much to change the torques.
461  dist_diff = curr_point.proximal_length - edge_balance[pos-1].proximal_length;
462 
463  LOG_DBG1 << "iteration " << pos;
464 
465  LOG_DBG1 << "at " << curr_point.proximal_length << " with mass " << curr_point.mass
466  << " and pen torque " << curr_point.pendant_torque;
467 
468  LOG_DBG2 << "dist diff " << dist_diff;
469 
470  LOG_DBG2 << "prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
471  LOG_DBG2 << "dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
472 
473  // double pendant_torque_sum = 0.0;
474  // for (size_t i = 1; i < edge_balance.size(); ++i) {
475  // if (edge_balance[i].proximal_length == curr_point.proximal_length) {
476  // pendant_torque_sum += edge_balance[i].pendant_torque;
477  // } else if (i > pos) {
478  // break;
479  // }
480  // }
481  // LOG_DBG2 << "pendant_torque_sum " << pendant_torque_sum;
482 
483  if (
484  prox_sum.torque + prox_sum.mass * dist_diff >=
485  dist_sum.torque - dist_sum.mass * dist_diff
486  ) {
487  break;
488  }
489 
490  // Adjust the torques to the new point.
491  prox_sum.torque += prox_sum.mass * dist_diff + curr_point.pendant_torque;
492  dist_sum.torque -= dist_sum.mass * dist_diff + curr_point.pendant_torque;
493 
494  // Also the masses: the mass of the current point moves from the distal fulcrum to the
495  // proximal one.
496  prox_sum.mass += curr_point.mass;
497  dist_sum.mass -= curr_point.mass;
498 
499  LOG_DBG2 << "new prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
500  LOG_DBG2 << "new dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
501  }
502 
503  LOG_DBG << "final prox_sum mass " << prox_sum.mass << ", prox_sum torque " << prox_sum.torque;
504  LOG_DBG << "final dist_sum mass " << dist_sum.mass << ", dist_sum torque " << dist_sum.torque;
505 
506  LOG_DBG << "pos " << pos << " size " << edge_balance.size();
507 
508  // If the algorithm is correct, we will never finish the last iteration of the loop above,
509  // because that would imply that we still did not find our central part of the edge. We might
510  // leave the loop (via break) not until the last iteration (which means, that this is where the
511  // center lies), but we will never finish the loop via its default exit condition.
512  // So let's assert that we actually didn't.
513  assert(pos < edge_balance.size());
514 
515  // if (pos == edge_balance.size() - 1) {
516  // /* code */
517  // }
518 
519  dist_sum.torque -= dist_sum.mass * dist_diff;
520  double result_proximal_length = (dist_sum.torque - prox_sum.torque
521  + (dist_sum.mass * dist_diff))
522  / (dist_sum.mass + prox_sum.mass);
523  LOG_DBG << "result_proximal_length " << result_proximal_length;
524  result_proximal_length += edge_balance[pos-1].proximal_length;
525  LOG_DBG << "result_proximal_length " << result_proximal_length;
526 
527  return std::make_pair(central_edge, result_proximal_length);
528 }
529 
530 // =================================================================================================
531 // Center of Gravity Variance
532 // =================================================================================================
533 
535  Sample const& smp,
536  bool const with_pendant_length
537 ) {
538  double variance = 0.0;
539  double mass = 0.0;
540 
541  auto const cog = center_of_gravity(smp, with_pendant_length);
542  auto const central_edge = cog.first;
543  auto const& central_edge_data = central_edge->data<PlacementEdgeData>();
544  double const proximal_length = cog.second;
545 
546  LOG_DBG << "edge " << central_edge->primary_node().data<PlacementNodeData>().name
547  << " " << central_edge->secondary_node().data<PlacementNodeData>().name;
548  LOG_DBG << "prox " << proximal_length;
549 
550  auto node_dist_prox = node_branch_length_distance_vector(
551  smp.tree(), &central_edge->primary_node()
552  );
553  auto node_dist_dist = node_branch_length_distance_vector(
554  smp.tree(), &central_edge->secondary_node()
555  );
556 
557  for (const auto& pqry : smp.pqueries()) {
558  for( auto const& place : pqry.placements() ) {
559  auto const& place_edge_data = place.edge().data<PlacementEdgeData>();
560  double distance;
561 
562  if (place.edge().index() == central_edge->index()) {
563  distance = std::abs(place.proximal_length - proximal_length);
564  } else{
565  double pp, pd, dp;
566 
567  // proximal-proximal case
568  pp = proximal_length
569  + node_dist_prox[ place.edge().primary_node().index() ]
570  + place.proximal_length;
571 
572  // proximal-distal case
573  pd = proximal_length
574  + node_dist_prox[ place.edge().secondary_node().index() ]
575  + place_edge_data.branch_length - place.proximal_length;
576 
577  // distal-proximal case
578  dp = central_edge_data.branch_length - proximal_length
579  + node_dist_dist[ place.edge().primary_node().index() ]
580  + place.proximal_length;
581 
582  // find min of the three cases
583  distance = std::min(pp, std::min(pd, dp));
584  }
585 
586  if (with_pendant_length) {
587  distance += place.pendant_length;
588  }
589  variance += distance * distance * place.like_weight_ratio;
590  mass += place.like_weight_ratio;
591  }
592  }
593 
594  return variance / mass;
595 }
596 
597 // =================================================================================================
598 // Center of Gravity Distance
599 // =================================================================================================
600 
602  Sample const& smp_a,
603  Sample const& smp_b,
604  bool const with_pendant_length
605 ) {
606  if (!compatible_trees(smp_a, smp_b)) {
607  throw std::invalid_argument("__FUNCTION__: Incompatible trees.");
608  }
609 
610  // Disable debug messages while code is not in review.
612 
613  auto cog_a = center_of_gravity(smp_a, with_pendant_length);
614  auto cog_b = center_of_gravity(smp_b, with_pendant_length);
615 
616  auto edge_a = cog_a.first;
617  auto edge_b = cog_b.first;
618 
619  double prox_a = cog_a.second;
620  double prox_b = cog_b.second;
621 
622  // TODO this is for testing purposes only
623  if (prox_a < 0.0) {
624  LOG_INFO << "map a COG proximal_length < 0: " << prox_a;
625  }
626 
627  LOG_DBG << "cog a edge " << edge_a->index() << " prox " << prox_a;
628  LOG_DBG << "cog b edge " << edge_b->index() << " prox " << prox_b;
629 
630  // TODO turn the result of cog method into a class TreePoint or so, and write functions for this
631  // like distance to another point, which uses the code below. then replace the code by calling
632  // this function.
633 
634  double dist = -1.0;
635  if (edge_a->index() == edge_b->index()) {
636  // same branch case
637  dist = std::abs(prox_a - prox_b);
638  } else {
639  // TODO instead of the whole vector, we need a distnace between nodes function in the future,
640  // which probably uses the path iterator.
641  auto node_dist_a_prox = node_branch_length_distance_vector(
642  smp_a.tree(), &edge_a->primary_node()
643  );
644  auto node_dist_a_dist = node_branch_length_distance_vector(
645  smp_a.tree(), &edge_a->secondary_node()
646  );
647 
648  double pp, pd, dp;
649 
650  // proximal-proximal case
651  pp = prox_a
652  + node_dist_a_prox[edge_b->primary_node().index()]
653  + prox_b;
654 
655  // proximal-distal case
656  pd = prox_a
657  + node_dist_a_prox[edge_b->secondary_node().index()]
658  + edge_b->data<PlacementEdgeData>().branch_length - prox_b;
659 
660  // distal-proximal case
661  dp = edge_a->data<PlacementEdgeData>().branch_length - prox_a
662  + node_dist_a_dist[edge_b->primary_node().index()]
663  + prox_b;
664 
665  // find min of the three cases and
666  dist = std::min(pp, std::min(pd, dp));
667  }
668 
669  return dist;
670 }
671 
672 } // namespace placement
673 } // namespace genesis
std::vector< std::vector< PqueryPlacement const * > > placements_per_edge(Sample const &smp, bool only_max_lwr_placements)
Return a mapping from each PlacementTreeEdges to the PqueryPlacements that are placed on that edge...
std::vector< size_t > node_path_length_vector(Tree const &tree, TreeNode const &node)
Return a vector containing the depth of all nodes with respect to the given start node...
#define LOG_DBG
Log a debug message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:101
double proximal_length
Distance of this placement to the next node towards the root.
Data class for PlacementTreeEdges. Stores the branch length of the edge, and the edge_num, as defined in the jplace standard.
PlacementTree & tree()
Get the PlacementTree of this Sample.
Definition: sample.cpp:119
utils::Range< IteratorNodeLinks< TreeLink const, TreeNode const, TreeEdge const > > node_links(ElementType const &element)
Definition: node_links.hpp:175
bool compatible_trees(PlacementTree const &lhs, PlacementTree const &rhs)
Return whether two PlacementTrees are compatible.
#define LOG_DBG1
Log a debug message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:104
#define LOG_WARN
Log a warning. See genesis::utils::LoggingLevel.
Definition: logging.hpp:95
Tree operator functions.
double center_of_gravity_distance(Sample const &smp_a, Sample const &smp_b, bool const with_pendant_length)
Calculate the distance between the two Centers of Gravity of two Samples.
Definition: cog.cpp:601
Provides functions for working with Placements and Pqueries.
One placement position of a Pquery on a Tree.
double variance(const Sample &smp, bool with_pendant_length)
Calculate the variance of the placements on a tree.
Definition: measures.cpp:232
double sum(const Histogram &h)
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:161
utils::Range< iterator_pqueries > pqueries()
Return a Range iterator to the Pqueries .
Definition: sample.cpp:259
double center_of_gravity_variance(Sample const &smp, bool const with_pendant_length)
Calcualte the variance of the PqueryPlacements of a Sample around its Center of Gravity.
Definition: cog.cpp:534
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:145
#define LOG_SCOPE_LEVEL(level)
This macro sets the logging level to a certain value for this scope and sets it back to the original ...
Definition: logging.hpp:148
Infos, for example when a file was written. See LOG_INFO.
Definition: logging.hpp:381
Header of Default Tree distance methods.
Provides easy and fast logging functionality.
Data class for PlacementTreeNodes. Stores a node name.
std::pair< PlacementTreeEdge const *, double > center_of_gravity(Sample const &smp, bool const with_pendant_length)
Calculate the Center of Gravity of the placements on a tree.
Definition: cog.cpp:57
double branch_length
Branch length of the edge.
Manage a set of Pqueries along with the PlacementTree where the PqueryPlacements are placed on...
Definition: sample.hpp:68
#define LOG_DBG2
Log a debug message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:107
NodeDataType & data()
Definition: node.hpp:152
TreeLink & root_link()
Return the TreeLink at the current root of the Tree.
Definition: tree/tree.cpp:232
std::vector< double > node_branch_length_distance_vector(Tree const &tree, TreeNode const *node)
Return a vector containing the distance of all nodes with respect to the given start node...
Header of Tree class.
Header of Tree distance methods.
utils::Range< IteratorPostorder< TreeLink const, TreeNode const, TreeEdge const > > postorder(ElementType const &element)
EdgeDataType & data()
Definition: edge.hpp:183
#define LOG_INFO
Log an info message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:98