A library for working with phylogenetic and population genetic data.
v0.27.0
tree/function/manipulation.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 #include "genesis/tree/tree.hpp"
39 
43 
44 #include <algorithm>
45 #include <cassert>
46 #include <vector>
47 
48 namespace genesis {
49 namespace tree {
50 
51 // =================================================================================================
52 // Minimal Tree
53 // =================================================================================================
54 
56 {
57  Tree result;
58 
59  // Get container access shortcuts.
60  auto& links = result.expose_link_container();
61  auto& nodes = result.expose_node_container();
62  auto& edges = result.expose_edge_container();
63 
64  // Create all needed elements...
65  auto link1_u = utils::make_unique< TreeLink >();
66  auto link2_u = utils::make_unique< TreeLink >();
67  auto node1_u = utils::make_unique< TreeNode >();
68  auto node2_u = utils::make_unique< TreeNode >();
69  auto edge0_u = utils::make_unique< TreeEdge >();
70 
71  // ... and connect them with each other.
72  link1_u->reset_index( 0 );
73  link1_u->reset_next( link1_u.get() );
74  link1_u->reset_outer( link2_u.get() );
75  link1_u->reset_node( node1_u.get() );
76  link1_u->reset_edge( edge0_u.get() );
77 
78  link2_u->reset_index( 1 );
79  link2_u->reset_next( link2_u.get() );
80  link2_u->reset_outer( link1_u.get() );
81  link2_u->reset_node( node2_u.get() );
82  link2_u->reset_edge( edge0_u.get() );
83 
84  node1_u->reset_index( 0 );
85  node1_u->reset_primary_link( link1_u.get() );
86 
87  node2_u->reset_index( 1 );
88  node2_u->reset_primary_link( link2_u.get() );
89 
90  edge0_u->reset_index( 0 );
91  edge0_u->reset_primary_link( link1_u.get() );
92  edge0_u->reset_secondary_link( link2_u.get() );
93 
94  // Finally, move all elements to the tree and reset the tree root link.
95  links.push_back( std::move( link1_u ));
96  links.push_back( std::move( link2_u ));
97  nodes.push_back( std::move( node1_u ));
98  nodes.push_back( std::move( node2_u ));
99  edges.push_back( std::move( edge0_u ));
100  result.reset_root_link( links.front().get() );
101 
102  return result;
103 }
104 
105 // =================================================================================================
106 // Add Nodes
107 // =================================================================================================
108 
109 TreeNode& add_new_node( Tree& tree, TreeNode& target_node )
110 {
111  // Basic check.
112  if( ! belongs_to( target_node, tree ) ) {
113  throw std::runtime_error(
114  "Cannot add Node to a Tree where the given Node is not part of the Tree."
115  );
116  }
117 
118  // Get container access shortcuts.
119  auto& links = tree.expose_link_container();
120  auto& nodes = tree.expose_node_container();
121  auto& edges = tree.expose_edge_container();
122 
123  // Create all new elements that we need:
124  // 1. A link that gets added to the given node and connects it to the new node.
125  // 2. The link that belongs to the newly created leaf node.
126  // 3. The newly created node itself.
127  // 4. The edge between the two nodes.
128  auto con_link_u = utils::make_unique< TreeLink >();
129  auto end_link_u = utils::make_unique< TreeLink >();
130  auto end_node_u = utils::make_unique< TreeNode >();
131  auto con_edge_u = utils::make_unique< TreeEdge >();
132 
133  // Get the pointers, for ease of use.
134  auto const con_link = con_link_u.get();
135  auto const end_link = end_link_u.get();
136  auto const end_node = end_node_u.get();
137  auto const con_edge = con_edge_u.get();
138 
139  // Populate the link at the given node.
140  con_link->reset_index( links.size() );
141  con_link->reset_node( &target_node );
142  con_link->reset_edge( con_edge );
143  con_link->reset_outer( end_link );
144 
145  // Find the last link of the given node (in traversal order around the node).
146  auto const up_link = &target_node.primary_link();
147  auto last_link = up_link;
148  while( &last_link->next() != up_link ) {
149  last_link = &last_link->next();
150  }
151 
152  // Now insert the new link in between up_link and last_link.
153  assert( &last_link->next() == up_link );
154  last_link->reset_next( con_link );
155  con_link->reset_next( up_link );
156  assert( &last_link->next() == con_link );
157  assert( &con_link->next() == up_link );
158 
159  // Add the link to the tree. This changes the size of the links vector, so the next call to
160  // links.size() will give a new, proper value for the other link.
161  links.push_back( std::move( con_link_u ));
162 
163  // Populate the link at the new node and add it to the tree.
164  end_link->reset_index( links.size() );
165  end_link->reset_node( end_node );
166  end_link->reset_edge( con_edge );
167  end_link->reset_next( end_link );
168  end_link->reset_outer( con_link );
169  links.push_back( std::move( end_link_u ));
170 
171  // Populate the new node and add it to the tree.
172  end_node->reset_index( nodes.size() );
173  end_node->reset_primary_link( end_link );
174  end_node->reset_data( target_node.data_ptr()->recreate() );
175  nodes.push_back( std::move( end_node_u ));
176 
177  // Populate the new edge and add it to the tree.
178  con_edge->reset_index( edges.size() );
179  con_edge->reset_primary_link( con_link );
180  con_edge->reset_secondary_link( end_link );
181  con_edge->reset_data( target_node.primary_link().edge().data_ptr()->recreate() );
182  edges.push_back( std::move( con_edge_u ));
183 
184  // Return the new node. We just moved the node uniq ptr, but not the pointee, so this is valid.
185  return *end_node;
186 }
187 
189  Tree& tree,
190  TreeEdge& target_edge,
191  std::function<void( TreeEdge& target_edge, TreeEdge& new_edge )> adjust_edges
192 ) {
193  // Basic check.
194  if( ! belongs_to( target_edge, tree ) ) {
195  throw std::runtime_error(
196  "Cannot add Node to Tree on an Edge that is not part of the Tree."
197  );
198  }
199 
200  // Get container access shortcuts.
201  auto& links = tree.expose_link_container();
202  auto& nodes = tree.expose_node_container();
203  auto& edges = tree.expose_edge_container();
204 
205  // Create all new elements that we need:
206  // * Two links that build a new node in the middle of the target edge.
207  // * The new node in the middle of the target edge.
208  // * A new edge that connects to the secondary end of the target edge.
209  auto pri_link_u = utils::make_unique< TreeLink >();
210  auto sec_link_u = utils::make_unique< TreeLink >();
211  auto mid_node_u = utils::make_unique< TreeNode >();
212  auto sec_edge_u = utils::make_unique< TreeEdge >();
213 
214  // Get the pointers, for ease of use.
215  auto const pri_link = pri_link_u.get();
216  auto const sec_link = sec_link_u.get();
217  auto const mid_node = mid_node_u.get();
218  auto const sec_edge = sec_edge_u.get();
219 
220  // Populate the link towards the primary end of the target edge and add it to the tree.
221  pri_link->reset_index( links.size() );
222  pri_link->reset_next( sec_link );
223  pri_link->reset_outer( &target_edge.primary_link() );
224  pri_link->reset_node( mid_node );
225  pri_link->reset_edge( &target_edge );
226  links.push_back( std::move( pri_link_u ));
227 
228  // Populate the link towards the secondary end of the target edge and add it to the tree.
229  sec_link->reset_index( links.size() );
230  sec_link->reset_next( pri_link );
231  sec_link->reset_outer( &target_edge.secondary_link() );
232  sec_link->reset_node( mid_node );
233  sec_link->reset_edge( sec_edge );
234  links.push_back( std::move( sec_link_u ));
235 
236  // Populate the new node in the middle of the target edge and add it to the tree.
237  mid_node->reset_index( nodes.size() );
238  mid_node->reset_primary_link( pri_link );
239  mid_node->reset_data( target_edge.primary_node().data_ptr()->recreate() );
240  nodes.push_back( std::move( mid_node_u ));
241 
242  // Populate the edge at the secondary end of the target edge and add it to the tree.
243  sec_edge->reset_index( edges.size() );
244  sec_edge->reset_primary_link( sec_link );
245  sec_edge->reset_secondary_link( &target_edge.secondary_link() );
246  sec_edge->reset_data( target_edge.data_ptr()->recreate() );
247  edges.push_back( std::move( sec_edge_u ));
248 
249  // Finally adjust the existing tree elements.
250  target_edge.primary_link().reset_outer( pri_link );
251  target_edge.secondary_link().reset_outer( sec_link );
252  target_edge.secondary_link().reset_edge( sec_edge );
253  target_edge.reset_secondary_link( pri_link );
254 
255  // If we have a transform function, call it.
256  if( adjust_edges ) {
257  adjust_edges( target_edge, *sec_edge );
258  }
259 
260  return *mid_node;
261 }
262 
264  Tree& tree,
265  TreeEdge& target_edge,
266  std::function<void( TreeEdge& target_edge, TreeEdge& new_edge )> adjust_edges
267 ) {
268  // First add a node that splits the edge, and then a new leaf node to this one.
269  auto& mid_node = add_new_node( tree, target_edge, adjust_edges );
270  return add_new_node( tree, mid_node );
271 }
272 
273 // =================================================================================================
274 // Delete Nodes
275 // =================================================================================================
276 
277 void delete_node( Tree& tree, TreeNode& target_node )
278 {
279  // Basic check.
280  if( ! belongs_to( target_node, tree ) ) {
281  throw std::runtime_error(
282  "Cannot delete Node from a Tree that is not part of the Tree."
283  );
284  }
285 
286  auto const deg = degree( target_node );
287  if( deg == 1 ) {
288  delete_leaf_node( tree, target_node );
289  } else if( deg == 2 ) {
290  delete_linear_node( tree, target_node );
291  } else {
292  delete_subtree( tree, Subtree{ target_node } );
293  }
294 }
295 
296 void delete_leaf_node( Tree& tree, TreeNode& target_node )
297 {
298  // Basic check.
299  if( ! belongs_to( target_node, tree ) ) {
300  throw std::runtime_error(
301  "Cannot delete node from a tree that is not part of the tree."
302  );
303  }
304  if( degree( target_node ) != 1 ) {
305  throw std::runtime_error(
306  "Cannot delete leaf node if the node is not actually a leaf."
307  );
308  }
309  if( tree.link_count() <= 2 ) {
310  assert( tree.node_count() == 2 && tree.edge_count() == 1 );
311  throw std::runtime_error(
312  "Cannot delete leaf node from a minmal tree that only contains two nodes."
313  );
314  }
315 
316  // Node has to be a leaf.
317  assert( &target_node.link().next() == &target_node.link() );
318 
319  // Get container access shortcuts.
320  auto& links = tree.expose_link_container();
321  auto& nodes = tree.expose_node_container();
322  auto& edges = tree.expose_edge_container();
323 
324  // If the node to be deleted is the root, we need to reset to its adjacent node.
325  // We need to check this now, and reset later, because the link indices will change...
326  // Also, if the root is the node where the subtree is attached, we might need to reset
327  // the root link, because the attachement link might get deleted.
328  // For this, we use the first link of the node where the node is attached.
329  auto root_link = &tree.root_link();
330  if( tree.root_node().index() == target_node.index() || &target_node.link().outer() == &tree.root_link() ) {
331  root_link = &target_node.link().outer().next();
332 
333  // Also, if we reset the root link, we need to update the primary link of the new root node.
334  // Do some checks for this. (The commented line is an alternative to resetting it.
335  // We now do this later, at the end of the function.)
336  assert( &target_node.link().outer().node().link() == &target_node.link().outer() );
337  assert( &root_link->node() == &target_node.link().outer().node() );
338  // target_node.link().outer().node().reset_primary_link( root_link );
339  }
340 
341  // Delete the edge and adjust indices of other edges as needed.
342  // We do this first, so that the link to the edge is still valid.
343  auto const edge_idx = target_node.link().edge().index();
344  edges.erase( edges.begin() + edge_idx );
345  for( size_t i = edge_idx; i < tree.edge_count(); ++i ) {
346  assert( tree.edge_at(i).index() == i + 1 );
347  tree.edge_at(i).reset_index(i);
348  }
349 
350  // Make the node that the target is attached to forget about this node by skipping the link.
351  auto& attach_link = target_node.link().outer();
352  assert( &attach_link.edge() == &target_node.link().edge() );
353  assert( &attach_link.outer() == &target_node.link() );
354  auto link_ptr = &( attach_link.next() );
355  while( &link_ptr->next() != &attach_link ) {
356  link_ptr = &link_ptr->next();
357  }
358  assert( &link_ptr->next() == &attach_link );
359  assert( &link_ptr->next().next() == &attach_link.next() );
360  link_ptr->reset_next( &link_ptr->next().next() );
361 
362  // Delete both links, and adjust the indices of the other links as needed.
363  // Deleting two elemetns is tricky, as the indices shift...
364  auto const minmax_link_idx = utils::minmax_value( attach_link.index(), attach_link.outer().index() );
365  links.erase( links.begin() + minmax_link_idx.first );
366  links.erase( links.begin() + minmax_link_idx.second - 1 );
367  for( size_t i = minmax_link_idx.first; i < links.size(); ++i ) {
368  assert( tree.link_at(i).index() == i + 1 || tree.link_at(i).index() == i + 2 );
369  tree.link_at(i).reset_index(i);
370  }
371 
372  // Finally, delete the node and adjust indices of other nodes as needed.
373  auto const node_idx = target_node.index();
374  nodes.erase( nodes.begin() + node_idx );
375  for( size_t i = node_idx; i < tree.node_count(); ++i ) {
376  assert( tree.node_at(i).index() == i + 1 );
377  tree.node_at(i).reset_index(i);
378  }
379 
380  // Lastly, reset the root link (or set it to what it was before...)
381  tree.reset_root_link( root_link );
382  tree.root_link().node().reset_primary_link( root_link );
383 }
384 
386  Tree& tree,
387  TreeNode& target_node,
388  std::function<void( TreeEdge& remaining_edge, TreeEdge& deleted_edge )> adjust_edges
389 ) {
390  // Basic check.
391  if( ! belongs_to( target_node, tree ) ) {
392  throw std::runtime_error(
393  "Cannot delete Node from a Tree that is not part of the Tree."
394  );
395  }
396  if( degree( target_node ) != 2 ) {
397  throw std::runtime_error(
398  "Cannot delete linear Node if the Node is not actually linear (degree 2)."
399  );
400  }
401 
402  // Before we do anything, call the adjust function.
403  if( adjust_edges ) {
404  adjust_edges( target_node.primary_link().edge(), target_node.primary_link().next().edge() );
405  }
406 
407  // If the node to be deleted is the root, we need to reset to its adjacent node.
408  // We need to check this now, and reset later, because the node indices will change...
409  auto root_link = &tree.root_link();
410  if( tree.root_node().index() == target_node.index() ) {
411  root_link = &target_node.link().outer();
412  }
413 
414  // Get container access shortcuts.
415  auto& links = tree.expose_link_container();
416  auto& nodes = tree.expose_node_container();
417  auto& edges = tree.expose_edge_container();
418 
419  // Adjust the links of the two adjacent nodes to point to each other.
420  auto& pr_link = target_node.link();
421  auto& adj_link_p = pr_link.outer();
422  auto& adj_link_d = pr_link.next().outer();
423  assert( &adj_link_p.outer().node() == &adj_link_d.outer().node() );
424  adj_link_p.reset_outer( &adj_link_d );
425  adj_link_d.reset_outer( &adj_link_p );
426 
427  // Adjust the pointers to and from the remaining edge.
428  // We are resetting the primary adjacent edge here. This also takes care of setting it to
429  // another node in case that the deleted node is the root.
430  // The first assertion needs a special case for this (if the deletion node is the root).
431  assert( &adj_link_p.edge().secondary_node() == &target_node || root_link != &tree.root_link() );
432  assert( &adj_link_p.edge() == &target_node.link().edge() );
433  assert( &adj_link_d.edge() == &target_node.link().next().edge() );
434  assert( &adj_link_d.edge().primary_node() == &target_node );
435  adj_link_p.edge().reset_primary_link( &adj_link_p );
436  adj_link_p.edge().reset_secondary_link( &adj_link_d );
437  adj_link_d.reset_edge( &target_node.primary_link().edge() );
438 
439  // Delete the edge and adjust indices of other edges as needed.
440  auto const edge_idx = pr_link.next().edge().index();
441  edges.erase( edges.begin() + edge_idx );
442  for( size_t i = edge_idx; i < tree.edge_count(); ++i ) {
443  assert( tree.edge_at(i).index() == i + 1 );
444  tree.edge_at(i).reset_index(i);
445  }
446 
447  // Delete both links of the node, and adjust the indices of the other links as needed.
448  // Deleting two elemetns is tricky, as the indices shift...
449  auto const minmax_link_idx = utils::minmax_value( pr_link.index(), pr_link.next().index() );
450  links.erase( links.begin() + minmax_link_idx.first );
451  links.erase( links.begin() + minmax_link_idx.second - 1 );
452  for( size_t i = minmax_link_idx.first; i < links.size(); ++i ) {
453  assert( tree.link_at(i).index() == i + 1 || tree.link_at(i).index() == i + 2 );
454  tree.link_at(i).reset_index(i);
455  }
456 
457  // Finally, delete the node and adjust indices of other nodes as needed.
458  auto const node_idx = target_node.index();
459  nodes.erase( nodes.begin() + node_idx );
460  for( size_t i = node_idx; i < tree.node_count(); ++i ) {
461  assert( tree.node_at(i).index() == i + 1 );
462  tree.node_at(i).reset_index(i);
463  }
464 
465  // Lastly, reset the root link (or set it to what it was before...)
466  tree.reset_root_link( root_link );
467 }
468 
475 template<class T>
476 static void delete_from_tree_container_( T& old_elems, std::vector<size_t>& del_elems )
477 {
478  // Sort the deletion list, so that we do not need to seek in it. There should be no duplicats.
479  std::sort( del_elems.begin(), del_elems.end() );
480  assert( std::adjacent_find( del_elems.begin(), del_elems.end()) == del_elems.end() );
481 
482  // We never delete the whole tree!
483  assert( del_elems.size() < old_elems.size() );
484 
485  // Create an empty container and move the needed elements there.
486  // This is just so much easier than trying to delete within a container
487  // while iterating it, adjusting the internal indices of the elemetns, etc.
488  auto new_elems = T{};
489  new_elems.reserve( old_elems.size() - del_elems.size() );
490 
491  // Move elements and adjust indices.
492  size_t del_elems_idx = 0;
493  for( size_t i = 0; i < old_elems.size(); ++i ) {
494  assert( i == old_elems[i]->index() );
495  if( del_elems_idx < del_elems.size() && del_elems[del_elems_idx] == i ) {
496 
497  // If the current index is in the deletion list, don't move the element,
498  // and go to the next element of the deletion list.
499  ++del_elems_idx;
500  } else {
501 
502  // If the current index is not in the deletion list, move it to the new container.
503  assert( old_elems[i]->index() == i );
504  assert( new_elems.size() == i - del_elems_idx );
505  old_elems[i]->reset_index( i - del_elems_idx );
506  new_elems.emplace_back( std::move( old_elems[i] ));
507  assert( new_elems.back()->index() == new_elems.size() - 1 );
508  }
509  }
510 
511  // Swap, which will put the elements that we want to delete in the new container,
512  // which will destroy them when going out of scope.
513  assert( new_elems.size() + del_elems.size() == old_elems.size() );
514  std::swap( new_elems, old_elems );
515  // old_elems = std::move( new_elems );
516 }
517 
518 void delete_subtree( Tree& tree, Subtree const& subtree )
519 {
520  // Basic check.
521  if( ! belongs_to( subtree, tree ) ) {
522  throw std::runtime_error(
523  "Cannot delete Subtree from a Tree that is not part of the Tree."
524  );
525  }
526  if( is_leaf( subtree.link().outer() )) {
527  throw std::runtime_error(
528  "Cannot delete Subtree from Tree if this leads to a singluar Node."
529  );
530  }
531 
532  // Make lists of all indices that need to be deleted.
533  // This already includes the edge towards the rest of the tree, as well as the link
534  // at the attachment node!
535  // Also, find out if whe are deleting the root. If so, we need to reset it later.
536  std::vector<size_t> del_nodes;
537  std::vector<size_t> del_edges;
538  std::vector<size_t> del_links;
539  bool contains_root = false;
540  for( auto it : preorder( subtree )) {
541  del_nodes.push_back( it.node().index() );
542  del_edges.push_back( it.edge().index() );
543  del_links.push_back( it.link().index() );
544  del_links.push_back( it.link().outer().index() );
545 
546  if( is_root( it.node() )) {
547  contains_root = true;
548  }
549  }
550 
551  // If we are about to delete the root, store the new link for later.
552  // Also, if the root is the node where the subtree is attached, we might need to reset
553  // the root link, because the attachement link might get deleted.
554  // For this, we use the first link of the node where the subtree is attached.
555  // This cannot be a leaf node (as we already checked this earlier), otherwise we'd be setting
556  // it to the attachement link itself, because if this was the case, it's next() is itself.
557  // If not, we just store the current one, so that setting it later doesn't change anything.
558  assert( ! is_leaf( subtree.link().outer() ));
559  auto root_link = &tree.root_link();
560  if( contains_root || &subtree.link().outer() == &tree.root_link() ) {
561  root_link = &tree.link_at( subtree.link().outer().next().index() );
562  }
563 
564  // Get the link that points to the attachment link
565  // (the one on the remaining node that will be deleted).
566  auto& attach_link = subtree.link().outer();
567  assert( &attach_link.edge() == &subtree.link().edge() );
568  assert( &attach_link.outer() == &subtree.link() );
569  auto link_ptr = &( attach_link.next() );
570  while( &link_ptr->next() != &attach_link ) {
571  link_ptr = &link_ptr->next();
572  }
573  assert( &link_ptr->next() == &attach_link );
574  assert( &link_ptr->next().next() == &attach_link.next() );
575  auto& link_nc = tree.link_at( link_ptr->index() );
576 
577  // If the primary link of the attachment node is about to be deleted, reset it to another link.
578  if( &link_nc.node().primary_link() == &link_nc.next() ) {
579  assert( &attach_link.node().primary_link() == &attach_link );
580  link_nc.node().reset_primary_link( &link_nc.next().next() );
581  }
582 
583  // Make the node that the target is attached to forget about this node by skipping the link.
584  // Because the subtree always stores a const link, we need to get a non-const from the tree.
585  link_nc.reset_next( &link_nc.next().next() );
586 
587  // Delete the elements at the given indices.
591 
592  // Reset the root if neeeded. This uses the new index of the pointee.
593  tree.reset_root_link( root_link );
594  tree.root_link().node().reset_primary_link( root_link );
595 }
596 
598  Tree& tree,
599  TreeEdge& target_edge,
600  std::function<void( TreeNode& remaining_node, TreeNode& deleted_node )> adjust_nodes
601 ) {
602  // Basic checks.
603  if( ! belongs_to( target_edge, tree ) ) {
604  throw std::runtime_error(
605  "Cannot delete edge from a tree that is not part of the tree."
606  );
607  }
608  if( tree.node_count() == 2 ) {
609  assert( is_leaf( target_edge.primary_node() ));
610  assert( is_leaf( target_edge.secondary_node() ));
611  assert( tree.link_count() == 2 && tree.edge_count() == 1 );
612  throw std::runtime_error(
613  "Cannot delete edge from minimal tree that only consists of two nodes."
614  );
615  }
616  assert( !( is_leaf( target_edge.primary_node() ) && is_leaf( target_edge.secondary_node() ) ));
617 
618  // Special cases where one of the nodes at the end of the edge is a leaf. In these cases, we can
619  // simply delete the node instead. We treat them here this way, as their implementation with
620  // the below algorithm is just too complex...
621  if( is_leaf( target_edge.primary_node() )) {
622  // If the primary node is a leaf, that means (as it is primary) it also is the root.
623  // In that case, we need to reverse the node adjustment function.
624  assert( is_root( target_edge.primary_node() ));
625  assert( degree( target_edge.primary_node() ) == 1 );
626 
627  if( adjust_nodes ) {
628  adjust_nodes( target_edge.secondary_node(), target_edge.primary_node() );
629  }
630  delete_leaf_node( tree, target_edge.primary_node() );
631  return;
632  }
633  if( is_leaf( target_edge.secondary_node() )) {
634  assert( degree( target_edge.secondary_node() ) == 1 );
635  if( adjust_nodes ) {
636  adjust_nodes( target_edge.primary_node(), target_edge.secondary_node() );
637  }
638  delete_leaf_node( tree, target_edge.secondary_node() );
639  return;
640  }
641 
642  // Now we are done with special cases. Both nodes have neighbors, that is, they are not leaves.
643  assert( ! is_leaf( target_edge.primary_node() ));
644  assert( ! is_leaf( target_edge.secondary_node() ));
645  assert( degree( target_edge.primary_node() ) > 1 );
646  assert( degree( target_edge.secondary_node() ) > 1 );
647 
648  // Before we do anything, call the adjust function.
649  if( adjust_nodes ) {
650  adjust_nodes( target_edge.primary_node(), target_edge.secondary_node() );
651  }
652 
653  // Adjust the distal links of the secondary node of the deleted edge.
654  auto sec_lnk = &target_edge.secondary_link().next();
655  while( sec_lnk != &target_edge.secondary_link() ) {
656  sec_lnk->reset_node( &target_edge.primary_node() );
657 
658  // If we are at the last distal link, we need to adjust its next pointer to the new nodes
659  // next pointer. We then need to break out of the loop (hence, actually never triggering its
660  // normal exit condition... - we could have just used `true` as the loop condition instead,
661  // but the current one is more meaningful to read), as we just reset the next link, so we
662  // cannot follow it any more to get around in the current node. We are done then.
663  if( &sec_lnk->next() == &target_edge.secondary_link() ) {
664  sec_lnk->reset_next( &target_edge.primary_link().next() );
665  break;
666  }
667 
668  // In all other cases (while working through all links of the node), we go to the next.
669  sec_lnk = &sec_lnk->next();
670  }
671  assert( &sec_lnk->next() == &target_edge.primary_link().next() );
672 
673  // Now find the link of the primary node whose next() link is the primary link of the edge.
674  // We need to reset this link to point to our newly moved link instead, as its current next()
675  // link is going to be deleted.
676  auto prm_lnk = &target_edge.primary_link();
677  while( &prm_lnk->next() != &target_edge.primary_link() ) {
678  prm_lnk = &prm_lnk->next();
679  }
680  assert( &prm_lnk->next() == &target_edge.primary_link() );
681  prm_lnk->reset_next( &target_edge.secondary_link().next() );
682 
683  // In the special case that our to-be-deleted primary link is set as the tree's root link,
684  // we need to reset properly. We have just reset the prm_lnk next pointer, so we can use
685  // that as our new root link. We also need to reset the link of that node, as in that special
686  // case, the primary link of the node is also the one that we are about to delete.
687  if( &tree.root_link() == &target_edge.primary_link() ) {
688  assert( &target_edge.primary_node().link() == &target_edge.primary_link() );
689  assert( &target_edge.primary_node().link() == &tree.root_link() );
690  tree.reset_root_link( &prm_lnk->next() );
691  prm_lnk->node().reset_primary_link( &prm_lnk->next() );
692  }
693 
694  // Delete the elements at the given indices.
695  std::vector<size_t> del_nodes = { target_edge.secondary_node().index() };
696  std::vector<size_t> del_edges = { target_edge.index() };
697  std::vector<size_t> del_links = {
698  target_edge.primary_link().index(),
699  target_edge.secondary_link().index()
700  };
704 }
705 
706 void delete_zero_branch_length_edges( Tree& tree, bool include_leaf_edges )
707 {
708  size_t del_idx;
709  do {
710  // Find the next edge that has a branch length of zero.
711  del_idx = tree.edge_count();
712  for( auto const& edge : tree.edges() ) {
713  if( is_leaf(edge) && ! include_leaf_edges ) {
714  continue;
715  }
716  if(
717  edge.data_is_derived_from<CommonEdgeData>() &&
718  edge.data<CommonEdgeData>().branch_length == 0.0
719  ) {
720  del_idx = edge.index();
721  break;
722  }
723  }
724 
725  // If we found one, delete it. If the remaining inner node does not have a name, use
726  // the one from the one that is about to be deleted. For all inner nodes that usually do
727  // not have a name anyway, that does not change that. But if a leaf node is deleted,
728  // we transfer its name. Not sure if that is good, but it sounds reasonable.
729  if( del_idx < tree.edge_count() ) {
730  delete_edge( tree, tree.edge_at( del_idx ), []( TreeNode& rem_node, TreeNode& del_node ){
731  if(
732  del_node.data_is_derived_from<CommonNodeData>() &&
733  rem_node.data_is_derived_from<CommonNodeData>() &&
734  rem_node.data<CommonNodeData>().name.empty()
735  ) {
736  rem_node.data<CommonNodeData>().name = del_node.data<CommonNodeData>().name;
737  }
738  });
739  }
740  } while( del_idx < tree.edge_count() );
741 }
742 
743 // =================================================================================================
744 // Rerooting
745 // =================================================================================================
746 
748  Tree& tree,
749  TreeEdge& target_edge,
750  std::function<void( TreeEdge& target_edge, TreeEdge& new_edge )> adjust_edges
751 ) {
752  if( is_rooted( tree )) {
753  throw std::runtime_error( "Cannot make a Tree rooted if it is already rooted." );
754  }
755 
756  auto& mid_node = add_new_node( tree, target_edge, adjust_edges );
757  change_rooting( tree, mid_node );
758  return mid_node;
759 }
760 
762  Tree& tree,
763  std::function<void( TreeEdge& target_edge, TreeEdge& new_edge )> adjust_edges
764 ) {
765  return make_rooted( tree, tree.root_link().edge(), adjust_edges );
766 }
767 
769  Tree& tree,
770  std::function<void( TreeEdge& remaining_edge, TreeEdge& deleted_edge )> adjust_edges
771 ) {
772  if( ! is_rooted( tree )) {
773  throw std::runtime_error( "Cannot make a Tree unrooted if it is already unrooted." );
774  }
775 
776  delete_linear_node( tree, tree.root_node(), adjust_edges );
777 }
778 
779 void change_rooting( Tree& tree, TreeLink& at_link )
780 {
781  if( ! belongs_to( at_link, tree ) ) {
782  throw std::runtime_error( "Cannot reroot Tree on a Link that is not part of the Tree." );
783  }
784 
785  // We store the old root node, becasue we will change internals of the tree, so that
786  // node().is_root() won't work while this function is running.
787  TreeNode* old_root = &tree.root_node();
788 
789  // Pointer to the primary link of the target node.
790  TreeLink* cur_link = &tree.link_at( at_link.index() ).node().primary_link();
791 
792  // Set new root index and node link of the new root.
793  assert( &at_link == &tree.link_at( at_link.index() ));
794  tree.reset_root_link( &at_link );
795  at_link.node().reset_primary_link( &tree.link_at( at_link.index() ));
796 
797  // Walk the path from the new root to the old, and change all pointers of the edges and nodes
798  // on that path so that they point towards the new root.
799  while( &cur_link->node() != old_root ) {
800 
801  // Assert that the primary direction is correct: Is the current link is at the secondary
802  // end of its edge?
803  assert( cur_link == &cur_link->edge().secondary_link() );
804 
805  // Swap the edge's links, so that they point towards the new root.
806  auto link_p_tmp = &cur_link->edge().primary_link();
807  auto link_s_tmp = &cur_link->edge().secondary_link();
808  cur_link->edge().reset_primary_link( link_s_tmp );
809  cur_link->edge().reset_secondary_link( link_p_tmp );
810  // std::swap( cur_link->edge_->link_p_, cur_link->edge_->link_s_ );
811 
812  // Assert that this worked.
813  assert( cur_link == &cur_link->edge().primary_link() );
814  assert( &cur_link->outer() == &cur_link->edge().secondary_link() );
815 
816  // Store the link of the next node that points towards the root.
817  // We need it, because we will change this upwards link of the next node now.
818  auto to_root_link = &cur_link->outer().node().primary_link();
819 
820  // Change the main link of the next node so that it points towards the new root.
821  cur_link->outer().node().reset_primary_link( &cur_link->outer() );
822 
823  // Move one node towards the root.
824  cur_link = to_root_link;
825  }
826 }
827 
828 void change_rooting( Tree& tree, TreeNode& at_node )
829 {
830  if( ! belongs_to( at_node, tree ) ) {
831  throw std::runtime_error( "Cannot reroot Tree on a Node that is not part of the Tree." );
832  }
833  change_rooting( tree, at_node.link() );
834 }
835 
836 // =================================================================================================
837 // Ladderize
838 // =================================================================================================
839 
840 void ladderize( Tree& tree, LadderizeOrder order )
841 {
842  // For each node, get how many nodes its subtree (away from the root) has.
843  // We use this quantity to sort each node's links.
844  auto sub_sizes = subtree_sizes( tree );
845 
846  // Ladderize all nodes
847  for( auto& node : tree.nodes() ) {
848 
849  // No need to ladderize a leaf. It would still work, but we can use this as a speedup.
850  if( is_leaf( node )) {
851  continue;
852  }
853 
854  // Get the sizes of the children/subtrees of this node.
855  std::vector<size_t> child_sizes;
856  std::vector<TreeLink*> child_links;
857  for( auto link_it : node_links( node ) ) {
858 
859  // Don't treat the link towards the root; we only want to sort the subtree.
860  // Assert that the first iteration is actually this link towards the root.
861  if( link_it.is_first_iteration() ) {
862  assert( &link_it.link() == &node.primary_link() );
863  continue;
864  }
865 
866  child_sizes.push_back( sub_sizes[ link_it.link().outer().node().index() ] );
867  child_links.push_back( &link_it.link() );
868  }
869 
870  // Sort the sizes. We use stable sort in order to not change the order of equal sized subtrees.
871  auto child_order = ( order == LadderizeOrder::kSmallFirst
872  ? utils::stable_sort_indices( child_sizes.begin(), child_sizes.end(), std::less<size_t>() )
873  : utils::stable_sort_indices( child_sizes.begin(), child_sizes.end(), std::greater<size_t>() )
874  );
875 
876  // The number of indices needs to be the rank of the node (number of immediate children).
877  assert( child_order.size() == child_sizes.size() );
878  assert( child_order.size() == child_links.size() );
879  assert( child_order.size() == degree( node ) - 1 );
880 
881  // Change all next links of the node so that they reflect the subtree size order.
882  auto cur_link = &node.primary_link();
883  for( auto child_order_i : child_order ) {
884 
885  // We use this assertion to ensure that each link is only processed once.
886  // At the end of this loop, we set it to nullptr, so a second encounter would fail.
887  assert( child_links[ child_order_i ] );
888 
889  // Set the link's next link and move on.
890  cur_link->reset_next( child_links[ child_order_i ] );
891  cur_link = child_links[ child_order_i ];
892 
893  // Set the link in the vector to null, so that the above assert can check that we
894  // never process it again.
895  child_links[ child_order_i ] = nullptr;
896  }
897 
898  // We now need to set the next pointer of the last link of the node so that it points
899  // back to the original starting node (the one towards the root).
900  cur_link->reset_next( &node.primary_link() );
901 
902  // Finally, assert that we processed all links. If so, all of them are null by now.
903  for( auto const& cl : child_links ) {
904  (void) cl;
905  assert( !cl );
906  }
907  }
908 }
909 
910 } // namespace tree
911 } // namespace genesis
genesis::placement::swap
void swap(Sample &lhs, Sample &rhs)
Definition: sample.cpp:104
genesis::tree::TreeEdge::primary_link
TreeLink & primary_link()
Return the TreeLink of this TreeEdge that points towards the root.
Definition: edge.hpp:114
algorithm.hpp
Provides some valuable algorithms that are not part of the C++ 11 STL.
genesis::tree::TreeNode::reset_primary_link
TreeNode & reset_primary_link(TreeLink *val)
Reset the internal pointer to the TreeLink of this TreeNode.
Definition: tree/tree/node.hpp:276
genesis::tree::delete_from_tree_container_
static void delete_from_tree_container_(T &old_elems, std::vector< size_t > &del_elems)
Local helper function template that takes one of the tree element containers and deletes all elements...
Definition: tree/function/manipulation.cpp:476
genesis::tree::Subtree::link
TreeLink const & link() const
Get the TreeLink that separates the subtree from the rest of the tree.
Definition: subtree.hpp:125
genesis::tree::Tree::node_count
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.hpp:264
genesis::tree::Tree::link_count
size_t link_count() const
Return the number of TreeLinks of the Tree.
Definition: tree/tree.hpp:256
genesis::tree::delete_zero_branch_length_edges
void delete_zero_branch_length_edges(Tree &tree, bool include_leaf_edges)
Delete (contract) all branches of a CommonTree that have branch length zero.
Definition: tree/function/manipulation.cpp:706
genesis::tree::LadderizeOrder
LadderizeOrder
Definition: tree/function/manipulation.hpp:413
genesis::tree::Tree::expose_link_container
LinkContainerType & expose_link_container()
Get the container that stores all TreeLinks of the Tree.
Definition: tree/tree.cpp:192
tree.hpp
Header of Tree class.
genesis::tree::TreeEdge::index
size_t index() const
Return the index of this Edge.
Definition: edge.hpp:106
functions.hpp
genesis::tree::Tree::expose_node_container
NodeContainerType & expose_node_container()
Get the container that stores all TreeNodes of the Tree.
Definition: tree/tree.cpp:197
genesis::tree::Tree::node_at
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.hpp:220
genesis::tree::Tree::edge_at
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.hpp:238
genesis::tree::preorder
utils::Range< IteratorPreorder< true > > preorder(ElementType const &element)
Definition: tree/iterator/preorder.hpp:264
genesis::tree::add_new_node
TreeNode & add_new_node(Tree &tree, TreeNode &target_node)
Add a new Node as a leaf to an existing Node.
Definition: tree/function/manipulation.cpp:109
genesis::tree::add_new_leaf_node
TreeNode & add_new_leaf_node(Tree &tree, TreeEdge &target_edge, std::function< void(TreeEdge &target_edge, TreeEdge &new_edge)> adjust_edges)
Add a new Node as a leaf to an existing Edge, by also adding a new Node in the middle of that Edge.
Definition: tree/function/manipulation.cpp:263
genesis::tree::belongs_to
bool belongs_to(Tree const &tree, TreeNode const &node)
Return whether the TreeNode belongs to the Tree, i.e., whether it is owned by the Tree.
Definition: tree/function/operators.cpp:221
genesis::tree::node_links
utils::Range< IteratorNodeLinks< true > > node_links(ElementType const &element)
Definition: node_links.hpp:186
std.hpp
Provides some valuable additions to STD.
genesis::tree::degree
size_t degree(TreeLink const &link)
Return the degree of the node for a given TreeLink, i.e. how many neighbouring nodes it has.
Definition: tree/function/functions.cpp:103
genesis::tree::Tree::root_node
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.hpp:300
genesis::tree::make_rooted
TreeNode & make_rooted(Tree &tree, std::function< void(TreeEdge &target_edge, TreeEdge &new_edge)> adjust_edges)
Root a Tree on the first TreeEdge of its current top level TreeNode.
Definition: tree/function/manipulation.cpp:761
genesis::tree::ladderize
void ladderize(Tree &tree, LadderizeOrder order)
Ladderize a Tree, that is, order its subtrees by size.
Definition: tree/function/manipulation.cpp:840
genesis::tree::TreeEdge::secondary_node
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:162
genesis::tree::TreeNode::primary_link
TreeLink & primary_link()
Return the TreeLink that points towards the root.
Definition: tree/tree/node.hpp:110
genesis::tree::Tree
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
subtree.hpp
genesis::tree::make_unrooted
void make_unrooted(Tree &tree, std::function< void(TreeEdge &remaining_edge, TreeEdge &deleted_edge)> adjust_edges)
Unroot a Tree.
Definition: tree/function/manipulation.cpp:768
genesis::tree::Tree::reset_root_link
Tree & reset_root_link(TreeLink *root_link)
Reset the link that is considered to be the root of the Tree.
Definition: tree/tree.cpp:184
logging.hpp
Provides easy and fast logging functionality.
genesis::tree::CommonEdgeData::branch_length
double branch_length
Branch length of the edge.
Definition: tree/common_tree/tree.hpp:193
genesis::tree::is_root
bool is_root(TreeLink const &link)
Return whether the link belongs to the root node of its Tree.
Definition: tree/function/functions.cpp:89
genesis::tree::Tree::root_link
TreeLink & root_link()
Return the TreeLink at the current root of the Tree.
Definition: tree/tree.hpp:284
genesis::tree::Subtree
Reference to a subtree of a Tree.
Definition: subtree.hpp:69
genesis::tree::Tree::expose_edge_container
EdgeContainerType & expose_edge_container()
Get the container that stores all TreeEdges of the Tree.
Definition: tree/tree.cpp:202
genesis::utils::minmax_value
std::pair< T, T > minmax_value(T const &a, T const &b)
Returns the lowest and the greatest of the given values, by value.
Definition: algorithm.hpp:128
genesis::tree::TreeEdge
Definition: edge.hpp:60
genesis::tree::TreeNode
Definition: tree/tree/node.hpp:58
genesis::tree::TreeEdge::reset_primary_link
TreeEdge & reset_primary_link(TreeLink *val)
Reset the internal pointer to the primary TreeLink of this TreeEdge.
Definition: edge.hpp:290
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::TreeEdge::reset_index
TreeEdge & reset_index(size_t val)
Reset the internal index of this TreeEdge.
Definition: edge.hpp:276
manipulation.hpp
genesis::tree::TreeNode::link
TreeLink & link()
Return the TreeLink that points towards the root.
Definition: tree/tree/node.hpp:129
genesis::tree::TreeEdge::primary_node
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:146
operators.hpp
Tree operator functions.
genesis::tree::Tree::nodes
utils::Range< IteratorNodes > nodes()
Definition: tree/tree.hpp:418
genesis::tree::Tree::edges
utils::Range< IteratorEdges > edges()
Definition: tree/tree.hpp:452
genesis::tree::TreeEdge::data_ptr
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.hpp:246
genesis::tree::delete_edge
void delete_edge(Tree &tree, TreeEdge &target_edge, std::function< void(TreeNode &remaining_node, TreeNode &deleted_node)> adjust_nodes)
Delete a TreeEdge from a Tree, that is, contract the two TreeNodes at its ends into one TreeNode.
Definition: tree/function/manipulation.cpp:597
genesis::tree::minimal_tree_topology
Tree minimal_tree_topology()
Create a minimal Tree that can be used with manipulation functions such as add_new_node() or add_new_...
Definition: tree/function/manipulation.cpp:55
genesis::tree::subtree_sizes
std::vector< size_t > subtree_sizes(Tree const &tree, TreeNode const &node)
Calculate the sizes of all subtrees as seen from the given TreeNode.
Definition: tree/function/functions.cpp:367
genesis::tree::CommonEdgeData
Common class containing the commonly needed data for tree edges.
Definition: tree/common_tree/tree.hpp:144
genesis::tree::is_rooted
bool is_rooted(Tree const &tree)
Return whether the Tree is rooted, that is, whether the root node has two neighbors.
Definition: tree/function/functions.cpp:163
genesis::tree::add_new_node
TreeNode & add_new_node(Tree &tree, TreeEdge &target_edge, std::function< void(TreeEdge &target_edge, TreeEdge &new_edge)> adjust_edges)
Add a new Node that splits an existing Edge.
Definition: tree/function/manipulation.cpp:188
genesis::tree::delete_node
void delete_node(Tree &tree, TreeNode &target_node)
Delete a TreeNode from a Tree.
Definition: tree/function/manipulation.cpp:277
genesis::tree::BaseEdgeData::recreate
virtual std::unique_ptr< BaseEdgeData > recreate() const
Polymorphically create a default-constructed instance of this class with the same derived type as it ...
Definition: edge_data.hpp:126
genesis::tree::Tree::link_at
TreeLink & link_at(size_t index)
Return the TreeLink at a certain index.
Definition: tree/tree.hpp:202
genesis::tree::TreeEdge::reset_secondary_link
TreeEdge & reset_secondary_link(TreeLink *val)
Reset the internal pointer to the secondary TreeLink of this TreeEdge.
Definition: edge.hpp:304
genesis::tree::TreeNode::index
size_t index() const
Return the index of this Node.
Definition: tree/tree/node.hpp:102
genesis::tree::delete_subtree
void delete_subtree(Tree &tree, Subtree const &subtree)
Delete a complete Subtree from a Tree.
Definition: tree/function/manipulation.cpp:518
genesis::tree::is_leaf
bool is_leaf(TreeLink const &link)
Return true iff the node of the given link is a leaf node.
Definition: tree/function/functions.cpp:59
genesis::tree::TreeNode::data_ptr
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: tree/tree/node.hpp:232
genesis::utils::stable_sort_indices
std::vector< size_t > stable_sort_indices(RandomAccessIterator first, RandomAccessIterator last, Comparator comparator)
Get the indices to the stable sorted order of the given range.
Definition: algorithm.hpp:239
genesis::tree::TreeEdge::secondary_link
TreeLink & secondary_link()
Return the TreeLink of this TreeEdge that points away from the root.
Definition: edge.hpp:130
genesis::tree::BaseNodeData::recreate
virtual std::unique_ptr< BaseNodeData > recreate() const
Polymorphically create a default-constructed instance of this class with the same derived type as it ...
Definition: node_data.hpp:126
genesis::tree::change_rooting
void change_rooting(Tree &tree, TreeNode &at_node)
Reroot the Tree at the given TreeNode.
Definition: tree/function/manipulation.cpp:828
genesis::tree::TreeNode::reset_index
TreeNode & reset_index(size_t val)
Reset the internal index of this TreeNode.
Definition: tree/tree/node.hpp:262
genesis::tree::delete_leaf_node
void delete_leaf_node(Tree &tree, TreeNode &target_node)
Delete a leaf TreeNode.
Definition: tree/function/manipulation.cpp:296
genesis::tree::delete_linear_node
void delete_linear_node(Tree &tree, TreeNode &target_node, std::function< void(TreeEdge &remaining_edge, TreeEdge &deleted_edge)> adjust_edges)
Delete a "linear" TreeNode from a Tree, that is, a node with two neighbours.
Definition: tree/function/manipulation.cpp:385
preorder.hpp
genesis::tree::Tree::edge_count
size_t edge_count() const
Return the number of TreeEdges of the Tree.
Definition: tree/tree.hpp:272