A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
manipulation.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include "genesis/tree/tree.hpp"
37 
40 
41 #include <cassert>
42 #include <vector>
43 
44 namespace genesis {
45 namespace tree {
46 
47 // =================================================================================================
48 // Add single Nodes
49 // =================================================================================================
50 
51 TreeEdge& add_new_node( Tree& tree, TreeNode& target_node )
52 {
53  // Basic check.
54  if( ! belongs_to( target_node, tree ) ) {
55  throw std::runtime_error(
56  "Cannot create Node at Tree on an Edge that is not part of the Tree."
57  );
58  }
59 
60  // Get container access shortcuts.
61  auto& links = tree.expose_link_container();
62  auto& nodes = tree.expose_node_container();
63  auto& edges = tree.expose_edge_container();
64 
65  // Create all new elements that we need:
66  // 1. A link that gets added to the given node and connects it to the new node.
67  // 2. The link that belongs to the newly created leaf node.
68  // 3. The newly created node itself.
69  // 4. The edge between the two nodes.
70  auto con_link_u = utils::make_unique< TreeLink >();
71  auto end_link_u = utils::make_unique< TreeLink >();
72  auto end_node_u = utils::make_unique< TreeNode >();
73  auto con_edge_u = utils::make_unique< TreeEdge >();
74 
75  // Get the pointers, for ease of use.
76  auto const con_link = con_link_u.get();
77  auto const end_link = end_link_u.get();
78  auto const end_node = end_node_u.get();
79  auto const con_edge = con_edge_u.get();
80 
81  // Populate the link at the given node.
82  con_link->reset_index( links.size() );
83  con_link->reset_node( &target_node );
84  con_link->reset_edge( con_edge );
85  con_link->reset_outer( end_link );
86 
87  // Find the last link of the given node (in traversal order around the node).
88  auto const up_link = &target_node.primary_link();
89  auto last_link = up_link;
90  while( &last_link->next() != up_link ) {
91  last_link = &last_link->next();
92  }
93 
94  // Now insert the new link in between up_link and last_link.
95  assert( &last_link->next() == up_link );
96  last_link->reset_next( con_link );
97  con_link->reset_next( up_link );
98  assert( &last_link->next() == con_link );
99  assert( &con_link->next() == up_link );
100 
101  // Add the link to the tree. This changes the size of the links vector, so the next call to
102  // links.size() will give a new, proper value for the other link.
103  links.push_back( std::move( con_link_u ));
104 
105  // Populate the link at the new node and add it to the tree.
106  end_link->reset_index( links.size() );
107  end_link->reset_node( end_node );
108  end_link->reset_edge( con_edge );
109  end_link->reset_next( end_link );
110  end_link->reset_outer( con_link );
111  links.push_back( std::move( end_link_u ));
112 
113  // Populate the new node and add it to the tree.
114  end_node->reset_index( nodes.size() );
115  end_node->reset_primary_link( end_link );
116  end_node->reset_data( target_node.data_ptr()->recreate() );
117  nodes.push_back( std::move( end_node_u ));
118 
119  // Populate the new edge and add it to the tree.
120  con_edge->reset_index( edges.size() );
121  con_edge->reset_primary_link( con_link );
122  con_edge->reset_secondary_link( end_link );
123  con_edge->reset_data( target_node.primary_link().edge().data_ptr()->recreate() );
124  edges.push_back( std::move( con_edge_u ));
125 
126  // Return the new edge. We just moved the edge uniq ptr, but not the pointee, so this is valid.
127  return *con_edge;
128 }
129 
130 TreeNode& add_new_node( Tree& tree, TreeEdge& target_edge )
131 {
132  // Basic check.
133  if( ! belongs_to( target_edge, tree ) ) {
134  throw std::runtime_error(
135  "Cannot create Node at Tree on an Edge that is not part of the Tree."
136  );
137  }
138 
139  // Get container access shortcuts.
140  auto& links = tree.expose_link_container();
141  auto& nodes = tree.expose_node_container();
142  auto& edges = tree.expose_edge_container();
143 
144  // This function works in two steps: First, we create a new node with all necessary other
145  // elements in the middle of the target edge. Then, we add the new leaf node to this node
146  // by calling the node version of add_new_node() on the new mid-edge node.
147 
148  // Create all new elements for the first step:
149  // * Two links that build a new node in the middle of the target edge.
150  // * The new node in the middle of the target edge.
151  // * A new edge that connects to the secondary end of the target edge.
152  auto pri_link_u = utils::make_unique< TreeLink >();
153  auto sec_link_u = utils::make_unique< TreeLink >();
154  auto mid_node_u = utils::make_unique< TreeNode >();
155  auto sec_edge_u = utils::make_unique< TreeEdge >();
156 
157  // Get the pointers, for ease of use.
158  auto const pri_link = pri_link_u.get();
159  auto const sec_link = sec_link_u.get();
160  auto const mid_node = mid_node_u.get();
161  auto const sec_edge = sec_edge_u.get();
162 
163  // Populate the link towards the primary end of the target edge and add it to the tree.
164  pri_link->reset_index( links.size() );
165  pri_link->reset_next( sec_link );
166  pri_link->reset_outer( &target_edge.primary_link() );
167  pri_link->reset_node( mid_node );
168  pri_link->reset_edge( &target_edge );
169  links.push_back( std::move( pri_link_u ));
170 
171  // Populate the link towards the secondary end of the target edge and add it to the tree.
172  sec_link->reset_index( links.size() );
173  sec_link->reset_next( pri_link );
174  sec_link->reset_outer( &target_edge.secondary_link() );
175  sec_link->reset_node( mid_node );
176  sec_link->reset_edge( sec_edge );
177  links.push_back( std::move( sec_link_u ));
178 
179  // Populate the new node in the middle of the target edge and add it to the tree.
180  mid_node->reset_index( nodes.size() );
181  mid_node->reset_primary_link( pri_link );
182  mid_node->reset_data( target_edge.primary_node().data_ptr()->recreate() );
183  nodes.push_back( std::move( mid_node_u ));
184 
185  // Populate the edge at the secondary end of the target edge and add it to the tree.
186  sec_edge->reset_index( edges.size() );
187  sec_edge->reset_primary_link( sec_link );
188  sec_edge->reset_secondary_link( &target_edge.secondary_link() );
189  sec_edge->reset_data( target_edge.data_ptr()->recreate() );
190  edges.push_back( std::move( sec_edge_u ));
191 
192  // Finally adjust the existing tree elements.
193  target_edge.primary_link().reset_outer( pri_link );
194  target_edge.secondary_link().reset_outer( sec_link );
195  target_edge.secondary_link().reset_edge( sec_edge );
196  target_edge.reset_secondary_link( pri_link );
197 
198  return *mid_node;
199 }
200 
201 TreeEdge& add_new_leaf_node( Tree& tree, TreeEdge& target_edge )
202 {
203  // First add a node that splits the edge, and then a new leaf node to this one.
204  auto& mid_node = add_new_node( tree, target_edge );
205  return add_new_node( tree, mid_node );
206 }
207 
208 TreeNode& add_root_node( Tree& tree, TreeEdge& target_edge )
209 {
210  auto& mid_node = add_new_node( tree, target_edge );
211  reroot( tree, mid_node );
212  return mid_node;
213 }
214 
215 // =================================================================================================
216 // Rerooting
217 // =================================================================================================
218 
219 void reroot( Tree& tree, TreeLink& at_link )
220 {
221  if( ! belongs_to( at_link, tree ) ) {
222  throw std::runtime_error( "Cannot reroot Tree on a Link that is not part of the Tree." );
223  }
224 
225  // We store the old root node, becasue we will change internals of the tree, so that
226  // node().is_root() won't work while this function is running.
227  TreeNode* old_root = &tree.root_node();
228 
229  // Pointer to the primary link of the target node.
230  TreeLink* cur_link = &tree.link_at( at_link.index() ).node().primary_link();
231 
232  // Set new root index and node link of the new root.
233  tree.reset_root_link_index( at_link.index() );
234  at_link.node().reset_primary_link( &tree.link_at( at_link.index() ));
235 
236  // Walk the path from the new root to the old, and change all pointers of the edges and nodes
237  // on that path so that they point towards the new root.
238  while( &cur_link->node() != old_root ) {
239 
240  // Assert that the primary direction is correct: Is the current link is at the secondary
241  // end of its edge?
242  assert( cur_link == &cur_link->edge().secondary_link() );
243 
244  // Swap the edge's links, so that they point towards the new root.
245  auto link_p_tmp = &cur_link->edge().primary_link();
246  auto link_s_tmp = &cur_link->edge().secondary_link();
247  cur_link->edge().reset_primary_link( link_s_tmp );
248  cur_link->edge().reset_secondary_link( link_p_tmp );
249  // std::swap( cur_link->edge_->link_p_, cur_link->edge_->link_s_ );
250 
251  // Assert that this worked.
252  assert( cur_link == &cur_link->edge().primary_link() );
253  assert( &cur_link->outer() == &cur_link->edge().secondary_link() );
254 
255  // Store the link of the next node that points towards the root.
256  // We need it, because we will change this upwards link of the next node now.
257  auto to_root_link = &cur_link->outer().node().primary_link();
258 
259  // Change the main link of the next node so that it points towards the new root.
260  cur_link->outer().node().reset_primary_link( &cur_link->outer() );
261 
262  // Move one node towards the root.
263  cur_link = to_root_link;
264  }
265 }
266 
267 void reroot( Tree& tree, TreeNode& at_node )
268 {
269  if( ! belongs_to( at_node, tree ) ) {
270  throw std::runtime_error( "Cannot reroot Tree on a Node that is not part of the Tree." );
271  }
272  reroot( tree, at_node.link() );
273 }
274 
275 void reroot_at_node( Tree& tree, size_t node_index )
276 {
277  if( node_index >= tree.node_count() ) {
278  throw std::runtime_error( "Cannot reroot Tree on a Node that is not part of the Tree." );
279  }
280  reroot( tree, tree.node_at( node_index ));
281 }
282 
283 // =================================================================================================
284 // Ladderize
285 // =================================================================================================
286 
287 void ladderize( Tree& tree, LadderizeOrder order )
288 {
289  // For each node, get how many nodes its subtree (away from the root) has.
290  // We use this quantity to sort each node's links.
291  auto sub_sizes = subtree_sizes( tree );
292 
293  // Ladderize all nodes
294  for( auto& node_it : tree.nodes() ) {
295 
296  // No need to ladderize a leaf. It would still work, but we can use this as a speedup.
297  if( is_leaf( *node_it )) {
298  continue;
299  }
300 
301  // Get the sizes of the children/subtrees of this node.
302  std::vector<size_t> child_sizes;
303  std::vector<TreeLink*> child_links;
304  for( auto const& link_it : node_links( *node_it ) ) {
305 
306  // Don't treat the link towards the root; we only want to sort the subtree.
307  // Assert that the first iteration is actually this link towards the root.
308  if( link_it.is_first_iteration() ) {
309  assert( &link_it.link() == &node_it->primary_link() );
310  continue;
311  }
312 
313  child_sizes.push_back( sub_sizes[ link_it.link().outer().node().index() ] );
314  child_links.push_back( &link_it.link() );
315  }
316 
317  // Sort the sizes. We use stable sort in order to not change the order of equal sized subtrees.
318  auto child_order = ( order == LadderizeOrder::kSmallFirst
319  ? utils::stable_sort_indices( child_sizes.begin(), child_sizes.end(), std::less<size_t>() )
320  : utils::stable_sort_indices( child_sizes.begin(), child_sizes.end(), std::greater<size_t>() )
321  );
322 
323  // The number of indices needs to be the rank of the node (number of immediate children).
324  assert( child_order.size() == child_sizes.size() );
325  assert( child_order.size() == child_links.size() );
326  assert( child_order.size() == degree( *node_it ) - 1 );
327 
328  // Change all next links of the node so that they reflect the subtree size order.
329  auto cur_link = &node_it->primary_link();
330  for( auto child_order_i : child_order ) {
331 
332  // We use this assertion to ensure that each link is only processed once.
333  // At the end of this loop, we set it to nullptr, so a second encounter would fail.
334  assert( child_links[ child_order_i ] );
335 
336  // Set the link's next link and move on.
337  cur_link->reset_next( child_links[ child_order_i ] );
338  cur_link = child_links[ child_order_i ];
339 
340  // Set the link in the vector to null, so that the above assert can check that we
341  // never process it again.
342  child_links[ child_order_i ] = nullptr;
343  }
344 
345  // We now need to set the next pointer of the last link of the node so that it points
346  // back to the original starting node (the one towards the root).
347  cur_link->reset_next( &node_it->primary_link() );
348 
349  // Finally, assert that we processed all links. If so, all of them are null by now.
350  for( auto const& cl : child_links ) {
351  (void) cl;
352  assert( !cl );
353  }
354  }
355 }
356 
357 } // namespace tree
358 } // namespace genesis
TreeLink & secondary_link()
Return the TreeLink of this TreeEdge that points away from the root.
Definition: edge.hpp:129
Provides some valuable algorithms that are not part of the C++ 11 STL.
TreeNode & reset_primary_link(TreeLink *val)
Reset the internal pointer to the TreeLink of this TreeNode.
Definition: node.hpp:225
utils::Range< IteratorNodeLinks< TreeLink const, TreeNode const, TreeEdge const > > node_links(ElementType const &element)
Definition: node_links.hpp:175
TreeEdge & add_new_node(Tree &tree, TreeNode &target_node)
Add a new Node as a leaf to an existing Node.
size_t degree(TreeNode const &node)
Return the degree of the node, i.e. how many neighbouring nodes it has.
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.cpp:304
Tree operator functions.
TreeLink & primary_link()
Return the TreeLink that points towards the root.
Definition: node.hpp:108
virtual std::unique_ptr< BaseEdgeData > recreate() const
Polymorphically create a default-constructed instance of this class with the same derived type as it ...
Definition: edge_data.hpp:126
TreeEdge & add_new_leaf_node(Tree &tree, TreeEdge &target_edge)
Add a new Node as a leaf to an existing Edge, by also adding a new Node in the middle of that Edge...
utils::Range< IteratorNodes > nodes()
Definition: tree/tree.cpp:484
bool is_leaf(TreeLink const &link)
Return true iff the node of the given link is a leaf node.
std::vector< size_t > subtree_sizes(Tree const &tree, TreeNode const &node)
Calculate the sizes of all subtrees as seen from the given TreeNode.
Provides some valuable additions to STD.
void reroot(Tree &tree, TreeLink &at_link)
Reroot the Tree at the given TreeLink.
TreeLink & link_at(size_t index)
Return the TreeLink at a certain index.
Definition: tree/tree.cpp:284
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.cpp:258
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.cpp:350
void reroot_at_node(Tree &tree, size_t node_index)
Reroot the Tree at the TreeNode with the given index.
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:145
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.hpp:212
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
bool belongs_to(Tree const &tree, TreeNode const &node)
Return whether the TreeNode belongs to the Tree, i.e., whether it is owned by the Tree...
TreeLink & link()
Return the TreeLink that points towards the root.
Definition: node.hpp:127
LinkContainerType & expose_link_container()
Get the container that stores all TreeLinks of the Tree.
Definition: tree/tree.cpp:393
TreeEdge & reset_primary_link(TreeLink *val)
Reset the internal pointer to the primary TreeLink of this TreeEdge.
Definition: edge.hpp:256
void ladderize(Tree &tree, LadderizeOrder order)
virtual std::unique_ptr< BaseNodeData > recreate() const
Polymorphically create a default-constructed instance of this class with the same derived type as it ...
Definition: node_data.hpp:126
std::vector< size_t > stable_sort_indices(RandomAccessIterator first, RandomAccessIterator last, Comparator comparator)
Get the indices to the stable sorted order of the given range.
Definition: algorithm.hpp:181
Tree & reset_root_link_index(size_t val)
Reset the index of the link that is considered to be the root of the Tree.
Definition: tree/tree.cpp:377
Header of Tree class.
TreeLink & primary_link()
Return the TreeLink of this TreeEdge that points towards the root.
Definition: edge.hpp:113
EdgeContainerType & expose_edge_container()
Get the container that stores all TreeEdges of the Tree.
Definition: tree/tree.cpp:417
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.hpp:181
TreeNode & add_root_node(Tree &tree, TreeEdge &target_edge)
Add a new Node that splits an existing Edge, and root the tree on that new Node.
TreeEdge & reset_secondary_link(TreeLink *val)
Reset the internal pointer to the secondary TreeLink of this TreeEdge.
Definition: edge.hpp:270
NodeContainerType & expose_node_container()
Get the container that stores all TreeNodes of the Tree.
Definition: tree/tree.cpp:405