A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
manipulation.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include "genesis/tree/tree.hpp"
37 
40 
41 #include <cassert>
42 #include <vector>
43 
44 namespace genesis {
45 namespace tree {
46 
47 // =================================================================================================
48 // Add single Nodes
49 // =================================================================================================
50 
51 TreeEdge& add_new_node( Tree& tree, TreeNode& target_node )
52 {
53  // Basic check.
54  if( ! belongs_to( target_node, tree ) ) {
55  throw std::runtime_error(
56  "Cannot create Node at Tree on an Edge that is not part of the Tree."
57  );
58  }
59 
60  // Get container access shortcuts.
61  auto& links = tree.expose_link_container();
62  auto& nodes = tree.expose_node_container();
63  auto& edges = tree.expose_edge_container();
64 
65  // Create all new elements that we need:
66  // 1. A link that gets added to the given node and connects it to the new node.
67  // 2. The link that belongs to the newly created leaf node.
68  // 3. The newly created node itself.
69  // 4. The edge between the two nodes.
70  auto con_link_u = utils::make_unique< TreeLink >();
71  auto end_link_u = utils::make_unique< TreeLink >();
72  auto end_node_u = utils::make_unique< TreeNode >();
73  auto con_edge_u = utils::make_unique< TreeEdge >();
74 
75  // Get the pointers, for ease of use.
76  auto const con_link = con_link_u.get();
77  auto const end_link = end_link_u.get();
78  auto const end_node = end_node_u.get();
79  auto const con_edge = con_edge_u.get();
80 
81  // Populate the link at the given node.
82  con_link->reset_index( links.size() );
83  con_link->reset_node( &target_node );
84  con_link->reset_edge( con_edge );
85  con_link->reset_outer( end_link );
86 
87  // Find the last link of the given node (in traversal order around the node).
88  auto const up_link = &target_node.primary_link();
89  auto last_link = up_link;
90  while( &last_link->next() != up_link ) {
91  last_link = &last_link->next();
92  }
93 
94  // Now insert the new link in between up_link and last_link.
95  assert( &last_link->next() == up_link );
96  last_link->reset_next( con_link );
97  con_link->reset_next( up_link );
98  assert( &last_link->next() == con_link );
99  assert( &con_link->next() == up_link );
100 
101  // Add the link to the tree. This changes the size of the links vector, so the next call to
102  // links.size() will give a new, proper value for the other link.
103  links.push_back( std::move( con_link_u ));
104 
105  // Populate the link at the new node and add it to the tree.
106  end_link->reset_index( links.size() );
107  end_link->reset_node( end_node );
108  end_link->reset_edge( con_edge );
109  end_link->reset_next( end_link );
110  end_link->reset_outer( con_link );
111  links.push_back( std::move( end_link_u ));
112 
113  // Populate the new node and add it to the tree.
114  end_node->reset_index( nodes.size() );
115  end_node->reset_primary_link( end_link );
116  end_node->reset_data( target_node.data_ptr()->recreate() );
117  nodes.push_back( std::move( end_node_u ));
118 
119  // Populate the new edge and add it to the tree.
120  con_edge->reset_index( edges.size() );
121  con_edge->reset_primary_link( con_link );
122  con_edge->reset_secondary_link( end_link );
123  con_edge->reset_data( target_node.primary_link().edge().data_ptr()->recreate() );
124  edges.push_back( std::move( con_edge_u ));
125 
126  // Return the new edge. We just moved the edge uniq ptr, but not the pointee, so this is valid.
127  return *con_edge;
128 }
129 
130 TreeEdge& add_new_node( Tree& tree, TreeEdge& target_edge )
131 {
132  // Basic check.
133  if( ! belongs_to( target_edge, tree ) ) {
134  throw std::runtime_error(
135  "Cannot create Node at Tree on an Edge that is not part of the Tree."
136  );
137  }
138 
139  // Get container access shortcuts.
140  auto& links = tree.expose_link_container();
141  auto& nodes = tree.expose_node_container();
142  auto& edges = tree.expose_edge_container();
143 
144  // This function works in two steps: First, we create a new node with all necessary other
145  // elements in the middle of the target edge. Then, we add the new leaf node to this node
146  // by calling the node version of add_new_node() on the new mid-edge node.
147 
148  // Create all new elements for the first step:
149  // * Two links that build a new node in the middle of the target edge.
150  // * The new node in the middle of the target edge.
151  // * A new edge that connects to the secondary end of the target edge.
152  auto pri_link_u = utils::make_unique< TreeLink >();
153  auto sec_link_u = utils::make_unique< TreeLink >();
154  auto mid_node_u = utils::make_unique< TreeNode >();
155  auto sec_edge_u = utils::make_unique< TreeEdge >();
156 
157  // Get the pointers, for ease of use.
158  auto const pri_link = pri_link_u.get();
159  auto const sec_link = sec_link_u.get();
160  auto const mid_node = mid_node_u.get();
161  auto const sec_edge = sec_edge_u.get();
162 
163  // Populate the link towards the primary end of the target edge and add it to the tree.
164  pri_link->reset_index( links.size() );
165  pri_link->reset_next( sec_link );
166  pri_link->reset_outer( &target_edge.primary_link() );
167  pri_link->reset_node( mid_node );
168  pri_link->reset_edge( &target_edge );
169  links.push_back( std::move( pri_link_u ));
170 
171  // Populate the link towards the secondary end of the target edge and add it to the tree.
172  sec_link->reset_index( links.size() );
173  sec_link->reset_next( pri_link );
174  sec_link->reset_outer( &target_edge.secondary_link() );
175  sec_link->reset_node( mid_node );
176  sec_link->reset_edge( sec_edge );
177  links.push_back( std::move( sec_link_u ));
178 
179  // Populate the new node in the middle of the target edge and add it to the tree.
180  mid_node->reset_index( nodes.size() );
181  mid_node->reset_primary_link( pri_link );
182  mid_node->reset_data( target_edge.primary_node().data_ptr()->recreate() );
183  nodes.push_back( std::move( mid_node_u ));
184 
185  // Populate the edge at the secondary end of the target edge and add it to the tree.
186  sec_edge->reset_index( edges.size() );
187  sec_edge->reset_primary_link( sec_link );
188  sec_edge->reset_secondary_link( &target_edge.secondary_link() );
189  sec_edge->reset_data( target_edge.data_ptr()->recreate() );
190  edges.push_back( std::move( sec_edge_u ));
191 
192  // Finally adjust the existing tree elements.
193  target_edge.primary_link().reset_outer( pri_link );
194  target_edge.secondary_link().reset_outer( sec_link );
195  target_edge.secondary_link().reset_edge( sec_edge );
196  target_edge.reset_secondary_link( pri_link );
197 
198  return add_new_node( tree, *mid_node );
199 }
200 
201 // =================================================================================================
202 // Rerooting
203 // =================================================================================================
204 
205 void reroot( Tree& tree, TreeLink& at_link )
206 {
207  if( ! belongs_to( at_link, tree ) ) {
208  throw std::runtime_error( "Cannot reroot Tree on a Link that is not part of the Tree." );
209  }
210 
211  // We store the old root node, becasue we will change internals of the tree, so that
212  // node().is_root() won't work while this function is running.
213  TreeNode* old_root = &tree.root_node();
214 
215  // Pointer to the primary link of the target node.
216  TreeLink* cur_link = &tree.link_at( at_link.index() ).node().primary_link();
217 
218  // Set new root index and node link of the new root.
219  tree.reset_root_link_index( at_link.index() );
220  at_link.node().reset_primary_link( &tree.link_at( at_link.index() ));
221 
222  // Walk the path from the new root to the old, and change all pointers of the edges and nodes
223  // on that path so that they point towards the new root.
224  while( &cur_link->node() != old_root ) {
225 
226  // Assert that the primary direction is correct: Is the current link is at the secondary
227  // end of its edge?
228  assert( cur_link == &cur_link->edge().secondary_link() );
229 
230  // Swap the edge's links, so that they point towards the new root.
231  auto link_p_tmp = &cur_link->edge().primary_link();
232  auto link_s_tmp = &cur_link->edge().secondary_link();
233  cur_link->edge().reset_primary_link( link_s_tmp );
234  cur_link->edge().reset_secondary_link( link_p_tmp );
235  // std::swap( cur_link->edge_->link_p_, cur_link->edge_->link_s_ );
236 
237  // Assert that this worked.
238  assert( cur_link == &cur_link->edge().primary_link() );
239  assert( &cur_link->outer() == &cur_link->edge().secondary_link() );
240 
241  // Store the link of the next node that points towards the root.
242  // We need it, because we will change this upwards link of the next node now.
243  auto to_root_link = &cur_link->outer().node().primary_link();
244 
245  // Change the main link of the next node so that it points towards the new root.
246  cur_link->outer().node().reset_primary_link( &cur_link->outer() );
247 
248  // Move one node towards the root.
249  cur_link = to_root_link;
250  }
251 }
252 
253 void reroot( Tree& tree, TreeNode& at_node )
254 {
255  if( ! belongs_to( at_node, tree ) ) {
256  throw std::runtime_error( "Cannot reroot Tree on a Node that is not part of the Tree." );
257  }
258  reroot( tree, at_node.link() );
259 }
260 
261 void reroot_at_node( Tree& tree, size_t node_index )
262 {
263  if( node_index >= tree.node_count() ) {
264  throw std::runtime_error( "Cannot reroot Tree on a Node that is not part of the Tree." );
265  }
266  reroot( tree, tree.node_at( node_index ));
267 }
268 
269 // =================================================================================================
270 // Ladderize
271 // =================================================================================================
272 
273 void ladderize( Tree& tree, LadderizeOrder order )
274 {
275  // For each node, get how many nodes its subtree (away from the root) has.
276  // We use this quantity to sort each node's links.
277  auto sub_sizes = subtree_sizes( tree );
278 
279  // Ladderize all nodes
280  for( auto& node_it : tree.nodes() ) {
281 
282  // No need to ladderize a leaf. It would still work, but we can use this as a speedup.
283  if( node_it->is_leaf() ) {
284  continue;
285  }
286 
287  // Get the sizes of the children/subtrees of this node.
288  std::vector<size_t> child_sizes;
289  std::vector<TreeLink*> child_links;
290  for( auto const& link_it : node_links( *node_it ) ) {
291 
292  // Don't treat the link towards the root; we only want to sort the subtree.
293  // Assert that the first iteration is actually this link towards the root.
294  if( link_it.is_first_iteration() ) {
295  assert( &link_it.link() == &node_it->primary_link() );
296  continue;
297  }
298 
299  child_sizes.push_back( sub_sizes[ link_it.link().outer().node().index() ] );
300  child_links.push_back( &link_it.link() );
301  }
302 
303  // Sort the sizes. We use stable sort in order to not change the order of equal sized subtrees.
304  auto child_order = ( order == LadderizeOrder::kSmallFirst
305  ? utils::stable_sort_indices( child_sizes.begin(), child_sizes.end(), std::less<size_t>() )
306  : utils::stable_sort_indices( child_sizes.begin(), child_sizes.end(), std::greater<size_t>() )
307  );
308 
309  // The number of indices needs to be the rank of the node (number of immediate children).
310  assert( child_order.size() == child_sizes.size() );
311  assert( child_order.size() == child_links.size() );
312  assert( child_order.size() == node_it->rank() );
313 
314  // Change all next links of the node so that they reflect the subtree size order.
315  auto cur_link = &node_it->primary_link();
316  for( auto child_order_i : child_order ) {
317 
318  // We use this assertion to ensure that each link is only processed once.
319  // At the end of this loop, we set it to nullptr, so a second encounter would fail.
320  assert( child_links[ child_order_i ] );
321 
322  // Set the link's next link and move on.
323  cur_link->reset_next( child_links[ child_order_i ] );
324  cur_link = child_links[ child_order_i ];
325 
326  // Set the link in the vector to null, so that the above assert can check that we
327  // never process it again.
328  child_links[ child_order_i ] = nullptr;
329  }
330 
331  // We now need to set the next pointer of the last link of the node so that it points
332  // back to the original starting node (the one towards the root).
333  cur_link->reset_next( &node_it->primary_link() );
334 
335  // Finally, assert that we processed all links. If so, all of them are null by now.
336  for( auto const& cl : child_links ) {
337  (void) cl;
338  assert( !cl );
339  }
340  }
341 }
342 
343 } // namespace tree
344 } // namespace genesis
Provides some valuable algorithms that are not part of the C++ 11 STL.
TreeLink & secondary_link()
Return the TreeLink of this TreeEdge that points away from the root.
Definition: edge.cpp:69
TreeLink & link()
Return the TreeLink that points towards the root.
Definition: node.cpp:75
utils::Range< IteratorNodeLinks< TreeLink const, TreeNode const, TreeEdge const > > node_links(ElementType const &element)
Definition: node_links.hpp:175
TreeEdge & add_new_node(Tree &tree, TreeNode &target_node)
Add a new Node as a leaf to an existing Node.
TreeNode & reset_primary_link(TreeLink *val)
Reset the internal pointer to the TreeLink of this TreeNode.
Definition: node.cpp:149
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.cpp:304
Tree operator functions.
virtual std::unique_ptr< BaseEdgeData > recreate() const
Polymorphically create a default-constructed instance of this class with the same derived type as it ...
Definition: edge_data.hpp:126
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.cpp:128
utils::Range< IteratorNodes > nodes()
Definition: tree/tree.cpp:484
std::vector< size_t > subtree_sizes(Tree const &tree, TreeNode const &node)
Calculate the sizes of all subtrees as seen from the given TreeNode.
Provides some valuable additions to STD.
void reroot(Tree &tree, TreeLink &at_link)
Reroot the Tree at the given TreeLink.
TreeLink & link_at(size_t index)
Return the TreeLink at a certain index.
Definition: tree/tree.cpp:284
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.cpp:258
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.cpp:350
void reroot_at_node(Tree &tree, size_t node_index)
Reroot the Tree at the TreeNode with the given index.
TreeLink & primary_link()
Return the TreeLink that points towards the root.
Definition: node.cpp:56
TreeLink & primary_link()
Return the TreeLink of this TreeEdge that points towards the root.
Definition: edge.cpp:53
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
bool belongs_to(Tree const &tree, TreeNode const &node)
Return whether the TreeNode belongs to the Tree, i.e., whether it is owned by the Tree...
TreeEdge & reset_secondary_link(TreeLink *val)
Reset the internal pointer to the secondary TreeLink of this TreeEdge.
Definition: edge.cpp:186
LinkContainerType & expose_link_container()
Get the container that stores all TreeLinks of the Tree.
Definition: tree/tree.cpp:393
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.cpp:105
void ladderize(Tree &tree, LadderizeOrder order)
virtual std::unique_ptr< BaseNodeData > recreate() const
Polymorphically create a default-constructed instance of this class with the same derived type as it ...
Definition: node_data.hpp:126
std::vector< size_t > stable_sort_indices(RandomAccessIterator first, RandomAccessIterator last, Comparator comparator)
Get the indices to the stable sorted order of the given range.
Definition: algorithm.hpp:149
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.cpp:85
Tree & reset_root_link_index(size_t val)
Reset the index of the link that is considered to be the root of the Tree.
Definition: tree/tree.cpp:377
Header of Tree class.
EdgeContainerType & expose_edge_container()
Get the container that stores all TreeEdges of the Tree.
Definition: tree/tree.cpp:417
NodeContainerType & expose_node_container()
Get the container that stores all TreeNodes of the Tree.
Definition: tree/tree.cpp:405
TreeEdge & reset_primary_link(TreeLink *val)
Reset the internal pointer to the primary TreeLink of this TreeEdge.
Definition: edge.cpp:172