A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tree/function/operators.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
36 
37 #include <ostream>
38 
39 namespace genesis {
40 namespace tree {
41 
42 // =================================================================================================
43 // Conversion
44 // =================================================================================================
45 
59  Tree const& source,
60  std::function< std::unique_ptr<BaseNodeData>( BaseNodeData const& node_data )> node_data_converter,
61  std::function< std::unique_ptr<BaseEdgeData>( BaseEdgeData const& edge_data )> edge_data_converter
62 ) {
63  // Copy the topology. All data pointers of the new tree are nullptrs.
64  auto res = source.clone_topology();
65 
66  // Convert data where there is data.
67  for( size_t i = 0; i < res.node_count(); ++i ) {
68  if( source.node_at(i).has_data() ) {
69  res.node_at(i).reset_data( node_data_converter( *source.node_at(i).data_ptr() ));
70  }
71  }
72  for( size_t i = 0; i < res.edge_count(); ++i ) {
73  if( source.edge_at(i).has_data() ) {
74  res.edge_at(i).reset_data( edge_data_converter( *source.edge_at(i).data_ptr() ));
75  }
76  }
77 
78  return res;
79 }
80 
81 // =================================================================================================
82 // Equality
83 // =================================================================================================
84 
101 bool equal(
102  Tree const& lhs,
103  Tree const& rhs,
104  std::function<bool ( TreeNode const&, TreeNode const&) > node_comparator,
105  std::function<bool ( TreeEdge const&, TreeEdge const&) > edge_comparator
106 ) {
107  // Check array sizes.
108  if (lhs.link_count() != rhs.link_count() ||
109  lhs.node_count() != rhs.node_count() ||
110  lhs.edge_count() != rhs.edge_count()
111  ) {
112  return false;
113  }
114 
115  // do a preorder traversal on both trees in parallel
116  auto it_l = preorder(lhs).begin();
117  auto it_r = preorder(rhs).begin();
118  for (
119  ;
120  it_l != preorder(lhs).end() && it_r != preorder(rhs).end();
121  ++it_l, ++it_r
122  ) {
123  if (it_l.node().rank() != it_r.node().rank() ||
124  !node_comparator( it_l.node(), it_r.node() ) ||
125  !edge_comparator( it_l.edge(), it_r.edge() )
126  ) {
127  return false;
128  }
129  }
130 
131  // check whether we are done with both trees
132  if( it_l != preorder(lhs).end() || it_r != preorder(rhs).end() ) {
133  return false;
134  }
135 
136  return true;
137 }
138 
139 /* *
140  * @brief Compares two trees for equality using the respective comparision operators for their nodes
141  * and edges.
142  *
143  * This method is mainly a shortcut for the other equal function, where the comparator functionals
144  * are instanciated using the default comparision operators of the tree's data.
145  */
146 // bool equal( Tree const& lhs, Tree const& rhs)
147 // {
148 // auto node_comparator = [] (
149 // TreeNode const& node_l,
150 // TreeNode const& node_r
151 // ) {
152 // return node_l == node_r;
153 // };
154 //
155 // auto edge_comparator = [] (
156 // TreeEdge const& edge_l,
157 // TreeEdge const& edge_r
158 // ) {
159 // return edge_l == edge_r;
160 // };
161 //
162 // return equal<TreeTypeL, TreeTypeR>(lhs, rhs, node_comparator, edge_comparator);
163 // }
164 
173 bool identical_topology( Tree const& lhs, Tree const& rhs)
174 {
175  auto node_comparator = [] (
176  TreeNode const& node_l,
177  TreeNode const& node_r
178  ) {
179  (void) node_l;
180  (void) node_r;
181  return true;
182  };
183 
184  auto edge_comparator = [] (
185  TreeEdge const& edge_l,
186  TreeEdge const& edge_r
187  ) {
188  (void) edge_l;
189  (void) edge_r;
190  return true;
191  };
192 
193  return equal( lhs, rhs, node_comparator, edge_comparator );
194 }
195 
199 bool belongs_to( Tree const& tree, TreeNode const& node )
200 {
201  return node.index() < tree.node_count() && &tree.node_at( node.index() ) == &node;
202 }
203 
207 bool belongs_to( TreeNode const& node, Tree const& tree )
208 {
209  return node.index() < tree.node_count() && &tree.node_at( node.index() ) == &node;
210 }
211 
215 bool belongs_to( Tree const& tree, TreeEdge const& edge )
216 {
217  return edge.index() < tree.edge_count() && &tree.edge_at( edge.index() ) == &edge;
218 }
219 
223 bool belongs_to( TreeEdge const& edge, Tree const& tree )
224 {
225  return edge.index() < tree.edge_count() && &tree.edge_at( edge.index() ) == &edge;
226 }
227 
231 bool belongs_to( Tree const& tree, TreeLink const& link )
232 {
233  return link.index() < tree.link_count() && &tree.link_at( link.index() ) == &link;
234 }
235 
239 bool belongs_to( TreeLink const& link, Tree const& tree )
240 {
241  return link.index() < tree.link_count() && &tree.link_at( link.index() ) == &link;
242 }
243 
248 {
249  // No need to check whether the two nodes belong to the same tree.
250  // If they don't, this loop will simply exit without success, so that a nullptr is returend.
251  for( auto it : node_links( lhs ) ) {
252  if( &it.link().outer().node() == &rhs ) {
253  return &it.link().edge();
254  }
255  }
256  return nullptr;
257 }
258 
262 TreeEdge const* edge_between( TreeNode const& lhs, TreeNode const& rhs )
263 {
264  // No need to check whether the two nodes belong to the same tree.
265  // If they don't, this loop will simply exit without success, so that a nullptr is returend.
266  for( auto it : node_links( lhs ) ) {
267  if( &it.link().outer().node() == &rhs ) {
268  return &it.link().edge();
269  }
270  }
271  return nullptr;
272 }
273 
274 // =================================================================================================
275 // Output
276 // =================================================================================================
277 
278 std::ostream& operator << ( std::ostream& out, Tree const& tree )
279 {
280  out << "Node Count: " << tree.node_count() << "\n";
281  out << "Edge Count: " << tree.edge_count() << "\n";
282  out << "Link Count: " << tree.link_count() << "\n";
283  return out;
284 }
285 
286 // =================================================================================================
287 // Validate
288 // =================================================================================================
289 
296 bool validate_topology( Tree const& tree )
297 {
298  // -----------------------------------------------------
299  // Empty Tree
300  // -----------------------------------------------------
301 
302  // check that the member arrays are valid: if at least one of them is empty, the tree is not
303  // fully initialized, so either it is a new tree without any data (all arrays empty, valid),
304  // or some are empty, but others not (not valid).
305  if (tree.links_.empty() || tree.nodes_.empty() || tree.edges_.empty()) {
306  bool empty = tree.links_.empty() && tree.nodes_.empty() && tree.edges_.empty();
307  if( !empty ) {
308  LOG_INFO << "Tree is not empty, but one of its data members is.";
309  }
310  return empty;
311  }
312 
313  // -----------------------------------------------------
314  // Links
315  // -----------------------------------------------------
316 
317  // Check Links.
318  std::vector<size_t> links_to_edges(tree.edges_.size(), 0);
319  std::vector<size_t> links_to_nodes(tree.nodes_.size(), 0);
320  for (size_t i = 0; i < tree.links_.size(); ++i) {
321  // Check indices.
322  if( i != tree.link_at(i).index() ) {
323  LOG_INFO << "Link at index " << i << " has wrong index ("
324  << tree.link_at(i).index() << ").";
325  return false;
326  }
327 
328  // Check next cycle and node.
329  auto nl = tree.links_[i].get();
330  do {
331  if( &nl->node() != &tree.links_[i]->node() ) {
332  LOG_INFO << "Link at index " << nl->index() << " points to wrong node.";
333  return false;
334  }
335  nl = &nl->next();
336  } while(nl != tree.links_[i].get());
337  ++links_to_nodes[tree.links_[i]->node().index()];
338 
339  // Check outer cycle.
340  if( &tree.links_[i]->outer().outer() != tree.links_[i].get() ) {
341  LOG_INFO << "Link at index " << i << " has wrong outer link.";
342  return false;
343  }
344 
345  // Check edge.
346  auto edge = &tree.links_[i]->edge();
347  if(
348  &edge->primary_link() != tree.links_[i].get() &&
349  &edge->secondary_link() != tree.links_[i].get()
350  ) {
351  LOG_INFO << "Link at index " << i << " has wrong edge pointer.";
352  return false;
353  }
354  ++links_to_edges[tree.links_[i]->edge().index()];
355  }
356 
357  // Check if all edges have been hit twice.
358  for (size_t i = 0; i < tree.edges_.size(); ++i) {
359  if (links_to_edges[i] != 2) {
360  LOG_INFO << "Edge at index " << i << " is not visited twice but " << links_to_edges[i]
361  << " times when traversing the links.";
362  return false;
363  }
364  }
365 
366  // Check if all nodes have been hit as many times as their rank is.
367  for (size_t i = 0; i < tree.nodes_.size(); ++i) {
368  if (links_to_nodes[i] != tree.nodes_[i]->rank() + 1) {
369  LOG_INFO << "Node at index " << i << " is not visited its rank + 1 ("
370  << tree.nodes_[i]->rank() << " + 1 = " << tree.nodes_[i]->rank() + 1
371  << ") times, but " << links_to_nodes[i] << " times when "
372  << "traversing the links.";
373  return false;
374  }
375  }
376 
377  // -----------------------------------------------------
378  // Nodes
379  // -----------------------------------------------------
380 
381  // Check Nodes.
382  for (size_t i = 0; i < tree.nodes_.size(); ++i) {
383  // Check indices.
384  if( i != tree.node_at(i).index() ) {
385  LOG_INFO << "Node at index " << i << " has wrong index ("
386  << tree.node_at(i).index() << ").";
387  return false;
388  }
389 
390  // Check link.
391  if( &tree.nodes_[i]->link().node() != tree.nodes_[i].get() ) {
392  LOG_INFO << "Node at index " << i << " has wrong link.";
393  return false;
394  }
395 
396  // If a node claims to be the root, it better be the root.
397  if( tree.node_at(i).is_root() && i != tree.root_node().index() ) {
398  LOG_INFO << "Node at index " << i << " has is_root(), but it is not tree.root_node().";
399  return false;
400  }
401 
402  // Except for the root, all nodes must have a primary link that is the secondary link
403  // of its edge.
404  if( ! tree.node_at(i).is_root() &&
405  &tree.node_at(i).primary_link() != &tree.node_at(i).primary_link().edge().secondary_link()
406  ) {
407  LOG_INFO << "Node at " << i << " (not the root node) has a primary link which is not "
408  << "the secondary link of its edge.";
409  return false;
410  }
411  }
412 
413  // Further check the root.
414  if( ! tree.root_node().is_root() ) {
415  LOG_INFO << "Root node does not have is_root().";
416  return false;
417  }
418 
419  // -----------------------------------------------------
420  // Edges
421  // -----------------------------------------------------
422 
423  // Check Edges.
424  for (size_t i = 0; i < tree.edges_.size(); ++i) {
425  // Check indices.
426  if( i != tree.edge_at(i).index() ) {
427  LOG_INFO << "Edge at index " << i << " has wrong index ("
428  << tree.edge_at(i).index() << ").";
429  return false;
430  }
431 
432  // Check links.
433  if( &tree.edges_[i]->primary_link().edge() != tree.edges_[i].get() ) {
434  LOG_INFO << "Edge at index " << i << " has wrong primary link.";
435  return false;
436  }
437  if( &tree.edges_[i]->secondary_link().edge() != tree.edges_[i].get() ) {
438  LOG_INFO << "Edge at index " << i << " has wrong secondary link.";
439  return false;
440  }
441 
442  // Check outer links.
443  if( &tree.edges_[i]->primary_link().outer() != &tree.edges_[i]->secondary_link() ) {
444  LOG_INFO << "Edge at index " << i << " has a primary link that does not "
445  << "connect to its secondary link.";
446  return false;
447  }
448  if( &tree.edges_[i]->secondary_link().outer() != &tree.edges_[i]->primary_link() ) {
449  LOG_INFO << "Edge at index " << i << " has a secondary link that does not "
450  << "connect to its primary link.";
451  return false;
452  }
453 
454  // Check primary node, except for root.
455  if( ! tree.edge_at(i).primary_node().is_root() &&
456  &tree.edge_at(i).primary_node().primary_link() == &tree.edge_at(i).primary_link()
457  ) {
458  LOG_INFO << "Edge at " << i << " has a primary node that does not "
459  << "point towards the root.";
460  return false;
461  }
462 
463  // Check secondary node.
464  if( &tree.edge_at(i).secondary_node().primary_link() != &tree.edge_at(i).secondary_link() ) {
465  LOG_INFO << "Edge at " << i << " has a secondary node that does not "
466  << "point towards the root.";
467  return false;
468  }
469  }
470 
471  // -----------------------------------------------------
472  // Eulertour
473  // -----------------------------------------------------
474 
475  // If we are here, all three arrays (links, nodes, edges) contain data, so we can start a full
476  // traversal along all links.
477 
478  // Count, how many times the elements are hit while traversing.
479  std::vector<size_t> it_links(tree.links_.size(), 0);
480  std::vector<size_t> it_edges(tree.edges_.size(), 0);
481  std::vector<size_t> it_nodes(tree.nodes_.size(), 0);
482 
483  // Do the traversal. We do not use the iterator here, to keep it simple when testing.
484  // (We want to validate the tree here, not the iterator.)
485  auto link = tree.links_.front().get();
486  do {
487  ++it_links[ link->index() ];
488  ++it_edges[ link->edge().index() ];
489  ++it_nodes[ link->node().index() ];
490  link = &link->next().outer();
491 
492  // Check if we have a loop. Baaad.
493  if( it_links[ link->index() ] > 1 ) {
494  LOG_INFO << "Loop or other kind of wrong link chain in Tree.";
495  return false;
496  }
497  } while (link != tree.links_.front().get());
498 
499  // Check if all links have been hit once.
500  for (size_t i = 0; i < tree.links_.size(); i++) {
501  if (it_links[i] != 1) {
502  LOG_INFO << "Link at index " << i << " is not visited 1 but " << it_links[i]
503  << " times when iterating the tree.";
504  return false;
505  }
506  }
507 
508  // Check if all edges have been hit twice.
509  for (size_t i = 0; i < tree.edges_.size(); ++i) {
510  if (it_edges[i] != 2) {
511  LOG_INFO << "Edge at index " << i << " is not visited 2 but " << it_edges[i]
512  << " times when iterating the tree.";
513  return false;
514  }
515  }
516 
517  // Check if all nodes have been hit as many times as their rank is.
518  for (size_t i = 0; i < tree.nodes_.size(); ++i) {
519  if (it_nodes[i] != tree.nodes_[i]->rank() + 1) {
520  LOG_INFO << "Node at index " << i << " is not visited "
521  << tree.nodes_[i]->rank() << " + 1 = " << (tree.nodes_[i]->rank() + 1)
522  << " times, but " << it_nodes[i] << " times when iterating the "
523  << "tree.";
524  return false;
525  }
526  }
527 
528  return true;
529 }
530 
531 } // namespace tree
532 } // namespace genesis
Tree convert(Tree const &source, std::function< std::unique_ptr< BaseNodeData >(BaseNodeData const &node_data)> node_data_converter, std::function< std::unique_ptr< BaseEdgeData >(BaseEdgeData const &edge_data)> edge_data_converter)
Create a tree with the same topology as the source tree, while converting its data.
size_t index() const
Return the index of this Edge.
Definition: edge.cpp:45
TreeLink & secondary_link()
Return the TreeLink of this TreeEdge that points away from the root.
Definition: edge.cpp:69
utils::Range< IteratorNodeLinks< TreeLink const, TreeNode const, TreeEdge const > > node_links(ElementType const &element)
Definition: node_links.hpp:175
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.cpp:304
Base class for storing data on Nodes of a Tree.
Definition: node_data.hpp:66
Tree operator functions.
bool is_root() const
True iff the node is the root of its Tree.
Definition: node.cpp:209
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.cpp:128
TreeEdge * edge_between(TreeNode &lhs, TreeNode &rhs)
Return the TreeEdge between two TreeNode&s, if they are neighbours, or nullptr otherwise.
size_t index() const
Return the index of this Node.
Definition: node.cpp:48
utils::Range< IteratorPreorder< TreeLink const, TreeNode const, TreeEdge const > > preorder(ElementType const &element)
TreeLink & link_at(size_t index)
Return the TreeLink at a certain index.
Definition: tree/tree.cpp:284
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.cpp:258
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.cpp:350
TreeLink & primary_link()
Return the TreeLink that points towards the root.
Definition: node.cpp:56
TreeLink & primary_link()
Return the TreeLink of this TreeEdge that points towards the root.
Definition: edge.cpp:53
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
bool belongs_to(Tree const &tree, TreeNode const &node)
Return whether the TreeNode belongs to the Tree, i.e., whether it is owned by the Tree...
Tree clone_topology() const
Return a Tree with the same topology, but without any data.
Definition: tree/tree.cpp:137
bool has_data() const
Return true if the TreeEdge has a data object assigned to it.
Definition: edge.cpp:117
bool validate_topology(Tree const &tree)
Validate that all internal pointers of the Tree elements (TreeLinks, TreeNodes, TreeEdges) to each ot...
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.cpp:101
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.cpp:105
Provides easy and fast logging functionality.
size_t edge_count() const
Return the number of TreeEdges of the Tree.
Definition: tree/tree.cpp:358
TreeNode & reset_data(std::unique_ptr< BaseNodeData > data)
Reset the data pointer of this TreeNode.
Definition: node.cpp:163
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.cpp:324
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.cpp:85
bool has_data() const
Return true if the TreeNode has a data object assigned to it.
Definition: node.cpp:94
Base class for storing data on Edges of a Tree.
Definition: edge_data.hpp:66
bool identical_topology(Tree const &lhs, Tree const &rhs)
Returns true iff both trees have an identical topology.
std::ostream & operator<<(std::ostream &out, Tree const &tree)
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compares two trees for equality given binary comparator functionals for their nodes and edges...
size_t link_count() const
Return the number of TreeLinks of the Tree.
Definition: tree/tree.cpp:342
#define LOG_INFO
Log an info message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:98