A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tree/function/operators.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
39 
40 #include <ostream>
41 
42 namespace genesis {
43 namespace tree {
44 
45 // =================================================================================================
46 // Conversion
47 // =================================================================================================
48 
62  Tree const& source,
63  std::function< std::unique_ptr<BaseNodeData>( BaseNodeData const& node_data )> node_data_converter,
64  std::function< std::unique_ptr<BaseEdgeData>( BaseEdgeData const& edge_data )> edge_data_converter
65 ) {
66  // Copy the topology. All data pointers of the new tree are nullptrs.
67  auto res = source.clone_topology();
68 
69  // Convert data where there is data.
70  for( size_t i = 0; i < res.node_count(); ++i ) {
71  if( source.node_at(i).has_data() ) {
72  res.node_at(i).reset_data( node_data_converter( *source.node_at(i).data_ptr() ));
73  }
74  }
75  for( size_t i = 0; i < res.edge_count(); ++i ) {
76  if( source.edge_at(i).has_data() ) {
77  res.edge_at(i).reset_data( edge_data_converter( *source.edge_at(i).data_ptr() ));
78  }
79  }
80 
81  return res;
82 }
83 
84 // =================================================================================================
85 // Equality
86 // =================================================================================================
87 
104 bool equal(
105  Tree const& lhs,
106  Tree const& rhs,
107  std::function<bool ( TreeNode const&, TreeNode const&) > node_comparator,
108  std::function<bool ( TreeEdge const&, TreeEdge const&) > edge_comparator
109 ) {
110  // Check array sizes.
111  if (lhs.link_count() != rhs.link_count() ||
112  lhs.node_count() != rhs.node_count() ||
113  lhs.edge_count() != rhs.edge_count()
114  ) {
115  return false;
116  }
117 
118  // do a preorder traversal on both trees in parallel
119  auto it_l = preorder(lhs).begin();
120  auto it_r = preorder(rhs).begin();
121  for (
122  ;
123  it_l != preorder(lhs).end() && it_r != preorder(rhs).end();
124  ++it_l, ++it_r
125  ) {
126  if( degree( it_l.node() ) != degree( it_r.node() ) ||
127  !node_comparator( it_l.node(), it_r.node() ) ||
128  !edge_comparator( it_l.edge(), it_r.edge() )
129  ) {
130  return false;
131  }
132  }
133 
134  // check whether we are done with both trees
135  if( it_l != preorder(lhs).end() || it_r != preorder(rhs).end() ) {
136  return false;
137  }
138 
139  return true;
140 }
141 
142 /* *
143  * @brief Compares two trees for equality using the respective comparision operators for their nodes
144  * and edges.
145  *
146  * This method is mainly a shortcut for the other equal function, where the comparator functionals
147  * are instanciated using the default comparision operators of the tree's data.
148  */
149 // bool equal( Tree const& lhs, Tree const& rhs)
150 // {
151 // auto node_comparator = [] (
152 // TreeNode const& node_l,
153 // TreeNode const& node_r
154 // ) {
155 // return node_l == node_r;
156 // };
157 //
158 // auto edge_comparator = [] (
159 // TreeEdge const& edge_l,
160 // TreeEdge const& edge_r
161 // ) {
162 // return edge_l == edge_r;
163 // };
164 //
165 // return equal<TreeTypeL, TreeTypeR>(lhs, rhs, node_comparator, edge_comparator);
166 // }
167 
176 bool identical_topology( Tree const& lhs, Tree const& rhs)
177 {
178  auto node_comparator = [] (
179  TreeNode const& node_l,
180  TreeNode const& node_r
181  ) {
182  (void) node_l;
183  (void) node_r;
184  return true;
185  };
186 
187  auto edge_comparator = [] (
188  TreeEdge const& edge_l,
189  TreeEdge const& edge_r
190  ) {
191  (void) edge_l;
192  (void) edge_r;
193  return true;
194  };
195 
196  return equal( lhs, rhs, node_comparator, edge_comparator );
197 }
198 
202 bool belongs_to( Tree const& tree, TreeNode const& node )
203 {
204  return node.index() < tree.node_count() && &tree.node_at( node.index() ) == &node;
205 }
206 
210 bool belongs_to( TreeNode const& node, Tree const& tree )
211 {
212  return node.index() < tree.node_count() && &tree.node_at( node.index() ) == &node;
213 }
214 
218 bool belongs_to( Tree const& tree, TreeEdge const& edge )
219 {
220  return edge.index() < tree.edge_count() && &tree.edge_at( edge.index() ) == &edge;
221 }
222 
226 bool belongs_to( TreeEdge const& edge, Tree const& tree )
227 {
228  return edge.index() < tree.edge_count() && &tree.edge_at( edge.index() ) == &edge;
229 }
230 
234 bool belongs_to( Tree const& tree, TreeLink const& link )
235 {
236  return link.index() < tree.link_count() && &tree.link_at( link.index() ) == &link;
237 }
238 
242 bool belongs_to( TreeLink const& link, Tree const& tree )
243 {
244  return link.index() < tree.link_count() && &tree.link_at( link.index() ) == &link;
245 }
246 
251 {
252  // No need to check whether the two nodes belong to the same tree.
253  // If they don't, this loop will simply exit without success, so that a nullptr is returend.
254  for( auto it : node_links( lhs ) ) {
255  if( &it.link().outer().node() == &rhs ) {
256  return &it.link().edge();
257  }
258  }
259  return nullptr;
260 }
261 
265 TreeEdge const* edge_between( TreeNode const& lhs, TreeNode const& rhs )
266 {
267  // No need to check whether the two nodes belong to the same tree.
268  // If they don't, this loop will simply exit without success, so that a nullptr is returend.
269  for( auto it : node_links( lhs ) ) {
270  if( &it.link().outer().node() == &rhs ) {
271  return &it.link().edge();
272  }
273  }
274  return nullptr;
275 }
276 
277 // =================================================================================================
278 // Output Printing
279 // =================================================================================================
280 
281 std::string print_info( Tree const& tree )
282 {
283  return "<genesis::tree::Tree"
284  " node_count=" + std::to_string( tree.node_count() ) +
285  " edge_count=" + std::to_string( tree.edge_count() ) +
286  " link_count=" + std::to_string( tree.link_count() ) +
287  ">";
288 }
289 
290 std::string print_info( TreeEdge const& edge )
291 {
292  return "<genesis::tree::TreeEdge"
293  " index=" + std::to_string( edge.index() ) +
294  " has_data=" + ( edge.has_data() ? "true" : "false" ) +
295  ">";
296 }
297 
298 std::string print_info( TreeLink const& link )
299 {
300  return "<genesis::tree::TreeLink"
301  " index=" + std::to_string( link.index() ) +
302  // " has_data=" + ( edge.has_data() ? "true" : "false" ) +
303  ">";
304 }
305 
306 std::string print_info( TreeNode const& node )
307 {
308  return "<genesis::tree::TreeNode"
309  " index=" + std::to_string( node.index() ) +
310  " has_data=" + ( node.has_data() ? "true" : "false" ) +
311  ">";
312 }
313 
314 std::string print_gist( Tree const& tree, int items )
315 {
316  auto pc = PrinterCompact();
317  pc.limit( items );
318  return pc.print( tree );
319 }
320 
321 std::ostream& operator << ( std::ostream& out, Tree const& tree )
322 {
323  if( utils::Options::get().print_object_infos() ) {
324  out << print_info( tree );
325  }
326  out << print_gist( tree, utils::Options::get().print_object_gists() );
327  return out;
328 }
329 
330 std::ostream& operator << ( std::ostream& out, TreeEdge const& edge )
331 {
332  if( utils::Options::get().print_object_infos() ) {
333  out << print_info( edge );
334  }
335  return out;
336 }
337 
338 std::ostream& operator << ( std::ostream& out, TreeLink const& link )
339 {
340  if( utils::Options::get().print_object_infos() ) {
341  out << print_info( link );
342  }
343  return out;
344 }
345 
346 std::ostream& operator << ( std::ostream& out, TreeNode const& node )
347 {
348  if( utils::Options::get().print_object_infos() ) {
349  out << print_info( node );
350  }
351  return out;
352 }
353 
354 // =================================================================================================
355 // Validate
356 // =================================================================================================
357 
364 bool validate_topology( Tree const& tree )
365 {
366  // -----------------------------------------------------
367  // Empty Tree
368  // -----------------------------------------------------
369 
370  // check that the member arrays are valid: if at least one of them is empty, the tree is not
371  // fully initialized, so either it is a new tree without any data (all arrays empty, valid),
372  // or some are empty, but others not (not valid).
373  if (tree.links_.empty() || tree.nodes_.empty() || tree.edges_.empty()) {
374  bool empty = tree.links_.empty() && tree.nodes_.empty() && tree.edges_.empty();
375  if( !empty ) {
376  LOG_INFO << "Tree is not empty, but one of its data members is.";
377  }
378  return empty;
379  }
380 
381  // -----------------------------------------------------
382  // Links
383  // -----------------------------------------------------
384 
385  // Check Links.
386  std::vector<size_t> links_to_edges(tree.edges_.size(), 0);
387  std::vector<size_t> links_to_nodes(tree.nodes_.size(), 0);
388  for (size_t i = 0; i < tree.links_.size(); ++i) {
389  // Check indices.
390  if( i != tree.link_at(i).index() ) {
391  LOG_INFO << "Link at index " << i << " has wrong index ("
392  << tree.link_at(i).index() << ").";
393  return false;
394  }
395 
396  // Check next cycle and node.
397  auto nl = tree.links_[i].get();
398  do {
399  if( &nl->node() != &tree.links_[i]->node() ) {
400  LOG_INFO << "Link at index " << nl->index() << " points to wrong node.";
401  return false;
402  }
403  nl = &nl->next();
404  } while(nl != tree.links_[i].get());
405  ++links_to_nodes[tree.links_[i]->node().index()];
406 
407  // Check outer cycle.
408  if( &tree.links_[i]->outer().outer() != tree.links_[i].get() ) {
409  LOG_INFO << "Link at index " << i << " has wrong outer link.";
410  return false;
411  }
412 
413  // Check edge.
414  auto edge = &tree.links_[i]->edge();
415  if(
416  &edge->primary_link() != tree.links_[i].get() &&
417  &edge->secondary_link() != tree.links_[i].get()
418  ) {
419  LOG_INFO << "Link at index " << i << " has wrong edge pointer.";
420  return false;
421  }
422  ++links_to_edges[tree.links_[i]->edge().index()];
423  }
424 
425  // Check if all edges have been hit twice.
426  for (size_t i = 0; i < tree.edges_.size(); ++i) {
427  if (links_to_edges[i] != 2) {
428  LOG_INFO << "Edge at index " << i << " is not visited twice but " << links_to_edges[i]
429  << " times when traversing the links.";
430  return false;
431  }
432  }
433 
434  // Check if all nodes have been hit as many times as their degree is.
435  for (size_t i = 0; i < tree.nodes_.size(); ++i) {
436  if (links_to_nodes[i] != degree( *tree.nodes_[i] ) ) {
437  LOG_INFO << "Node at index " << i << " is not visited its degree ("
438  << degree( *tree.nodes_[i] )
439  << ") times, but " << links_to_nodes[i] << " times when "
440  << "traversing the links.";
441  return false;
442  }
443  }
444 
445  // -----------------------------------------------------
446  // Nodes
447  // -----------------------------------------------------
448 
449  // Check Nodes.
450  for (size_t i = 0; i < tree.nodes_.size(); ++i) {
451  // Check indices.
452  if( i != tree.node_at(i).index() ) {
453  LOG_INFO << "Node at index " << i << " has wrong index ("
454  << tree.node_at(i).index() << ").";
455  return false;
456  }
457 
458  // Check link.
459  if( &tree.nodes_[i]->link().node() != tree.nodes_[i].get() ) {
460  LOG_INFO << "Node at index " << i << " has wrong link.";
461  return false;
462  }
463 
464  // If a node claims to be the root, it better be the root.
465  if( is_root( tree.node_at(i) ) && i != tree.root_node().index() ) {
466  LOG_INFO << "Node at index " << i << " has is_root(), but it is not tree.root_node().";
467  return false;
468  }
469 
470  // Except for the root, all nodes must have a primary link that is the secondary link
471  // of its edge.
472  if( ! is_root( tree.node_at(i) ) &&
473  &tree.node_at(i).primary_link() != &tree.node_at(i).primary_link().edge().secondary_link()
474  ) {
475  LOG_INFO << "Node at " << i << " (not the root node) has a primary link which is not "
476  << "the secondary link of its edge.";
477  return false;
478  }
479  }
480 
481  // Further check the root.
482  if( ! is_root( tree.root_node() ) ) {
483  LOG_INFO << "Root node does not have is_root().";
484  return false;
485  }
486 
487  // -----------------------------------------------------
488  // Edges
489  // -----------------------------------------------------
490 
491  // Check Edges.
492  for (size_t i = 0; i < tree.edges_.size(); ++i) {
493  // Check indices.
494  if( i != tree.edge_at(i).index() ) {
495  LOG_INFO << "Edge at index " << i << " has wrong index ("
496  << tree.edge_at(i).index() << ").";
497  return false;
498  }
499 
500  // Check links.
501  if( &tree.edges_[i]->primary_link().edge() != tree.edges_[i].get() ) {
502  LOG_INFO << "Edge at index " << i << " has wrong primary link.";
503  return false;
504  }
505  if( &tree.edges_[i]->secondary_link().edge() != tree.edges_[i].get() ) {
506  LOG_INFO << "Edge at index " << i << " has wrong secondary link.";
507  return false;
508  }
509 
510  // Check outer links.
511  if( &tree.edges_[i]->primary_link().outer() != &tree.edges_[i]->secondary_link() ) {
512  LOG_INFO << "Edge at index " << i << " has a primary link that does not "
513  << "connect to its secondary link.";
514  return false;
515  }
516  if( &tree.edges_[i]->secondary_link().outer() != &tree.edges_[i]->primary_link() ) {
517  LOG_INFO << "Edge at index " << i << " has a secondary link that does not "
518  << "connect to its primary link.";
519  return false;
520  }
521 
522  // Check primary node, except for root.
523  if( ! is_root( tree.edge_at(i).primary_node() ) &&
524  &tree.edge_at(i).primary_node().primary_link() == &tree.edge_at(i).primary_link()
525  ) {
526  LOG_INFO << "Edge at " << i << " has a primary node that does not "
527  << "point towards the root.";
528  return false;
529  }
530 
531  // Check secondary node.
532  if( &tree.edge_at(i).secondary_node().primary_link() != &tree.edge_at(i).secondary_link() ) {
533  LOG_INFO << "Edge at " << i << " has a secondary node that does not "
534  << "point towards the root.";
535  return false;
536  }
537  }
538 
539  // -----------------------------------------------------
540  // Eulertour
541  // -----------------------------------------------------
542 
543  // If we are here, all three arrays (links, nodes, edges) contain data, so we can start a full
544  // traversal along all links.
545 
546  // Count, how many times the elements are hit while traversing.
547  std::vector<size_t> it_links(tree.links_.size(), 0);
548  std::vector<size_t> it_edges(tree.edges_.size(), 0);
549  std::vector<size_t> it_nodes(tree.nodes_.size(), 0);
550 
551  // Do the traversal. We do not use the iterator here, to keep it simple when testing.
552  // (We want to validate the tree here, not the iterator.)
553  auto link = tree.links_.front().get();
554  do {
555  ++it_links[ link->index() ];
556  ++it_edges[ link->edge().index() ];
557  ++it_nodes[ link->node().index() ];
558  link = &link->next().outer();
559 
560  // Check if we have a loop. Baaad.
561  if( it_links[ link->index() ] > 1 ) {
562  LOG_INFO << "Loop or other kind of wrong link chain in Tree.";
563  return false;
564  }
565  } while (link != tree.links_.front().get());
566 
567  // Check if all links have been hit once.
568  for (size_t i = 0; i < tree.links_.size(); i++) {
569  if (it_links[i] != 1) {
570  LOG_INFO << "Link at index " << i << " is not visited 1 but " << it_links[i]
571  << " times when iterating the tree.";
572  return false;
573  }
574  }
575 
576  // Check if all edges have been hit twice.
577  for (size_t i = 0; i < tree.edges_.size(); ++i) {
578  if (it_edges[i] != 2) {
579  LOG_INFO << "Edge at index " << i << " is not visited 2 but " << it_edges[i]
580  << " times when iterating the tree.";
581  return false;
582  }
583  }
584 
585  // Check if all nodes have been hit as many times as their degree is.
586  for (size_t i = 0; i < tree.nodes_.size(); ++i) {
587  if (it_nodes[i] != degree( *tree.nodes_[i] ) ) {
588  LOG_INFO << "Node at index " << i << " is not visited "
589  << degree( *tree.nodes_[i] )
590  << " times, but " << it_nodes[i] << " times when iterating the "
591  << "tree.";
592  return false;
593  }
594  }
595 
596  return true;
597 }
598 
599 } // namespace tree
600 } // namespace genesis
std::string print_info(Tree const &tree)
Tree convert(Tree const &source, std::function< std::unique_ptr< BaseNodeData >(BaseNodeData const &node_data)> node_data_converter, std::function< std::unique_ptr< BaseEdgeData >(BaseEdgeData const &edge_data)> edge_data_converter)
Create a tree with the same topology as the source tree, while converting its data.
TreeLink & secondary_link()
Return the TreeLink of this TreeEdge that points away from the root.
Definition: edge.hpp:129
size_t index() const
Return the index of this Edge.
Definition: edge.hpp:105
utils::Range< IteratorNodeLinks< TreeLink const, TreeNode const, TreeEdge const > > node_links(ElementType const &element)
Definition: node_links.hpp:175
size_t degree(TreeNode const &node)
Return the degree of the node, i.e. how many neighbouring nodes it has.
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.cpp:304
Base class for storing data on Nodes of a Tree.
Definition: node_data.hpp:66
Tree operator functions.
Print a Tree in a compact form, i.e., each node and edge on one line.
Definition: compact.hpp:82
TreeLink & primary_link()
Return the TreeLink that points towards the root.
Definition: node.hpp:108
bool is_root(TreeNode const &node)
Return whether the node is the root of its Tree.
TreeNode & secondary_node()
Return the TreeNode of this TreeEdge that points away from the root.
Definition: edge.hpp:161
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:381
std::string print_gist(Tree const &tree, int items)
TreeEdge * edge_between(TreeNode &lhs, TreeNode &rhs)
Return the TreeEdge between two TreeNode&s, if they are neighbours, or nullptr otherwise.
size_t index() const
Return the index of this Node.
Definition: node.hpp:100
utils::Range< IteratorPreorder< TreeLink const, TreeNode const, TreeEdge const > > preorder(ElementType const &element)
TreeLink & link_at(size_t index)
Return the TreeLink at a certain index.
Definition: tree/tree.cpp:284
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.cpp:258
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.cpp:350
TreeNode & primary_node()
Return the TreeNode of this TreeEdge that points towards the root.
Definition: edge.hpp:145
BaseEdgeData * data_ptr()
Return a pointer to the data.
Definition: edge.hpp:212
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
bool belongs_to(Tree const &tree, TreeNode const &node)
Return whether the TreeNode belongs to the Tree, i.e., whether it is owned by the Tree...
Tree clone_topology() const
Return a Tree with the same topology, but without any data.
Definition: tree/tree.cpp:137
bool has_data() const
Return true if the TreeEdge has a data object assigned to it.
Definition: edge.hpp:177
bool validate_topology(Tree const &tree)
Validate that all internal pointers of the Tree elements (TreeLinks, TreeNodes, TreeEdges) to each ot...
Provides easy and fast logging functionality.
size_t edge_count() const
Return the number of TreeEdges of the Tree.
Definition: tree/tree.cpp:358
TreeEdge & edge_at(size_t index)
Return the TreeEdge at a certain index.
Definition: tree/tree.cpp:324
bool has_data() const
Return true if the TreeNode has a data object assigned to it.
Definition: node.hpp:146
TreeLink & primary_link()
Return the TreeLink of this TreeEdge that points towards the root.
Definition: edge.hpp:113
TreeNode & reset_data(std::unique_ptr< BaseNodeData > data)
Reset the data pointer of this TreeNode.
Definition: node.hpp:239
Base class for storing data on Edges of a Tree.
Definition: edge_data.hpp:66
bool identical_topology(Tree const &lhs, Tree const &rhs)
Returns true iff both trees have an identical topology.
std::ostream & operator<<(std::ostream &out, Tree const &tree)
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compares two trees for equality given binary comparator functionals for their nodes and edges...
static Options & get()
Returns a single instance of this class.
Definition: options.hpp:60
BaseNodeData * data_ptr()
Return a pointer to the data.
Definition: node.hpp:181
size_t link_count() const
Return the number of TreeLinks of the Tree.
Definition: tree/tree.cpp:342
#define LOG_INFO
Log an info message. See genesis::utils::LoggingLevel.
Definition: logging.hpp:98