A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
path.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_ITERATOR_PATH_H_
2 #define GENESIS_TREE_ITERATOR_PATH_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include "genesis/tree/tree.hpp"
36 
37 #include <assert.h>
38 #include <iterator>
39 #include <vector>
40 
41 namespace genesis {
42 namespace tree {
43 
44 // =================================================================================================
45 // Forward Declarations
46 // =================================================================================================
47 
48 class Tree;
49 class TreeNode;
50 class TreeEdge;
51 class TreeLink;
52 
53 // =================================================================================================
54 // Path Iterator
55 // =================================================================================================
56 
57 template <typename LinkType, typename NodeType, typename EdgeType>
59 {
60 
61 public:
62 
63  // -----------------------------------------------------
64  // Typedefs
65  // -----------------------------------------------------
66 
67  using iterator_category = std::forward_iterator_tag;
69 
70  // -----------------------------------------------------
71  // Constructors and Rule of Five
72  // -----------------------------------------------------
73 
75  : start_( nullptr )
76  , finish_( nullptr )
77  , lca_( nullptr )
78  , reverse_path_()
79  {}
80 
81  IteratorPath( NodeType& start, NodeType& finish )
82  : IteratorPath( start.link(), finish.link() )
83  {}
84 
85  IteratorPath( LinkType& start, LinkType& finish )
86  : start_( &start.node().primary_link() )
87  , finish_( &finish.node().primary_link() )
88  , lca_( nullptr )
89  , reverse_path_()
90  {
91  // In this constructor, we find and store the primary links of the nodes on the path.
92  // Then, when moving along the path with this iterator (operator++), we simply need to
93  // move along that list of links.
94  // Furthermore, by using the primary links of the nodes (that is, the ones pointing towards
95  // the root), we can easily spot the last common ancestor (LCA) of the start and finish
96  // node.
97 
98  // TODO there is a lot of code duplication between this and the lowest common ancestor
99  // TODO function. this is bad and could be resolved some how...
100 
101  // Helper function to find all links between a given link and the root.
102  auto path_to_root = [] ( LinkType* link ) {
103  std::vector<LinkType*> path;
104 
105  // Move towards the root and record all links in between.
106  LinkType* cur_link = link;
107  while( &cur_link->edge().secondary_link() == cur_link ) {
108 
109  // The above while condition means: is it the root?! Assert, that the default way of
110  // checking for the root by using the node gives the same result.
111  assert( ! cur_link->node().is_root() );
112 
113  // Assert that the primary direction is correct.
114  assert( cur_link == &cur_link->edge().secondary_link() );
115 
116  // Add the primary link of the current node to the list.
117  path.push_back( cur_link );
118 
119  // Move one node towards the root.
120  // Assert that the default way of finding the next node towards the root (by using
121  // the edge) gives the same result as simply using the link's outer node.
122  // This is the case because the cur link is the one that points towards the root
123  // (which was asserted above).
124  assert( &cur_link->edge().primary_link() == &cur_link->outer() );
125  cur_link = &cur_link->outer().node().primary_link();
126  }
127 
128  // Now finally add the root itself and return the list.
129  assert( cur_link->node().is_root() );
130  path.push_back( cur_link );
131  return path;
132  };
133 
134  // Treat special case start == finish.
135  // That makes sure that we do not need to specially check for an empty path later.
136  if( start_ == finish_ ) {
137  reverse_path_.push_back( start_ );
138  lca_ = start_;
139  return;
140  }
141 
142  // Get paths to root for both links.
143  auto start_path = path_to_root( start_ );
144  auto finish_path = path_to_root( finish_ );
145 
146  // We must have at least the two original links in the front and the root in the back.
147  assert( start_path.size() > 0 && finish_path.size() > 0 );
148  assert( start_path.front() == start_ );
149  assert( finish_path.front() == finish_ );
150  assert( start_path.back() == finish_path.back() );
151 
152  // Remove from back as long as the last two elements are the same.
153  // At the end of this, the remaining links are the ones on the path between
154  // the two original links.
155  while(
156  start_path.size() > 1 &&
157  finish_path.size() > 1 &&
158  start_path.at( start_path.size() - 1 ) == finish_path.at( finish_path.size() - 1 ) &&
159  start_path.at( start_path.size() - 2 ) == finish_path.at( finish_path.size() - 2 )
160  ) {
161  start_path.pop_back();
162  finish_path.pop_back();
163  }
164 
165  // Now, the last elements need to be the same (the LCA of the start and finish node).
166  assert( start_path.size() > 0 && finish_path.size() > 0 );
167  assert( start_path.back() == finish_path.back() );
168 
169  // The LCA (last common ancestor) is the node that both paths have in common. Store it.
170  lca_ = start_path.back();
171 
172  // We store the path backwards, because removing from a vector's end is faster.
173  // Thus, first add the path from finish to root/LCA, then from root/LCA to start (reversed).
174  // Also, remove the root/LCA once, otherwise, it would appear twice, as it is in both lists.
175  reverse_path_ = std::move( finish_path );
176  reverse_path_.pop_back();
177  reverse_path_.insert( reverse_path_.end(), start_path.rbegin(), start_path.rend() );
178 
179  assert( reverse_path_.front() == finish_ );
180  assert( reverse_path_.back() == start_ );
181  }
182 
183  ~IteratorPath() = default;
184 
185  IteratorPath( IteratorPath const& ) = default;
186  IteratorPath( IteratorPath&& ) = default;
187 
188  IteratorPath& operator= ( IteratorPath const& ) = default;
189  IteratorPath& operator= ( IteratorPath&& ) = default;
190 
191  // -----------------------------------------------------
192  // Operators
193  // -----------------------------------------------------
194 
196  {
197  return *this;
198  }
199 
201  {
202  // If there is more than one element in the path, move to the next one.
203  if( reverse_path_.size() > 1 ) {
204  reverse_path_.pop_back();
205 
206  // If there is only one element left (which was just visited), we are done.
207  } else {
208  start_ = nullptr;
209  finish_ = nullptr;
210  lca_ = nullptr;
211  reverse_path_.clear();
212  }
213  return *this;
214  }
215 
217  {
218  self_type tmp = *this;
219  ++(*this);
220  return tmp;
221  }
222 
223  bool operator == (const self_type &other) const
224  {
225  return other.start_ == start_ &&
226  other.finish_ == finish_ &&
227  other.reverse_path_.size() == reverse_path_.size();
228  }
229 
230  bool operator != (const self_type &other) const
231  {
232  return !(other == *this);
233  }
234 
235  // -----------------------------------------------------
236  // Members
237  // -----------------------------------------------------
238 
258  {
259  return reverse_path_.back() == lca_;
260  }
261 
265  bool is_lca() const
266  {
267  return is_last_common_ancestor();
268  }
269 
270  LinkType& link() const
271  {
272  return *reverse_path_.back();
273  }
274 
275  NodeType& node() const
276  {
277  return reverse_path_.back()->node();
278  }
279 
280  EdgeType& edge() const
281  {
282  return reverse_path_.back()->edge();
283  }
284 
285  LinkType& start_link() const
286  {
287  return *start_;
288  }
289 
290  NodeType& start_node() const
291  {
292  return start_->node();
293  }
294 
295  LinkType& finish_link() const
296  {
297  return *finish_;
298  }
299 
300  NodeType& finish_node() const
301  {
302  return finish_->node();
303  }
304 
305 private:
306 
307  LinkType* start_;
308  LinkType* finish_;
309  LinkType* lca_;
310 
311  // Store the path between the finish and the start (thus, reversed). We do it this way
312  // as we can then simply pop elements from the vector's end while iterating, which is fast.
313  // All links stored in this vector are the primary links of their nodes (that is, they
314  // are pointing towards the root).
315  std::vector<LinkType*> reverse_path_;
316 
317 };
318 
319 // =================================================================================================
320 // Path Wrapper Functions
321 // =================================================================================================
322 
323 template<typename ElementType>
325 path( ElementType const& start, ElementType const& finish )
326 {
327  return {
330  };
331 }
332 
333 template<typename ElementType>
335 path( ElementType& start, ElementType& finish )
336 {
337  return {
340  };
341 }
342 
343 } // namespace tree
344 } // namespace genesis
345 
346 #endif // include guard
IteratorPath(LinkType &start, LinkType &finish)
Definition: path.hpp:85
LinkType & finish_link() const
Definition: path.hpp:295
utils::Range< IteratorPath< TreeLink const, TreeNode const, TreeEdge const > > path(ElementType const &start, ElementType const &finish)
Definition: path.hpp:325
bool operator==(const self_type &other) const
Definition: path.hpp:223
LinkType & link() const
Definition: path.hpp:270
IteratorPath(NodeType &start, NodeType &finish)
Definition: path.hpp:81
NodeType & finish_node() const
Definition: path.hpp:300
std::forward_iterator_tag iterator_category
Definition: path.hpp:67
NodeType & start_node() const
Definition: path.hpp:290
bool is_last_common_ancestor() const
Return whether the current iterator position (node) is the last common ancestor of the two start and ...
Definition: path.hpp:257
IteratorPath & operator=(IteratorPath const &)=default
bool is_lca() const
Alias for is_last_common_ancestor(). See there for more information.
Definition: path.hpp:265
bool operator!=(const self_type &other) const
Definition: path.hpp:230
NodeType & node() const
Definition: path.hpp:275
Header of Tree class.
std::vector< TreeLink const * > path_to_root(TreeNode const &node)
Helper function that finds all TreeLinks between a given TreeNode and the root of the Tree...
EdgeType & edge() const
Definition: path.hpp:280
self_type operator++()
Definition: path.hpp:200
LinkType & start_link() const
Definition: path.hpp:285