A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
path.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_ITERATOR_PATH_H_
2 #define GENESIS_TREE_ITERATOR_PATH_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 #include "genesis/tree/tree.hpp"
37 
38 #include <assert.h>
39 #include <iterator>
40 #include <vector>
41 
42 namespace genesis {
43 namespace tree {
44 
45 // =================================================================================================
46 // Forward Declarations
47 // =================================================================================================
48 
49 class Tree;
50 class TreeNode;
51 class TreeEdge;
52 class TreeLink;
53 
54 // =================================================================================================
55 // Path Iterator
56 // =================================================================================================
57 
58 template <typename LinkType, typename NodeType, typename EdgeType>
60 {
61 
62 public:
63 
64  // -----------------------------------------------------
65  // Typedefs
66  // -----------------------------------------------------
67 
68  using iterator_category = std::forward_iterator_tag;
70 
71  // -----------------------------------------------------
72  // Constructors and Rule of Five
73  // -----------------------------------------------------
74 
76  : start_( nullptr )
77  , finish_( nullptr )
78  , lca_( nullptr )
79  , reverse_path_()
80  {}
81 
82  IteratorPath( NodeType& start, NodeType& finish )
83  : IteratorPath( start.link(), finish.link() )
84  {}
85 
86  IteratorPath( LinkType& start, LinkType& finish )
87  : start_( &start.node().primary_link() )
88  , finish_( &finish.node().primary_link() )
89  , lca_( nullptr )
90  , reverse_path_()
91  {
92  // In this constructor, we find and store the primary links of the nodes on the path.
93  // Then, when moving along the path with this iterator (operator++), we simply need to
94  // move along that list of links.
95  // Furthermore, by using the primary links of the nodes (that is, the ones pointing towards
96  // the root), we can easily spot the last common ancestor (LCA) of the start and finish
97  // node.
98 
99  // TODO there is a lot of code duplication between this and the lowest common ancestor
100  // TODO function. this is bad and could be resolved some how...
101 
102  // Helper function to find all links between a given link and the root.
103  auto path_to_root = [] ( LinkType* link ) {
104  std::vector<LinkType*> path;
105 
106  // Move towards the root and record all links in between.
107  LinkType* cur_link = link;
108  while( &cur_link->edge().secondary_link() == cur_link ) {
109 
110  // The above while condition means: is it the root?! Assert, that the default way of
111  // checking for the root by using the node gives the same result.
112  assert( ! is_root( cur_link->node() ));
113 
114  // Assert that the primary direction is correct.
115  assert( cur_link == &cur_link->edge().secondary_link() );
116 
117  // Add the primary link of the current node to the list.
118  path.push_back( cur_link );
119 
120  // Move one node towards the root.
121  // Assert that the default way of finding the next node towards the root (by using
122  // the edge) gives the same result as simply using the link's outer node.
123  // This is the case because the cur link is the one that points towards the root
124  // (which was asserted above).
125  assert( &cur_link->edge().primary_link() == &cur_link->outer() );
126  cur_link = &cur_link->outer().node().primary_link();
127  }
128 
129  // Now finally add the root itself and return the list.
130  assert( is_root( cur_link->node() ));
131  path.push_back( cur_link );
132  return path;
133  };
134 
135  // Treat special case start == finish.
136  // That makes sure that we do not need to specially check for an empty path later.
137  if( start_ == finish_ ) {
138  reverse_path_.push_back( start_ );
139  lca_ = start_;
140  return;
141  }
142 
143  // Get paths to root for both links.
144  auto start_path = path_to_root( start_ );
145  auto finish_path = path_to_root( finish_ );
146 
147  // We must have at least the two original links in the front and the root in the back.
148  assert( start_path.size() > 0 && finish_path.size() > 0 );
149  assert( start_path.front() == start_ );
150  assert( finish_path.front() == finish_ );
151  assert( start_path.back() == finish_path.back() );
152 
153  // Remove from back as long as the last two elements are the same.
154  // At the end of this, the remaining links are the ones on the path between
155  // the two original links.
156  while(
157  start_path.size() > 1 &&
158  finish_path.size() > 1 &&
159  start_path.at( start_path.size() - 1 ) == finish_path.at( finish_path.size() - 1 ) &&
160  start_path.at( start_path.size() - 2 ) == finish_path.at( finish_path.size() - 2 )
161  ) {
162  start_path.pop_back();
163  finish_path.pop_back();
164  }
165 
166  // Now, the last elements need to be the same (the LCA of the start and finish node).
167  assert( start_path.size() > 0 && finish_path.size() > 0 );
168  assert( start_path.back() == finish_path.back() );
169 
170  // The LCA (last common ancestor) is the node that both paths have in common. Store it.
171  lca_ = start_path.back();
172 
173  // We store the path backwards, because removing from a vector's end is faster.
174  // Thus, first add the path from finish to root/LCA, then from root/LCA to start (reversed).
175  // Also, remove the root/LCA once, otherwise, it would appear twice, as it is in both lists.
176  reverse_path_ = std::move( finish_path );
177  reverse_path_.pop_back();
178  reverse_path_.insert( reverse_path_.end(), start_path.rbegin(), start_path.rend() );
179 
180  assert( reverse_path_.front() == finish_ );
181  assert( reverse_path_.back() == start_ );
182  }
183 
184  ~IteratorPath() = default;
185 
186  IteratorPath( IteratorPath const& ) = default;
187  IteratorPath( IteratorPath&& ) = default;
188 
189  IteratorPath& operator= ( IteratorPath const& ) = default;
190  IteratorPath& operator= ( IteratorPath&& ) = default;
191 
192  // -----------------------------------------------------
193  // Operators
194  // -----------------------------------------------------
195 
197  {
198  return *this;
199  }
200 
202  {
203  // If there is more than one element in the path, move to the next one.
204  if( reverse_path_.size() > 1 ) {
205  reverse_path_.pop_back();
206 
207  // If there is only one element left (which was just visited), we are done.
208  } else {
209  start_ = nullptr;
210  finish_ = nullptr;
211  lca_ = nullptr;
212  reverse_path_.clear();
213  }
214  return *this;
215  }
216 
218  {
219  self_type tmp = *this;
220  ++(*this);
221  return tmp;
222  }
223 
224  bool operator == (const self_type &other) const
225  {
226  return other.start_ == start_ &&
227  other.finish_ == finish_ &&
228  other.reverse_path_.size() == reverse_path_.size();
229  }
230 
231  bool operator != (const self_type &other) const
232  {
233  return !(other == *this);
234  }
235 
236  // -----------------------------------------------------
237  // Members
238  // -----------------------------------------------------
239 
259  {
260  return reverse_path_.back() == lca_;
261  }
262 
266  bool is_lca() const
267  {
268  return is_last_common_ancestor();
269  }
270 
271  LinkType& link() const
272  {
273  return *reverse_path_.back();
274  }
275 
276  NodeType& node() const
277  {
278  return reverse_path_.back()->node();
279  }
280 
281  EdgeType& edge() const
282  {
283  return reverse_path_.back()->edge();
284  }
285 
286  LinkType& start_link() const
287  {
288  return *start_;
289  }
290 
291  NodeType& start_node() const
292  {
293  return start_->node();
294  }
295 
296  LinkType& finish_link() const
297  {
298  return *finish_;
299  }
300 
301  NodeType& finish_node() const
302  {
303  return finish_->node();
304  }
305 
306 private:
307 
308  LinkType* start_;
309  LinkType* finish_;
310  LinkType* lca_;
311 
312  // Store the path between the finish and the start (thus, reversed). We do it this way
313  // as we can then simply pop elements from the vector's end while iterating, which is fast.
314  // All links stored in this vector are the primary links of their nodes (that is, they
315  // are pointing towards the root).
316  std::vector<LinkType*> reverse_path_;
317 
318 };
319 
320 // =================================================================================================
321 // Path Wrapper Functions
322 // =================================================================================================
323 
324 template<typename ElementType>
326 path( ElementType const& start, ElementType const& finish )
327 {
328  return {
331  };
332 }
333 
334 template<typename ElementType>
336 path( ElementType& start, ElementType& finish )
337 {
338  return {
341  };
342 }
343 
344 } // namespace tree
345 } // namespace genesis
346 
347 #endif // include guard
IteratorPath(LinkType &start, LinkType &finish)
Definition: path.hpp:86
LinkType & finish_link() const
Definition: path.hpp:296
utils::Range< IteratorPath< TreeLink const, TreeNode const, TreeEdge const > > path(ElementType const &start, ElementType const &finish)
Definition: path.hpp:326
bool operator==(const self_type &other) const
Definition: path.hpp:224
LinkType & link() const
Definition: path.hpp:271
bool is_root(TreeNode const &node)
Return whether the node is the root of its Tree.
IteratorPath(NodeType &start, NodeType &finish)
Definition: path.hpp:82
NodeType & finish_node() const
Definition: path.hpp:301
std::forward_iterator_tag iterator_category
Definition: path.hpp:68
NodeType & start_node() const
Definition: path.hpp:291
bool is_last_common_ancestor() const
Return whether the current iterator position (node) is the last common ancestor of the two start and ...
Definition: path.hpp:258
IteratorPath & operator=(IteratorPath const &)=default
bool is_lca() const
Alias for is_last_common_ancestor(). See there for more information.
Definition: path.hpp:266
bool operator!=(const self_type &other) const
Definition: path.hpp:231
NodeType & node() const
Definition: path.hpp:276
Header of Tree class.
std::vector< TreeLink const * > path_to_root(TreeNode const &node)
Helper function that finds all TreeLinks between a given TreeNode and the root of the Tree...
EdgeType & edge() const
Definition: path.hpp:281
self_type operator++()
Definition: path.hpp:201
LinkType & start_link() const
Definition: path.hpp:286