A library for working with phylogenetic and population genetic data.
v0.27.0
path.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_ITERATOR_PATH_H_
2 #define GENESIS_TREE_ITERATOR_PATH_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 #include "genesis/tree/tree.hpp"
37 
38 #include <cassert>
39 #include <iterator>
40 #include <vector>
41 #include <type_traits>
42 
43 namespace genesis {
44 namespace tree {
45 
46 // =================================================================================================
47 // Forward Declarations
48 // =================================================================================================
49 
50 class Tree;
51 class TreeNode;
52 class TreeEdge;
53 class TreeLink;
54 
55 // =================================================================================================
56 // Path Iterator
57 // =================================================================================================
58 
59 template< bool is_const = true >
61 {
62 
63 public:
64 
65  // -----------------------------------------------------
66  // Typedefs
67  // -----------------------------------------------------
68 
69  // Make the memer types const or not, depending on iterator type.
70  using TreeType = typename std::conditional< is_const, Tree const, Tree >::type;
71  using LinkType = typename std::conditional< is_const, TreeLink const, TreeLink >::type;
72  using NodeType = typename std::conditional< is_const, TreeNode const, TreeNode >::type;
73  using EdgeType = typename std::conditional< is_const, TreeEdge const, TreeEdge >::type;
74 
76  using iterator_category = std::forward_iterator_tag;
77  // using value_type = NodeType;
78  // using pointer = NodeType*;
79  // using reference = NodeType&;
80  // using difference_type = std::ptrdiff_t;
81 
82  // -----------------------------------------------------
83  // Constructors and Rule of Five
84  // -----------------------------------------------------
85 
87  : start_( nullptr )
88  , finish_( nullptr )
89  , lca_( nullptr )
90  , reverse_path_()
91  {}
92 
93  IteratorPath( NodeType& start, NodeType& finish )
94  : IteratorPath( start.link(), finish.link() )
95  {}
96 
97  IteratorPath( LinkType& start, LinkType& finish )
98  : start_( &start.node().primary_link() )
99  , finish_( &finish.node().primary_link() )
100  , lca_( nullptr )
101  , reverse_path_()
102  {
103  // In this constructor, we find and store the primary links of the nodes on the path.
104  // Then, when moving along the path with this iterator (operator++), we simply need to
105  // move along that list of links.
106  // Furthermore, by using the primary links of the nodes (that is, the ones pointing towards
107  // the root), we can easily spot the last common ancestor (LCA) of the start and finish
108  // node.
109 
110  // TODO there is a lot of code duplication between this and the lowest common ancestor
111  // TODO function. this is bad and could be resolved some how...
112 
113  // Helper function to find all links between a given link and the root.
114  auto path_to_root = [] ( LinkType* link ) {
115  std::vector<LinkType*> link_path;
116 
117  // Move towards the root and record all links in between.
118  LinkType* cur_link = link;
119  while( &cur_link->edge().secondary_link() == cur_link ) {
120 
121  // The above while condition means: is it the root?! Assert, that the default way of
122  // checking for the root by using the node gives the same result.
123  assert( ! is_root( cur_link->node() ));
124 
125  // Assert that the primary direction is correct.
126  assert( cur_link == &cur_link->edge().secondary_link() );
127 
128  // Add the primary link of the current node to the list.
129  link_path.push_back( cur_link );
130 
131  // Move one node towards the root.
132  // Assert that the default way of finding the next node towards the root (by using
133  // the edge) gives the same result as simply using the link's outer node.
134  // This is the case because the cur link is the one that points towards the root
135  // (which was asserted above).
136  assert( &cur_link->edge().primary_link() == &cur_link->outer() );
137  cur_link = &cur_link->outer().node().primary_link();
138  }
139 
140  // Now finally add the root itself and return the list.
141  assert( is_root( cur_link->node() ));
142  link_path.push_back( cur_link );
143  return link_path;
144  };
145 
146  // Treat special case start == finish.
147  // That makes sure that we do not need to specially check for an empty path later.
148  if( start_ == finish_ ) {
149  reverse_path_.push_back( start_ );
150  lca_ = start_;
151  return;
152  }
153 
154  // Get paths to root for both links.
155  auto start_path = path_to_root( start_ );
156  auto finish_path = path_to_root( finish_ );
157 
158  // We must have at least the two original links in the front and the root in the back.
159  assert( start_path.size() > 0 && finish_path.size() > 0 );
160  assert( start_path.front() == start_ );
161  assert( finish_path.front() == finish_ );
162  assert( start_path.back() == finish_path.back() );
163 
164  // Remove from back as long as the last two elements are the same.
165  // At the end of this, the remaining links are the ones on the path between
166  // the two original links.
167  while(
168  start_path.size() > 1 &&
169  finish_path.size() > 1 &&
170  start_path.at( start_path.size() - 1 ) == finish_path.at( finish_path.size() - 1 ) &&
171  start_path.at( start_path.size() - 2 ) == finish_path.at( finish_path.size() - 2 )
172  ) {
173  start_path.pop_back();
174  finish_path.pop_back();
175  }
176 
177  // Now, the last elements need to be the same (the LCA of the start and finish node).
178  assert( start_path.size() > 0 && finish_path.size() > 0 );
179  assert( start_path.back() == finish_path.back() );
180 
181  // The LCA (last common ancestor) is the node that both paths have in common. Store it.
182  lca_ = start_path.back();
183 
184  // We store the path backwards, because removing from a vector's end is faster.
185  // Thus, first add the path from finish to root/LCA, then from root/LCA to start (reversed).
186  // Also, remove the root/LCA once, otherwise, it would appear twice, as it is in both lists.
187  reverse_path_ = std::move( finish_path );
188  reverse_path_.pop_back();
189  reverse_path_.insert( reverse_path_.end(), start_path.rbegin(), start_path.rend() );
190 
191  assert( reverse_path_.front() == finish_ );
192  assert( reverse_path_.back() == start_ );
193  }
194 
195  ~IteratorPath() = default;
196 
197  IteratorPath( IteratorPath const& ) = default;
198  IteratorPath( IteratorPath&& ) = default;
199 
200  IteratorPath& operator= ( IteratorPath const& ) = default;
201  IteratorPath& operator= ( IteratorPath&& ) = default;
202 
203  // -----------------------------------------------------
204  // Operators
205  // -----------------------------------------------------
206 
208  {
209  return *this;
210  }
211 
213  {
214  // If there is more than one element in the path, move to the next one.
215  if( reverse_path_.size() > 1 ) {
216  reverse_path_.pop_back();
217 
218  // If there is only one element left (which was just visited), we are done.
219  } else {
220  start_ = nullptr;
221  finish_ = nullptr;
222  lca_ = nullptr;
223  reverse_path_.clear();
224  }
225  return *this;
226  }
227 
229  {
230  self_type tmp = *this;
231  ++(*this);
232  return tmp;
233  }
234 
235  bool operator == (const self_type &other) const
236  {
237  return other.start_ == start_ &&
238  other.finish_ == finish_ &&
239  other.reverse_path_.size() == reverse_path_.size();
240  }
241 
242  bool operator != (const self_type &other) const
243  {
244  return !(other == *this);
245  }
246 
247  // -----------------------------------------------------
248  // Members
249  // -----------------------------------------------------
250 
270  {
271  return reverse_path_.back() == lca_;
272  }
273 
277  bool is_lca() const
278  {
279  return is_last_common_ancestor();
280  }
281 
282  LinkType& link() const
283  {
284  return *reverse_path_.back();
285  }
286 
287  NodeType& node() const
288  {
289  return reverse_path_.back()->node();
290  }
291 
292  EdgeType& edge() const
293  {
294  return reverse_path_.back()->edge();
295  }
296 
298  {
299  return *start_;
300  }
301 
303  {
304  return start_->node();
305  }
306 
308  {
309  return *finish_;
310  }
311 
313  {
314  return finish_->node();
315  }
316 
317 private:
318 
319  LinkType* start_;
320  LinkType* finish_;
321  LinkType* lca_;
322 
323  // Store the path between the finish and the start (thus, reversed). We do it this way
324  // as we can then simply pop elements from the vector's end while iterating, which is fast.
325  // All links stored in this vector are the primary links of their nodes (that is, they
326  // are pointing towards the root).
327  std::vector<LinkType*> reverse_path_;
328 
329 };
330 
331 // =================================================================================================
332 // Path Wrapper Functions
333 // =================================================================================================
334 
335 template<typename ElementType>
337 path( ElementType const& start, ElementType const& finish )
338 {
339  return {
340  IteratorPath< true >( start, finish ),
342  };
343 }
344 
345 template<typename ElementType>
347 path( ElementType& start, ElementType& finish )
348 {
349  return {
350  IteratorPath< false >( start, finish ),
352  };
353 }
354 
355 } // namespace tree
356 } // namespace genesis
357 
358 #endif // include guard
genesis::tree::IteratorPath::iterator_category
std::forward_iterator_tag iterator_category
Definition: path.hpp:76
genesis::tree::IteratorPath::is_lca
bool is_lca() const
Alias for is_last_common_ancestor(). See there for more information.
Definition: path.hpp:277
genesis::tree::IteratorPath::IteratorPath
IteratorPath(LinkType &start, LinkType &finish)
Definition: path.hpp:97
genesis::tree::IteratorPath::IteratorPath
IteratorPath(NodeType &start, NodeType &finish)
Definition: path.hpp:93
genesis::tree::IteratorPath::finish_link
LinkType & finish_link() const
Definition: path.hpp:307
genesis::tree::IteratorPath::finish_node
NodeType & finish_node() const
Definition: path.hpp:312
genesis::tree::IteratorPath::edge
EdgeType & edge() const
Definition: path.hpp:292
genesis::tree::IteratorPath::NodeType
typename std::conditional< is_const, TreeNode const, TreeNode >::type NodeType
Definition: path.hpp:72
genesis::tree::IteratorPath::node
NodeType & node() const
Definition: path.hpp:287
tree.hpp
Header of Tree class.
functions.hpp
genesis::tree::IteratorPath::start_node
NodeType & start_node() const
Definition: path.hpp:302
genesis::tree::path_to_root
std::vector< TreeLink const * > path_to_root(TreeNode const &node)
Helper function that finds all TreeLinks between a given TreeNode and the root of the Tree.
Definition: tree/function/functions.cpp:580
genesis::tree::path
utils::Range< IteratorPath< true > > path(ElementType const &start, ElementType const &finish)
Definition: path.hpp:337
genesis::tree::IteratorPath::IteratorPath
IteratorPath()
Definition: path.hpp:86
genesis::tree::IteratorPath::LinkType
typename std::conditional< is_const, TreeLink const, TreeLink >::type LinkType
Definition: path.hpp:71
genesis::tree::IteratorPath::operator==
bool operator==(const self_type &other) const
Definition: path.hpp:235
genesis::tree::IteratorPath::TreeType
typename std::conditional< is_const, Tree const, Tree >::type TreeType
Definition: path.hpp:70
range.hpp
genesis::tree::is_root
bool is_root(TreeLink const &link)
Return whether the link belongs to the root node of its Tree.
Definition: tree/function/functions.cpp:89
genesis::tree::IteratorPath::operator*
self_type operator*()
Definition: path.hpp:207
genesis::utils::Range
Simple wrapper for typical begin() and end() iterators, to be used in range-based for loops.
Definition: range.hpp:46
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::IteratorPath::is_last_common_ancestor
bool is_last_common_ancestor() const
Return whether the current iterator position (node) is the last common ancestor of the two start and ...
Definition: path.hpp:269
genesis::tree::IteratorPath::~IteratorPath
~IteratorPath()=default
genesis::tree::IteratorPath::EdgeType
typename std::conditional< is_const, TreeEdge const, TreeEdge >::type EdgeType
Definition: path.hpp:73
genesis::tree::IteratorPath::operator++
self_type operator++()
Definition: path.hpp:212
genesis::tree::IteratorPath::operator=
IteratorPath & operator=(IteratorPath const &)=default
genesis::tree::IteratorPath::start_link
LinkType & start_link() const
Definition: path.hpp:297
genesis::tree::IteratorPath::link
LinkType & link() const
Definition: path.hpp:282
genesis::tree::IteratorPath
Definition: path.hpp:60
genesis::tree::IteratorPath::operator!=
bool operator!=(const self_type &other) const
Definition: path.hpp:242