A library for working with phylogenetic and population genetic data.
v0.32.0
lca_lookup.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include "genesis/tree/tree.hpp"
37 
38 #include <algorithm>
39 #include <cassert>
40 #include <cstdint>
41 #include <limits>
42 #include <stdexcept>
43 
44 namespace genesis {
45 namespace tree {
46 
47 // =================================================================================================
48 // Construction and Rule of Five
49 // =================================================================================================
50 
51 LcaLookup::LcaLookup( Tree const& tree )
52  : tree_( &tree )
53 {
54  init_( tree );
55 }
56 
57 // =================================================================================================
58 // Loopup
59 // =================================================================================================
60 
61 size_t LcaLookup::operator()( size_t node_index_a, size_t node_index_b, size_t root_index ) const
62 {
63  return lookup_( node_index_a, node_index_b, root_index );
64 }
65 
66 TreeNode const& LcaLookup::operator()( TreeNode const& node_a, TreeNode const& node_b, TreeNode const& root_node ) const
67 {
68  auto const idx = lookup_( node_a.index(), node_b.index(), root_node.index() );
69  return tree_->node_at( idx );
70 }
71 
72 size_t LcaLookup::operator()( size_t node_index_a, size_t node_index_b ) const
73 {
74  return lookup_( node_index_a, node_index_b, root_idx_ );
75 }
76 
77 TreeNode const& LcaLookup::operator()( TreeNode const& node_a, TreeNode const& node_b ) const
78 {
79  auto const idx = lookup_( node_a.index(), node_b.index(), root_idx_ );
80  return tree_->node_at( idx );
81 }
82 
83 // =================================================================================================
84 // Internal Helper Functions
85 // =================================================================================================
86 
87 void LcaLookup::init_( Tree const& tree )
88 {
89  // Get distances from each node to the root, in number of edges.
90  auto const dists_to_root = node_path_length_vector( tree );
91 
92  // Store root, so that tree can be re-rooted outside of this class.
93  root_idx_ = tree.root_node().index();
94 
95  using RmqIntType = utils::RangeMinimumQuery::IntType;
96 
97  // Init vectors.
98  std::vector<RmqIntType> eulertour_levels;
99  eulertour_first_occurrence_.resize( tree.node_count() );
100  std::fill(
101  eulertour_first_occurrence_.begin(),
102  eulertour_first_occurrence_.end(),
103  std::numeric_limits<size_t>::max()
104  );
105 
106  // Fill data structures.
107  for( auto it : eulertour( tree )) {
108  auto const node_idx = it.node().index();
109 
110  if( dists_to_root[ node_idx ] >= static_cast<size_t>( std::numeric_limits<RmqIntType>::max() )) {
111  throw std::runtime_error( "Tree too large for building LCA Lookup." );
112  }
113 
114  eulertour_order_.push_back( node_idx );
115  eulertour_levels.push_back( static_cast<RmqIntType>( dists_to_root[ node_idx ] ));
116  if( eulertour_first_occurrence_[ node_idx ] == std::numeric_limits<size_t>::max() ) {
117  eulertour_first_occurrence_[ node_idx ] = eulertour_order_.size() - 1;
118  }
119  }
120  eulertour_rmq_ = utils::RangeMinimumQuery( eulertour_levels );
121 }
122 
123 size_t LcaLookup::eulertour_query_( size_t i, size_t j ) const
124 {
125  if( i <= j ) {
126  return eulertour_rmq_.query( i, j );
127  } else {
128  return eulertour_rmq_.query( j, i );
129  }
130 }
131 
132 size_t LcaLookup::lookup_( size_t node_index_a, size_t node_index_b, size_t root_index ) const
133 {
134  auto const sz = eulertour_first_occurrence_.size();
135  if( node_index_a >= sz || node_index_b >= sz || root_index >= sz ) {
136  throw std::invalid_argument( "Invalid index out of bounds for LCA lookup." );
137  }
138 
139  size_t u_euler_idx = eulertour_first_occurrence_[ node_index_a ];
140  size_t v_euler_idx = eulertour_first_occurrence_[ node_index_b ];
141  size_t r_euler_idx = eulertour_first_occurrence_[ root_index ];
142 
143  if( root_index == root_idx_ ) {
144  return eulertour_order_[ eulertour_query_( u_euler_idx, v_euler_idx )];
145 
146  } else {
147 
148  // Use the "odd man out",
149  // see http://stackoverflow.com/questions/25371865/find-multiple-lcas-in-unrooted-tree
150  size_t candidate_a = eulertour_order_[ eulertour_query_( u_euler_idx, v_euler_idx )];
151  size_t candidate_b = eulertour_order_[ eulertour_query_( u_euler_idx, r_euler_idx )];
152  size_t candidate_c = eulertour_order_[ eulertour_query_( v_euler_idx, r_euler_idx )];
153 
154  if( candidate_a == candidate_b ) {
155  return candidate_c;
156  } else if( candidate_a == candidate_c ) {
157  return candidate_b;
158  } else {
159  assert( candidate_b == candidate_c );
160  return candidate_a;
161  }
162  }
163 }
164 
165 } // namespace tree
166 } // namespace genesis
genesis::tree::LcaLookup::LcaLookup
LcaLookup()=default
genesis::tree::Tree::node_count
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.hpp:264
node.hpp
eulertour.hpp
tree.hpp
Header of Tree class.
genesis::tree::Tree::node_at
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.hpp:220
genesis::tree::Tree::root_node
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.hpp:300
genesis::tree::LcaLookup::operator()
std::size_t operator()(size_t node_index_a, size_t node_index_b, size_t root_index) const
Definition: lca_lookup.cpp:61
genesis::tree::Tree
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
genesis::utils::RangeMinimumQuery::IntType
int32_t IntType
Data type of the array for which we want to run RMQ queries.
Definition: range_minimum_query.hpp:75
genesis::tree::TreeNode
Definition: tree/tree/node.hpp:58
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::eulertour
utils::Range< IteratorEulertour< true > > eulertour(ElementType const &element)
Definition: eulertour.hpp:206
genesis::tree::TreeNode::index
size_t index() const
Return the index of this Node.
Definition: tree/tree/node.hpp:102
genesis::utils::RangeMinimumQuery::query
size_t query(size_t i, size_t j) const
Main function for the Range Minimum Query.
Definition: range_minimum_query.cpp:163
distances.hpp
Header of Tree distance methods.
genesis::tree::node_path_length_vector
std::vector< size_t > node_path_length_vector(Tree const &tree, TreeNode const &node)
Return a vector containing the depth of all nodes with respect to the given start node.
Definition: tree/function/distances.cpp:98
lca_lookup.hpp