A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
lca_lookup.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include "genesis/tree/tree.hpp"
37 
38 #include <algorithm>
39 #include <cassert>
40 #include <limits>
41 #include <stdexcept>
42 
43 namespace genesis {
44 namespace tree {
45 
46 // =================================================================================================
47 // Construction and Rule of Five
48 // =================================================================================================
49 
50 LcaLookup::LcaLookup( Tree const& tree )
51  : tree_( tree )
52 {
53  init_( tree );
54 }
55 
56 // =================================================================================================
57 // Loopup
58 // =================================================================================================
59 
60 size_t LcaLookup::operator()( size_t node_index_a, size_t node_index_b, size_t root_index ) const
61 {
62  return lookup_( node_index_a, node_index_b, root_index );
63 }
64 
65 TreeNode const& LcaLookup::operator()( TreeNode const& node_a, TreeNode const& node_b, TreeNode const& root_node ) const
66 {
67  auto const idx = lookup_( node_a.index(), node_b.index(), root_node.index() );
68  return tree_.node_at( idx );
69 }
70 
71 size_t LcaLookup::operator()( size_t node_index_a, size_t node_index_b ) const
72 {
73  return lookup_( node_index_a, node_index_b, root_idx_ );
74 }
75 
76 TreeNode const& LcaLookup::operator()( TreeNode const& node_a, TreeNode const& node_b ) const
77 {
78  auto const idx = lookup_( node_a.index(), node_b.index(), root_idx_ );
79  return tree_.node_at( idx );
80 }
81 
82 // =================================================================================================
83 // Internal Helper Functions
84 // =================================================================================================
85 
86 void LcaLookup::init_( Tree const& tree )
87 {
88  // Get distances from each node to the root, in number of edges.
89  auto const dists_to_root = node_path_length_vector( tree );
90 
91  // Store root, so that tree can be re-rooted outside of this class.
92  root_idx_ = tree.root_node().index();
93 
94  // Init vectors.
95  std::vector<int> eulertour_levels;
96  eulertour_first_occurrence_.resize( tree.node_count() );
97  std::fill(
98  eulertour_first_occurrence_.begin(),
99  eulertour_first_occurrence_.end(),
100  std::numeric_limits<size_t>::max()
101  );
102 
103  // Fill data structures.
104  for( auto it : eulertour( tree )) {
105  auto const node_idx = it.node().index();
106 
107  eulertour_order_.push_back( node_idx );
108  eulertour_levels.push_back( dists_to_root[ node_idx ]);
109  if( eulertour_first_occurrence_[ node_idx ] == std::numeric_limits<size_t>::max() ) {
110  eulertour_first_occurrence_[ node_idx ] = eulertour_order_.size() - 1;
111  }
112  }
113  eulertour_rmq_ = utils::RangeMinimumQuery( eulertour_levels );
114 }
115 
116 size_t LcaLookup::eulertour_query_( size_t i, size_t j ) const
117 {
118  if( i <= j ) {
119  return eulertour_rmq_.query( i, j );
120  } else {
121  return eulertour_rmq_.query( j, i );
122  }
123 }
124 
125 size_t LcaLookup::lookup_( size_t node_index_a, size_t node_index_b, size_t root_index ) const
126 {
127  auto const sz = eulertour_first_occurrence_.size();
128  if( node_index_a >= sz || node_index_b >= sz || root_index >= sz ) {
129  throw std::invalid_argument( "Invalid index out of bounds for LCA lookup." );
130  }
131 
132  size_t u_euler_idx = eulertour_first_occurrence_[ node_index_a ];
133  size_t v_euler_idx = eulertour_first_occurrence_[ node_index_b ];
134  size_t r_euler_idx = eulertour_first_occurrence_[ root_index ];
135 
136  if( root_index == root_idx_ ) {
137  return eulertour_order_[ eulertour_query_( u_euler_idx, v_euler_idx )];
138 
139  } else {
140 
141  // Use the "odd man out",
142  // see http://stackoverflow.com/questions/25371865/find-multiple-lcas-in-unrooted-tree
143  size_t candidate_a = eulertour_order_[ eulertour_query_( u_euler_idx, v_euler_idx )];
144  size_t candidate_b = eulertour_order_[ eulertour_query_( u_euler_idx, r_euler_idx )];
145  size_t candidate_c = eulertour_order_[ eulertour_query_( v_euler_idx, r_euler_idx )];
146 
147  if( candidate_a == candidate_b ) {
148  return candidate_c;
149  } else if( candidate_a == candidate_c ) {
150  return candidate_b;
151  } else {
152  assert( candidate_b == candidate_c );
153  return candidate_a;
154  }
155  }
156 }
157 
158 } // namespace tree
159 } // namespace genesis
std::vector< size_t > node_path_length_vector(Tree const &tree, TreeNode const &node)
Return a vector containing the depth of all nodes with respect to the given start node...
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.cpp:304
size_t index() const
Return the index of this Node.
Definition: node.hpp:100
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.cpp:258
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.cpp:350
utils::Range< IteratorEulertour< TreeLink const, TreeNode const, TreeEdge const > > eulertour(ElementType const &element)
Definition: eulertour.hpp:190
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
size_t query(size_t i, size_t j) const
Main function for the Range Minimum Query.
std::size_t operator()(size_t node_index_a, size_t node_index_b, size_t root_index) const
Definition: lca_lookup.cpp:60
Header of Tree class.
Header of Tree distance methods.