A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
lca_lookup.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include "genesis/tree/tree.hpp"
37 
38 #include <algorithm>
39 #include <cassert>
40 #include <limits>
41 
42 namespace genesis {
43 namespace tree {
44 
45 // =================================================================================================
46 // Construction and Rule of Five
47 // =================================================================================================
48 
49 LcaLookup::LcaLookup( Tree const& tree )
50  : tree_( tree )
51 {
52  init_( tree );
53 }
54 
55 // =================================================================================================
56 // Loopup
57 // =================================================================================================
58 
59 size_t LcaLookup::operator()( size_t node_index_a, size_t node_index_b, size_t root_index ) const
60 {
61  return lookup_( node_index_a, node_index_b, root_index );
62 }
63 
64 TreeNode const& LcaLookup::operator()( TreeNode const& node_a, TreeNode const& node_b, TreeNode const& root_node ) const
65 {
66  auto const idx = lookup_( node_a.index(), node_b.index(), root_node.index() );
67  return tree_.node_at( idx );
68 }
69 
70 size_t LcaLookup::operator()( size_t node_index_a, size_t node_index_b ) const
71 {
72  return lookup_( node_index_a, node_index_b, root_idx_ );
73 }
74 
75 TreeNode const& LcaLookup::operator()( TreeNode const& node_a, TreeNode const& node_b ) const
76 {
77  auto const idx = lookup_( node_a.index(), node_b.index(), root_idx_ );
78  return tree_.node_at( idx );
79 }
80 
81 // =================================================================================================
82 // Internal Helper Functions
83 // =================================================================================================
84 
85 void LcaLookup::init_( Tree const& tree )
86 {
87  // Get distances from each node to the root, in number of edges.
88  auto const dists_to_root = node_path_length_vector( tree );
89 
90  // Store root, so that tree can be re-rooted outside of this class.
91  root_idx_ = tree.root_node().index();
92 
93  // Init vectors.
94  std::vector<int> eulertour_levels;
95  eulertour_first_occurrence_.resize( tree.node_count() );
96  std::fill(
97  eulertour_first_occurrence_.begin(),
98  eulertour_first_occurrence_.end(),
99  std::numeric_limits<size_t>::max()
100  );
101 
102  // Fill data structures.
103  for( auto it : eulertour( tree )) {
104  auto const node_idx = it.node().index();
105 
106  eulertour_order_.push_back( node_idx );
107  eulertour_levels.push_back( dists_to_root[ node_idx ]);
108  if( eulertour_first_occurrence_[ node_idx ] == std::numeric_limits<size_t>::max() ) {
109  eulertour_first_occurrence_[ node_idx ] = eulertour_order_.size() - 1;
110  }
111  }
112  eulertour_rmq_ = utils::RangeMinimumQuery( eulertour_levels );
113 }
114 
115 size_t LcaLookup::eulertour_query_( size_t i, size_t j ) const
116 {
117  if( i <= j ) {
118  return eulertour_rmq_.query( i, j );
119  } else {
120  return eulertour_rmq_.query( j, i );
121  }
122 }
123 
124 size_t LcaLookup::lookup_( size_t node_index_a, size_t node_index_b, size_t root_index ) const
125 {
126  size_t u_euler_idx = eulertour_first_occurrence_[ node_index_a ];
127  size_t v_euler_idx = eulertour_first_occurrence_[ node_index_b ];
128  size_t r_euler_idx = eulertour_first_occurrence_[ root_index ];
129 
130  if( root_index == root_idx_ ) {
131  return eulertour_order_[ eulertour_query_( u_euler_idx, v_euler_idx )];
132 
133  } else {
134 
135  // Use the "odd man out",
136  // see http://stackoverflow.com/questions/25371865/find-multiple-lcas-in-unrooted-tree
137  size_t candidate_a = eulertour_order_[ eulertour_query_( u_euler_idx, v_euler_idx )];
138  size_t candidate_b = eulertour_order_[ eulertour_query_( u_euler_idx, r_euler_idx )];
139  size_t candidate_c = eulertour_order_[ eulertour_query_( v_euler_idx, r_euler_idx )];
140 
141  if( candidate_a == candidate_b ) {
142  return candidate_c;
143  } else if( candidate_a == candidate_c ) {
144  return candidate_b;
145  } else {
146  assert( candidate_b == candidate_c );
147  return candidate_a;
148  }
149  }
150 }
151 
152 } // namespace tree
153 } // namespace genesis
std::vector< size_t > node_path_length_vector(Tree const &tree, TreeNode const &node)
Return a vector containing the depth of all nodes with respect to the given start node...
TreeNode & node_at(size_t index)
Return the TreeNode at a certain index.
Definition: tree/tree.cpp:304
size_t index() const
Return the index of this Node.
Definition: node.cpp:48
TreeNode & root_node()
Return the TreeNode at the current root of the Tree.
Definition: tree/tree.cpp:258
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.cpp:350
utils::Range< IteratorEulertour< TreeLink const, TreeNode const, TreeEdge const > > eulertour(ElementType const &element)
Definition: eulertour.hpp:190
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
size_t query(size_t i, size_t j) const
Main function for the Range Minimum Query.
std::size_t operator()(size_t node_index_a, size_t node_index_b, size_t root_index) const
Definition: lca_lookup.cpp:59
Header of Tree class.
Header of Tree distance methods.