A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bipartition_set.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 #include "genesis/tree/tree.hpp"
38 
41 
42 #include <assert.h>
43 #include <stdexcept>
44 
45 namespace genesis {
46 namespace tree {
47 
48 // -------------------------------------------------------------
49 // Dump and Debug
50 // -------------------------------------------------------------
51 
53 {
54  const size_t num_leaves = leaf_node_count( tree_ );
55  make_index();
56 
57  bipartitions_.clear();
58  bipartitions_.resize(tree_.node_count(), Bipartition(num_leaves));
59 
60  for( auto it : postorder(tree_) ) {
61  if (it.is_last_iteration()) {
62  continue;
63  }
64 
65  Bipartition bp(num_leaves);
66  bp.link_ = &it.link();
67  if (it.node().is_leaf()) {
68  const int leaf_idx = node_to_leaf_map_[it.node().index()];
69  assert(leaf_idx > -1);
70  bp.leaf_nodes_.set(leaf_idx);
71  } else {
72  TreeLink* l = &it.link().next();
73  while( l != &it.link() ) {
74  bp.leaf_nodes_ |= bipartitions_[l->outer().node().index()].leaf_nodes_;
75  l = &l->next();
76  }
77  }
78  bipartitions_[it.node().index()] = bp;
79  }
80 }
81 
83 {
84  leaf_to_node_map_.clear();
85  node_to_leaf_map_.clear();
87 
88  size_t leaf_idx = 0;
89  for (
90  auto it = tree_.begin_nodes();
91  it != tree_.end_nodes();
92  ++it
93  ) {
94  if ((*it)->is_leaf()) {
95  node_to_leaf_map_[(*it)->index()] = leaf_idx;
96  leaf_to_node_map_.push_back((*it)->index());
97  ++leaf_idx;
98  } else {
99  node_to_leaf_map_[(*it)->index()] = -1;
100  }
101  }
102  assert(leaf_idx == leaf_to_node_map_.size());
103 }
104 
114  std::vector<TreeNode*> nodes
115 ) {
116  make();
118 
119  // make bitvector containing all wanted nodes.
120  for (TreeNode* n : nodes) {
121  int leaf_idx = node_to_leaf_map_[n->index()];
122  if (leaf_idx == -1) {
123  throw std::runtime_error(
124  "Node at index " + utils::to_string( n->index() ) + " is not leaf."
125  );
126  }
127  comp.set(leaf_idx);
128  }
129 
130  Bipartition* best_bp = nullptr;
131  size_t min_count = 0;
132 
133  // loop over all bipartitions and compare their bitvectors to the given one, to find one that
134  // is a superset. try both ways (normal and inverted) for each bipartition.
135  for (Bipartition& bp : bipartitions_) {
136  if (!bp.link_) {
137  continue;
138  }
139 
140  if (comp <= bp.leaf_nodes_) {
141  if (min_count == 0 || bp.leaf_nodes_.count() < min_count) {
142  best_bp = &bp;
143  min_count = bp.leaf_nodes_.count();
144  }
145  }
146  if (comp <= ~(bp.leaf_nodes_)) {
147  if (min_count == 0 || (~bp.leaf_nodes_).count() < min_count) {
148  // TODO the invert messes with the data consistency of the bipartition. better make a copy!
149  // TODO also, if there is a class subtree at some better, better return this instead of a bipartition.
150  bp.invert();
151  best_bp = &bp;
152  min_count = bp.leaf_nodes_.count();
153  }
154  }
155  }
156 
157  return best_bp;
158 }
159 
160 std::unordered_set<size_t> BipartitionSet::get_subtree_edges (
161  TreeLink* subtree
162 ) {
163  // std::vector<std::string> leaf_names;
164  std::unordered_set<size_t> ret;
165 
166  // We don't want to use the standard iterator wrapper function here, as we are going
167  // to end the iteration after the end of the subtree, instead of iterating the whole tree.
168  // So we need to use the iterator class directly.
170 
171  for(
172  auto it = Preorder(subtree->next());
173  it != Preorder() && &it.link() != &subtree->outer();
174  ++it
175  ) {
176  // TODO not typesafe!
177  // if( it.node().is_leaf() ) {
178  // leaf_names.push_back( node_data_cast< DefaultNodeData >( it.node() ).name );
179  // }
180  if (it.is_first_iteration()) {
181  continue;
182  }
183  ret.insert( it.edge().index() );
184  }
185 
186  // LOG_DBG << "leaf nodes of subtree:";
187  // for (std::string s : leaf_names) {
188  // LOG_DBG1 << s;
189  // }
190 
191  return ret;
192 }
193 
194 // -------------------------------------------------------------
195 // Dump and Debug
196 // -------------------------------------------------------------
197 
199 {
200  return true;
201 }
202 
203 std::string BipartitionSet::dump()
204 {
205  std::ostringstream out;
206 
207  out << "Node to Leaf Map:\n";
208  for (size_t i = 0; i < node_to_leaf_map_.size(); ++i) {
209  out << " " << i << " --> " << node_to_leaf_map_[i] << "\n";
210  }
211 
212  out << "\nLeaf to Node Map:\n";
213  for (size_t i = 0; i < leaf_to_node_map_.size(); ++i) {
214  out << " " << i << " --> " << leaf_to_node_map_[i] << "\n";
215  }
216 
217  for (Bipartition bi : bipartitions_) {
218  if (!bi.link_) {
219  continue;
220  }
221  out << "\nNode " << bi.link_->node().index()
222  << ", Leaf " << node_to_leaf_map_[bi.link_->node().index()]
223  << "\n" << bi.leaf_nodes_.dump() << "\n";
224  }
225  return out.str();
226 }
227 
228 } // namespace tree
229 } // namespace genesis
std::vector< int > node_to_leaf_map_
std::vector< Bipartition > bipartitions_
size_t count() const
Counts the number of set bits in the Bitvector.
Definition: bitvector.cpp:226
Bipartition * find_smallest_subtree(std::vector< TreeNode * > nodes)
Finds the smallest subtree (measured in number of nodes) that contains all given nodes.
size_t leaf_node_count(Tree const &tree)
Count the number of leaf Nodes of a Tree.
void set(size_t index)
Sets the value of a single bit to true, with boundary check.
Definition: bitvector.hpp:135
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:300
size_t index() const
Return the index of this Node.
Definition: node.cpp:48
IteratorNodes end_nodes()
Definition: tree/tree.cpp:469
std::vector< size_t > leaf_to_node_map_
size_t node_count() const
Return the number of TreeNodes of the Tree.
Definition: tree/tree.cpp:350
std::unordered_set< size_t > get_subtree_edges(TreeLink *subtree)
IteratorNodes begin_nodes()
Definition: tree/tree.cpp:464
Provides some commonly used string utility functions.
utils::Bitvector leaf_nodes_
Definition: bipartition.hpp:86
Provides easy and fast logging functionality.
Header of Tree class.
utils::Range< IteratorPostorder< TreeLink const, TreeNode const, TreeEdge const > > postorder(ElementType const &element)