A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
function/tree_set.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include "genesis/tree/tree.hpp"
35 
39 
40 #include <stdexcept>
41 #include <vector>
42 
43 namespace genesis {
44 namespace tree {
45 
46 // =================================================================================================
47 // Tree Set Functions
48 // =================================================================================================
49 
54 Tree* find_tree ( TreeSet& tset, std::string const& name)
55 {
56  for (auto& tree : tset) {
57  if( tree.name == name ) {
58  return &tree.tree;
59  }
60  }
61  return nullptr;
62 }
63 
68 Tree const* find_tree ( TreeSet const& tset, std::string const& name)
69 {
70  for (auto& tree : tset) {
71  if( tree.name == name ) {
72  return &tree.tree;
73  }
74  }
75  return nullptr;
76 }
77 
93 {
94  using TreeType = Tree;
95 
96  if( tset.size() == 0 ) {
97  return TreeType();
98  }
99 
100  if( ! all_identical_topology( tset )) {
101  throw std::runtime_error( "Trees in TreeSet do not have the same topology." );
102  }
103 
104  // Prepare storage for average branch lengths.
105  size_t num_edges = tset.at(0).tree.edge_count();
106  auto avgs = std::vector<double>(num_edges, 0.0);
107 
108  // We traverse all trees (again, because all_identical_topology() already did this). This is
109  // probably a bit slower than the previous version of this method which worked with less
110  // traversals, but way easier to understand and debug.
111  for( auto& ct : tset ) {
112  // Use an index for position in the preorder traversal. This makes sure that the
113  // index actually always points to the correct edges, indepently of their order in
114  // different trees in the set.
115  size_t idx = 0;
116 
117  // Do a preorder traversal and collect branch lengths.
118  for( auto it : preorder(ct.tree) ) {
119  // The first iteration points to an edge which will be covered later again.
120  // Skip it to prevent double coverage.
121  if (it.is_first_iteration()) {
122  continue;
123  }
124 
125  avgs[idx] += it.edge().data<DefaultEdgeData>().branch_length;
126  ++idx;
127  }
128  }
129 
130  // We know that all trees have the same topology. So we take a copy of the first one
131  // (thus, also copying its node names) and modify its branch lengths.
132  TreeType tree = TreeType( tset.at(0).tree );
133 
134  // Do the same kind of traversal as before in order to keep the indexing order (preorder) and
135  // set the branch lengths.
136  size_t idx = 0;
137  for( auto it : preorder(tree) ) {
138  // The first iteration points to an edge which will be covered later again.
139  // Skip it to prevent double coverage.
140  if (it.is_first_iteration()) {
141  continue;
142  }
143 
144  it.edge().data<DefaultEdgeData>().branch_length = avgs[idx] / tset.size();
145  ++idx;
146  }
147 
148  return tree;
149 }
150 
151 // =================================================================================================
152 // Comparators
153 // =================================================================================================
154 
161  TreeSet const& tset,
162  std::function<bool( TreeNode const&, TreeNode const& )> node_comparator,
163  std::function<bool( TreeEdge const&, TreeEdge const& )> edge_comparator
164 ) {
165  // If all pairs of two adjacent trees are equal, all of them are.
166  // Thus, we do not need a complete pairwise comparision.
167  // TODO the namespace thing is weird, but currently neccesary because of an ambiguous call...
168  for (size_t i = 1; i < tset.size(); i++) {
169  if( ! tree::equal( tset[i-1].tree, tset[i].tree, node_comparator, edge_comparator )) {
170  return false;
171  }
172  }
173  return true;
174 }
175 
180 // bool all_equal( TreeSet const& tset )
181 // {
182 // // If all pairs of two adjacent trees are equal, all of them are.
183 // // Thus, we do not need a complete pairwise comparision.
184 // for (size_t i = 1; i < tset.size(); i++) {
185 // if( ! equal( tset[i-1].tree, tset[i].tree )) {
186 // return false;
187 // }
188 // }
189 // return true;
190 // }
191 
195 bool all_identical_topology( TreeSet const& tset )
196 {
197  // If all pairs of two adjacent trees have same the topology, all of them have.
198  // Thus, we do not need a complete pairwise comparision.
199  for (size_t i = 1; i < tset.size(); i++) {
200  if( ! identical_topology( tset[i-1].tree, tset[i].tree )) {
201  return false;
202  }
203  }
204  return true;
205 }
206 
207 } // namespace tree
208 } // namespace genesis
Tree operator functions.
utils::Range< IteratorPreorder< TreeLink const, TreeNode const, TreeEdge const > > preorder(ElementType const &element)
Default class containing the commonly needed data for tree edges.
size_t size() const
Return the size of the TreeSet, i.e., the number of stored Trees.
Definition: tree_set.cpp:139
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
size_t edge_count() const
Return the number of TreeEdges of the Tree.
Definition: tree/tree.cpp:358
Tree average_branch_length_tree(TreeSet const &tset)
Return a Tree where the branch lengths are the average of the Trees in the TreeSet, given that they all have the same topology.
Header of Tree class.
Tree * find_tree(TreeSet &tset, std::string const &name)
Get the first Tree in a TreeSet that is stored with a given name, or nullptr if not found...
bool identical_topology(Tree const &lhs, Tree const &rhs)
Returns true iff both trees have an identical topology.
bool equal(Tree const &lhs, Tree const &rhs, std::function< bool(TreeNode const &, TreeNode const &) > node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &) > edge_comparator)
Compares two trees for equality given binary comparator functionals for their nodes and edges...
bool all_identical_topology(TreeSet const &tset)
Compare whether all Trees in a TreeSet are equal using their default comparision operators for nodes ...
NamedTree & at(size_t index)
Definition: tree_set.cpp:108
bool all_equal(TreeSet const &tset, std::function< bool(TreeNode const &, TreeNode const &)> node_comparator, std::function< bool(TreeEdge const &, TreeEdge const &)> edge_comparator)
Compare whether all Trees in a TreeSet are equal using a given comparator functional.