A library for working with phylogenetic and population genetic data.
v0.27.0
taxonomy/functions/tree.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
40 
41 #include "genesis/tree/tree.hpp"
46 
47 #include <cassert>
48 #include <stdexcept>
49 
50 namespace genesis {
51 namespace taxonomy {
52 
53 // =================================================================================================
54 // Tree
55 // =================================================================================================
56 
60 static void add_subtaxonomy_(
61  Taxonomy const& taxonomy,
62  bool keep_singleton_inner_nodes,
63  bool keep_inner_node_names,
64  int max_level,
65  int parent_level,
66  tree::NewickBroker& broker
67 ) {
68  for( auto const& taxon : taxonomy ) {
69  auto const level = parent_level + 1;
70 
71  if( max_level > -1 && taxon_level( taxon ) >= static_cast<size_t>( max_level )) {
72  // Here, we are at a taxon that is at the max level.
73  // Do not recurse, but add the name of the taxon to the tree. The taxon is possibly
74  // an inner one (phylum, class, etc), which is what we want to happen if there is
75  // a max level set.
76  broker.push_bottom({ taxon.name(), level });
77 
78  } else if( ! keep_singleton_inner_nodes && taxon.size() == 1 ) {
79  // Here, we are at a taxon that only contains a single child taxon, which would
80  // add a branch to the tree that does not furcate in any way. If the option to drop
81  // such entities is set, we simply recurse to the single child, without adding it
82  // to the tree.
84  taxon, keep_singleton_inner_nodes, keep_inner_node_names, max_level,
85  parent_level, broker
86  );
87 
88  } else {
89  // Here, we are in the default case: A taxon to be added to the tree.
90  // Whether we set a name or not depends on whether it is an inner one or has children.
91  std::string const tax_name = (
92  ( ! keep_inner_node_names && taxon.size() > 0 )
93  ? ""
94  : taxon.name()
95  );
96  broker.push_bottom({ tax_name, level });
98  taxon, keep_singleton_inner_nodes, keep_inner_node_names, max_level,
99  level, broker
100  );
101  }
102  }
103 }
104 
106  Taxonomy const& taxonomy,
107  bool keep_singleton_inner_nodes,
108  bool keep_inner_node_names,
109  int max_level
110 ) {
111  using namespace genesis::tree;
112 
113  // Make a broker.
114  // Add a dummy root node if the top leven of the taxonomy contains multiple elements.
115  // This is needed as we otherwise do not have a root node in the broker, but multiple,
116  // which is not allowed.
117  NewickBroker broker;
118  if( taxonomy.size() > 1 ) {
119  broker.push_bottom( NewickBrokerElement{0} );
120  }
121 
122  // Recursively add taxa. We could try something non-recursive here (see below for an example),
123  // but this makes early stopping for the max level a bit difficult.
124  // Taxonomies are not that deep, so this should not yield any trouble at all.
126  taxonomy, keep_singleton_inner_nodes, keep_inner_node_names, max_level, 1, broker
127  );
128  broker.assign_ranks();
129 
130  // Non-recursive way of adding taxa, that however does not easily support
131  // to drop inner branches that do not have any furcation.
132  // for( auto const& taxon_it : preorder( taxonomy )) {
133  // auto const lvl = 1 + static_cast<int>( taxon_level( taxon_it.taxon() ));
134  // broker.push_bottom({ taxon_it.taxon().name(), lvl });
135  // }
136 
137  // We (mis-)use a Common Tree Newick Reader here. It supports to turn a broker into a tree,
138  // and takes names into account. This is what we want. It will create branch lengths, too.
139  // We can live with that. Otherwise we'd have to make a new Tree data type for just this use
140  // case, so not really nice.
141  auto const nr = CommonTreeNewickReader();
142  return nr.broker_to_tree( broker );
143 }
144 
146  Taxonomy const& taxonomy,
147  std::unordered_map<std::string, Taxopath> const& extra_taxa,
148  bool keep_singleton_inner_nodes,
149  bool keep_inner_node_names,
150  int max_level,
151  bool add_extra_taxa_parents
152 ) {
153  // We are lazy. Just make a copy of the whole taxonomy (they are usually not that big),
154  // and add the extra taxa to it. Then, run the standard procedure.
155  auto copy = taxonomy;
156  for( auto const& tp : extra_taxa ) {
157  // Add the necessary parents of the extra taxon, if wanted.
158  if( add_extra_taxa_parents ) {
159  add_from_taxopath( copy, tp.second );
160  }
161 
162  // Find the taxon to add the extra one to.
163  auto const tax = find_taxon_by_taxopath( copy, tp.second );
164  if( ! tax ) {
165  // If we added the parent before, we cannot fail, so assure that a failure
166  // only happens if the adding of taxon parents is disabled.
167  assert( ! add_extra_taxa_parents );
168  auto const path = TaxopathGenerator().to_string( tp.second );
169  throw std::runtime_error( "Taxopath " + path + " not found in Taxonomy." );
170  }
171 
172  // Add the extra taxon
173  assert( tax );
174  tax->add_child( tp.first );
175  }
176 
177  return taxonomy_to_tree( copy, keep_singleton_inner_nodes, keep_inner_node_names, max_level );
178 }
179 
181  std::unordered_map<std::string, Taxopath> const& taxon_map,
182  bool keep_singleton_inner_nodes,
183  bool keep_inner_node_names,
184  int max_level
185 ) {
186  // Use an empty dummy taxonomy that gets filled as needed.
187  Taxonomy tmp;
188  return taxonomy_to_tree(
189  tmp, taxon_map, keep_singleton_inner_nodes, keep_inner_node_names, max_level, true
190  );
191 }
192 
193 } // namespace taxonomy
194 } // namespace genesis
broker.hpp
newick_reader.hpp
taxopath.hpp
genesis::taxonomy::taxonomy_to_tree
tree::Tree taxonomy_to_tree(Taxonomy const &taxonomy, bool keep_singleton_inner_nodes, bool keep_inner_node_names, int max_level)
Turn a Taxonomy into a (possibly multifurcating) Tree.
Definition: taxonomy/functions/tree.cpp:105
genesis::taxonomy::taxon_level
size_t taxon_level(Taxon const &taxon)
Return the level of depth of a given Taxon.
Definition: functions/taxonomy.cpp:78
genesis::tree
Definition: placement/formats/edge_color.hpp:46
tree.hpp
Header of Tree class.
element.hpp
genesis::taxonomy::TaxopathGenerator
Helper class to generate a taxonomic path string from a Taxopath object or a Taxon.
Definition: taxopath_generator.hpp:74
genesis::tree::CommonTreeNewickReader
Read default Newick trees, i.e., trees with names and branch lengths.
Definition: tree/common_tree/newick_reader.hpp:326
taxonomy.hpp
genesis::tree::path
utils::Range< IteratorPath< true > > path(ElementType const &start, ElementType const &finish)
Definition: path.hpp:337
genesis::tree::Tree
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
taxopath_generator.hpp
taxopath.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::taxonomy::add_from_taxopath
Taxon & add_from_taxopath(Taxonomy &taxonomy, Taxopath const &taxopath, bool expect_parents)
Add a Taxon to a Taxonomy, using the taxonomic elements of a Taxopath.
Definition: taxopath.cpp:76
genesis::taxonomy::Taxonomy
Store a Taxonomy, i.e., a nested hierarchy of Taxa.
Definition: taxonomy/taxonomy.hpp:96
taxon.hpp
genesis::taxonomy::TaxopathGenerator::to_string
std::string to_string(Taxopath const &taxopath) const
Return a string representation of a Taxopath.
Definition: taxopath_generator.cpp:49
genesis::tree::NewickBroker
Stores a Newick tree in an intermediate format that can be further processed into a Tree.
Definition: broker.hpp:106
genesis::taxonomy::Taxonomy::size
size_t size() const
Return the number of immediate child Taxa.
Definition: taxonomy.cpp:84
preorder.hpp
genesis::tree::NewickBroker::assign_ranks
void assign_ranks() const
Iterate over the tree and assign ranks (= number of immediate children) to all nodes.
Definition: broker.cpp:85
reader.hpp
genesis::taxonomy::add_subtaxonomy_
static void add_subtaxonomy_(Taxonomy const &taxonomy, bool keep_singleton_inner_nodes, bool keep_inner_node_names, int max_level, int parent_level, tree::NewickBroker &broker)
Recursive local helper function to add taxa to the tree broker.
Definition: taxonomy/functions/tree.cpp:60
genesis::taxonomy::find_taxon_by_taxopath
Taxon const * find_taxon_by_taxopath(Taxonomy const &tax, Taxopath const &taxopath)
Find a Taxon in a Taxonomy, given its Taxopath.
Definition: taxopath.cpp:120
genesis::tree::NewickBroker::push_bottom
void push_bottom(NewickBrokerElement const &node)
Definition: broker.cpp:61
genesis::tree::NewickBrokerElement
Store the information for one element of a Newick tree.
Definition: element.hpp:60
taxonomy.hpp
tree.hpp