A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tree/formats/newick/writer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 #include "genesis/tree/tree.hpp"
38 
43 
44 #include <assert.h>
45 #include <deque>
46 #include <memory>
47 #include <sstream>
48 #include <stdexcept>
49 #include <vector>
50 
51 namespace genesis {
52 namespace tree {
53 
54 // =================================================================================================
55 // Printing
56 // =================================================================================================
57 
59  Tree const& tree, std::string const& filename
60 ) const {
61  std::string ts;
62  to_string(tree, ts);
63  utils::file_write(ts, filename);
64 }
65 
67  Tree const& tree, std::string& ts
68 ) const {
69  ts = to_string(tree);
70 }
71 
72 std::string NewickWriter::to_string( Tree const& tree ) const
73 {
74  NewickBroker broker;
75  tree_to_broker_(tree, broker);
76  broker.assign_ranks();
77  return to_string_rec_(broker, 0) + ";";
78 }
79 
80 void NewickWriter::tree_to_broker_ (
81  Tree const& tree, NewickBroker& broker
82 ) const {
83  for( auto const& prepare_plugin : prepare_writing_plugins ) {
84  prepare_plugin( tree, broker );
85  }
86 
87  // store the depth from each node to the root. this is needed to assign levels of depth
88  // to the nodes for the broker.
89  auto depth = node_path_length_vector(tree);
90 
91  // now fill the broker with nodes via postorder traversal, so that the root is put on top last.
92  broker.clear();
93  for( auto it : postorder(tree) ) {
94  NewickBrokerElement bn;
95  bn.depth = depth[ it.node().index() ];
96 
97  for( auto const& node_plugin : node_to_element_plugins ) {
98  node_plugin( it.node(), bn );
99  }
100  // only write edge data to the broker element if it is not the last iteration.
101  // the last iteration is the root, which usually does not have edge information in newick.
102  // caveat: for the root node, the edge will point to an arbitrary edge away from the root.
103  if( !it.is_last_iteration() ) {
104  for( auto const& edge_plugin : edge_to_element_plugins ) {
105  edge_plugin( it.edge(), bn );
106  }
107  }
108 
109  broker.push_top(bn);
110  }
111 
112  for( auto const& finish_plugin : finish_writing_plugins ) {
113  finish_plugin( tree, broker );
114  }
115 }
116 
117 std::string NewickWriter::element_to_string_( NewickBrokerElement const& bn ) const
118 {
119  // Prepare result
120  std::string res;
121 
122  // Write name.
123  if( write_names_ ) {
124  // Find out whether we need to put this name in quotation marks.
125  // According to http://evolution.genetics.washington.edu/phylip/newicktree.html :
126  // "A name can be any string of printable characters except blanks, colons, semicolons,
127  // parentheses, and square brackets." Well, they forgot to mention commas here.
128  // But we knew before that Newick is not a good format anyway...
129  // Also, if write_tags_ is true, we also quote {}, as those are used for tags.
130  bool need_qmarks = false;
131  need_qmarks |= ( std::string::npos != bn.name.find_first_of( " :;()[]," ));
132  need_qmarks |= ( write_tags_ && std::string::npos != bn.name.find_first_of( "{}" ));
133 
134  if( need_qmarks ) {
135  res += quotation_marks_ + bn.name + quotation_marks_;
136  } else {
137  res += bn.name;
138  }
139  }
140 
141  // Write values (":...")
142  if( write_values_ ) {
143  for( std::string const& v : bn.values ) {
144  res += ":" + v;
145  }
146  }
147 
148  // Write comments ("[...]")
149  if( write_comments_ ) {
150  for( std::string const& c : bn.comments ) {
151  res += "[" + c + "]";
152  }
153  }
154 
155  // Write tags ("{...}")
156  if( write_tags_ ) {
157  for( std::string const& t : bn.tags ) {
158  res += "{" + t + "}";
159  }
160  }
161 
162  return res;
163 }
164 
165 std::string NewickWriter::to_string_rec_( NewickBroker const& broker, size_t pos ) const
166 {
167  // check if it is a leaf, stop recursion if so.
168  if (broker[pos].rank() == 0) {
169  return element_to_string_(broker[pos]);
170  }
171 
172  // recurse over all children of the current node. while doing so, build a stack of the resulting
173  // substrings in reverse order. this is because newick stores the nodes kind of "backwards",
174  // by starting at a leaf node instead of the root.
175  std::deque<std::string> children;
176  for (size_t i = pos + 1; i < broker.size() && broker[i].depth > broker[pos].depth; ++i) {
177  // skip if not immediate children (those will be called in later recursion steps)
178  if (broker[i].depth > broker[pos].depth + 1) {
179  continue;
180  }
181 
182  // do the recursion step for this child, add the result to a stack
183  children.push_front(to_string_rec_(broker, i));
184  }
185 
186  // build the string by iterating the stack
187  std::ostringstream out;
188  out << "(";
189  for (size_t i = 0; i < children.size(); ++i) {
190  if (i>0) {
191  out << ",";
192  }
193  out << children[i];
194  }
195  out << ")" << element_to_string_(broker[pos]);
196  return out.str();
197 }
198 
199 } // namespace tree
200 } // namespace genesis
std::vector< finish_writing_function > finish_writing_plugins
Collect all functions to be called after finishing the actual tree writing.
std::vector< size_t > node_path_length_vector(Tree const &tree, TreeNode const &node)
Return a vector containing the depth of all nodes with respect to the given start node...
std::vector< node_to_element_function > node_to_element_plugins
Collect all functions to be called for each TreeNode in order to translate it to a Newick representat...
void assign_ranks() const
Iterate over the tree and assign ranks (= number of immediate children) to all nodes.
Definition: broker.cpp:85
void push_top(NewickBrokerElement const &node)
Definition: broker.cpp:51
Provides some valuable additions to STD.
void to_file(Tree const &tree, std::string const &filename) const
Writes the tree to a file in Newick format.
Stores a Newick tree in an intermediate format that can be further processed into a Tree...
Definition: broker.hpp:106
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
std::vector< prepare_writing_function > prepare_writing_plugins
Collect all functions to be called before starting the actual tree writing.
void clear()
Deletes all nodes from the broker.
Definition: broker.cpp:46
Provides functions for accessing the file system.
Provides easy and fast logging functionality.
std::vector< edge_to_element_function > edge_to_element_plugins
Collect all functions to be called for each TreeEdge in order to translate it to a Newick representat...
Header of Tree class.
Header of Tree distance methods.
utils::Range< IteratorPostorder< TreeLink const, TreeNode const, TreeEdge const > > postorder(ElementType const &element)
void file_write(std::string const &content, std::string const &filename)
Write the content of a string to a file.
Definition: fs.cpp:106
void to_string(Tree const &tree, std::string &ts) const
Gives a Newick string representation of the tree.