A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tree/formats/newick/writer.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
37 #include "genesis/tree/tree.hpp"
38 
43 
44 #include <cassert>
45 #include <deque>
46 #include <memory>
47 #include <ostream>
48 #include <sstream>
49 #include <stdexcept>
50 #include <vector>
51 
52 namespace genesis {
53 namespace tree {
54 
55 // =================================================================================================
56 // Writing
57 // =================================================================================================
58 
59 void NewickWriter::to_stream( Tree const& tree, std::ostream& os ) const
60 {
61  broker_to_stream( tree_to_broker( tree ), os );
62 }
63 
65  Tree const& tree, std::string const& filename
66 ) const {
67  std::ofstream ofs;
68  utils::file_output_stream( filename, ofs );
69  to_stream( tree, ofs );
70 }
71 
73  Tree const& tree, std::string& ts
74 ) const {
75  std::ostringstream oss;
76  to_stream( tree, oss );
77  ts = oss.str();
78 }
79 
80 std::string NewickWriter::to_string( Tree const& tree ) const
81 {
82  std::ostringstream oss;
83  to_stream( tree, oss );
84  return oss.str();
85 }
86 
87 // =================================================================================================
88 // Intermediate Functions
89 // =================================================================================================
90 
92 {
93  NewickBroker broker;
94  for( auto const& prepare_plugin : prepare_writing_plugins ) {
95  prepare_plugin( tree, broker );
96  }
97 
98  // store the depth from each node to the root. this is needed to assign levels of depth
99  // to the nodes for the broker.
100  auto depth = node_path_length_vector(tree);
101 
102  // now fill the broker with nodes via postorder traversal, so that the root is put on top last.
103  broker.clear();
104  for( auto it : postorder(tree) ) {
106  bn.depth = depth[ it.node().index() ];
107 
108  for( auto const& node_plugin : node_to_element_plugins ) {
109  node_plugin( it.node(), bn );
110  }
111  // only write edge data to the broker element if it is not the last iteration.
112  // the last iteration is the root, which usually does not have edge information in newick.
113  // caveat: for the root node, the edge will point to an arbitrary edge away from the root.
114  if( !it.is_last_iteration() ) {
115  for( auto const& edge_plugin : edge_to_element_plugins ) {
116  edge_plugin( it.edge(), bn );
117  }
118  }
119 
120  broker.push_top(bn);
121  }
122 
123  broker.assign_ranks();
124  for( auto const& finish_plugin : finish_writing_plugins ) {
125  finish_plugin( tree, broker );
126  }
127  return broker;
128 }
129 
130 void NewickWriter::broker_to_stream( NewickBroker const& broker, std::ostream& os ) const
131 {
132  // Assertion helpers: how many parenthesis were written?
133  size_t op = 0;
134  size_t cp = 0;
135 
136  // Iterate broker in reverse order, because Newick...
137  size_t prev_depth = 0;
138  for( int pos = broker.size() - 1; pos >= 0 ; --pos ) {
139  auto const& elem = broker[pos];
140 
141  // Opening parenthesis.
142  // We open as many as needed to get to the depth of the current element.
143  // They will all be closed when processing the respective parent elements.
144  for( int i = prev_depth; i < elem.depth; ++i ) {
145  os << "(";
146  ++op;
147  }
148  os << element_to_string_( broker[pos] );
149 
150  // Stop if it is the root. Don't have to write parenthesis or commas after the root element.
151  if( pos == 0 ) {
152  continue;
153  }
154 
155  // Closing parenthesis or comma for next element.
156  // Even for "empty" elements (e.g., inner nodes with no names), this is called,
157  // which ensures correct nesting.
158  if( broker[ pos - 1 ].depth == elem.depth - 1 ) {
159  os << ")";
160  ++cp;
161  } else {
162  os << ",";
163  }
164  prev_depth = elem.depth;
165  }
166 
167  // Have to have written as many opening as closing parenthesis.
168  // Use void casts to avoid compiler warnings in release mode.
169  assert( op == cp );
170  (void) op;
171  (void) cp;
172 
173  os << ";";
174 }
175 
176 void NewickWriter::broker_to_file( NewickBroker const& broker, std::string const& filename) const
177 {
178  std::ofstream ofs;
179  utils::file_output_stream( filename, ofs );
180  broker_to_stream( broker, ofs );
181 }
182 
183 void NewickWriter::broker_to_string( NewickBroker const& broker, std::string& ts ) const
184 {
185  std::ostringstream oss;
186  broker_to_stream( broker, oss );
187  ts = oss.str();
188 }
189 
190 std::string NewickWriter::broker_to_string( NewickBroker const& broker ) const
191 {
192  std::ostringstream oss;
193  broker_to_stream( broker, oss );
194  return oss.str();
195 }
196 
197 // =================================================================================================
198 // Internal Functions
199 // =================================================================================================
200 
201 std::string NewickWriter::element_to_string_( NewickBrokerElement const& bn ) const
202 {
203  // Prepare result
204  std::string res;
205 
206  // Write name.
207  if( write_names_ ) {
208  // Find out whether we need to put this name in quotation marks.
209  // According to http://evolution.genetics.washington.edu/phylip/newicktree.html :
210  // "A name can be any string of printable characters except blanks, colons, semicolons,
211  // parentheses, and square brackets." Well, they forgot to mention commas here.
212  // But we knew before that Newick is not a good format anyway...
213  // Also, if write_tags_ is true, we also quote {}, as those are used for tags.
214  bool need_qmarks = force_quot_marks_;
215  need_qmarks |= ( std::string::npos != bn.name.find_first_of( " :;()[]," ));
216  need_qmarks |= ( write_tags_ && std::string::npos != bn.name.find_first_of( "{}" ));
217 
218  if( need_qmarks ) {
219  res += quotation_marks_ + bn.name + quotation_marks_;
220  } else {
221  res += bn.name;
222  }
223  }
224 
225  // Write values (":...")
226  if( write_values_ ) {
227  for( std::string const& v : bn.values ) {
228  res += ":" + v;
229  }
230  }
231 
232  // Write comments ("[...]")
233  if( write_comments_ ) {
234  for( std::string const& c : bn.comments ) {
235  res += "[" + c + "]";
236  }
237  }
238 
239  // Write tags ("{...}")
240  if( write_tags_ ) {
241  for( std::string const& t : bn.tags ) {
242  res += "{" + t + "}";
243  }
244  }
245 
246  return res;
247 }
248 
249 std::string NewickWriter::to_string_rec_( NewickBroker const& broker, size_t pos ) const
250 {
251  // Old, recursive, slow version. Not used any more.
252 
253  // check if it is a leaf, stop recursion if so.
254  if (broker[pos].rank() == 0) {
255  return element_to_string_(broker[pos]);
256  }
257 
258  // recurse over all children of the current node. while doing so, build a stack of the resulting
259  // substrings in reverse order. this is because newick stores the nodes kind of "backwards",
260  // by starting at a leaf node instead of the root.
261  std::deque<std::string> children;
262  for (size_t i = pos + 1; i < broker.size() && broker[i].depth > broker[pos].depth; ++i) {
263  // skip if not immediate children (those will be called in later recursion steps)
264  if (broker[i].depth > broker[pos].depth + 1) {
265  continue;
266  }
267 
268  // do the recursion step for this child, add the result to a stack
269  children.push_front(to_string_rec_(broker, i));
270  }
271 
272  // build the string by iterating the stack
273  std::ostringstream out;
274  out << "(";
275  for (size_t i = 0; i < children.size(); ++i) {
276  if (i>0) {
277  out << ",";
278  }
279  out << children[i];
280  }
281  out << ")" << element_to_string_(broker[pos]);
282  return out.str();
283 }
284 
285 } // namespace tree
286 } // namespace genesis
std::vector< finish_writing_function > finish_writing_plugins
Collect all functions to be called after finishing the actual tree writing.
std::vector< size_t > node_path_length_vector(Tree const &tree, TreeNode const &node)
Return a vector containing the depth of all nodes with respect to the given start node...
std::vector< std::string > values
Numerical values associated with the node, i.e. branch lengths.
Definition: element.hpp:104
void broker_to_string(NewickBroker const &broker, std::string &ts) const
Gives a Newick string representation of the tree.
void file_output_stream(std::string const &filename, std::ofstream &out_stream, std::ios_base::openmode mode=std::ios_base::out)
Helper function to obtain an output stream to a file.
std::vector< node_to_element_function > node_to_element_plugins
Collect all functions to be called for each TreeNode in order to translate it to a Newick representat...
void assign_ranks() const
Iterate over the tree and assign ranks (= number of immediate children) to all nodes.
Definition: broker.cpp:85
void push_top(NewickBrokerElement const &node)
Definition: broker.cpp:51
int depth
Depth of the node in the tree, i.e. its distance from the root.
Definition: element.hpp:119
void broker_to_file(NewickBroker const &broker, std::string const &filename) const
Writes a NewickBroker to a file in Newick format.
Provides some valuable additions to STD.
NewickBroker tree_to_broker(Tree const &tree) const
Transform the information of the tree into a NewickBroker object.
void to_file(Tree const &tree, std::string const &filename) const
Writes the tree to a file in Newick format.
void broker_to_stream(NewickBroker const &broker, std::ostream &os) const
Write a NewickBroker to a stream, in Newick format.
Stores a Newick tree in an intermediate format that can be further processed into a Tree...
Definition: broker.hpp:106
size_t size() const
Returns the size of the stack, i.e. the number of nodes stored in the broker.
Definition: broker.cpp:175
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
std::vector< std::string > comments
Arbitrary strings that can be attached to a node, e.g. in Newick format via "[]". ...
Definition: element.hpp:114
std::vector< prepare_writing_function > prepare_writing_plugins
Collect all functions to be called before starting the actual tree writing.
std::vector< std::string > tags
Arbitrary strings that can be attached to a node, e.g. in Newick format via "{}". ...
Definition: element.hpp:109
void clear()
Deletes all nodes from the broker.
Definition: broker.cpp:46
Provides functions for accessing the file system.
Provides easy and fast logging functionality.
std::vector< edge_to_element_function > edge_to_element_plugins
Collect all functions to be called for each TreeEdge in order to translate it to a Newick representat...
Header of Tree class.
Header of Tree distance methods.
utils::Range< IteratorPostorder< TreeLink const, TreeNode const, TreeEdge const > > postorder(ElementType const &element)
std::string name
Name of the node.
Definition: element.hpp:96
void to_stream(Tree const &tree, std::ostream &os) const
Write a Tree to a stream, in Newick format.
Store the information for one element of a Newick tree.
Definition: element.hpp:60
void to_string(Tree const &tree, std::string &ts) const
Gives a Newick string representation of the tree.