A toolkit for working with phylogenetic data.
v0.24.0
tree/formats/newick/reader.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 #include "genesis/tree/tree.hpp"
36 
40 
46 
47 #include <cassert>
48 #include <cctype>
49 #include <deque>
50 #include <iostream>
51 #include <memory>
52 #include <sstream>
53 #include <stdexcept>
54 
55 namespace genesis {
56 namespace tree {
57 
58 // =================================================================================================
59 // Reading
60 // =================================================================================================
61 
62 Tree NewickReader::read( std::shared_ptr<utils::BaseInputSource> source ) const
63 {
64  utils::InputStream it( source );
65  return parse_single_tree( it );
66 }
67 
69  std::shared_ptr<utils::BaseInputSource> source,
70  TreeSet& target,
71  std::string const& default_name
72 ) const {
73  utils::InputStream it( source );
74  parse_multiple_trees( it, target, default_name );
75 }
76 
78  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
79  TreeSet& target,
80  std::string const& default_name
81 ) const {
82  for( auto const& source : sources ) {
83  read( source, target, default_name );
84  }
85 }
86 
88  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
89  std::string const& default_name
90 ) const {
91  TreeSet result;
92  for( auto const& source : sources ) {
93  read( source, result, default_name );
94  }
95  return result;
96 }
97 
98 // =================================================================================================
99 // Parse Single Tree
100 // =================================================================================================
101 
103 {
104  // Get name and tree, only use tree.
105  auto tree = parse_named_tree( input_stream ).second;
106 
107  // If we just read this tree, continue until end of stream.
108  if( ! stop_after_semicolon_ ) {
109  parse_trailing_input_( input_stream );
110  }
111 
112  // Return resulting tree.
113  return tree;
114 }
115 
116 // =================================================================================================
117 // Parse Multiple Trees
118 // =================================================================================================
119 
121  utils::InputStream& input_stream,
122  TreeSet& tree_set,
123  std::string const& default_name
124 ) const {
125  // Count how many unnamed trees we have seen.
126  size_t unnamed_ctr = 0;
127 
128  while( input_stream ) {
129 
130  // Get name and tree.
131  auto named_tree = parse_named_tree( input_stream );
132 
133  // If there are no trees left, we are done.
134  if( named_tree.first.empty() && named_tree.second.empty() ) {
135  return;
136  }
137 
138  // Fill in default name if needed.
139  if( named_tree.first.empty() ) {
140  named_tree.first = default_name + std::to_string( unnamed_ctr );
141  ++unnamed_ctr;
142  }
143 
144  // Store it in the TreeSet, without any copy steps.
145  tree_set.add( std::move( named_tree.second ), named_tree.first );
146  }
147 }
148 
149 // =================================================================================================
150 // Parse Named Tree
151 // =================================================================================================
152 
153 std::pair< std::string, Tree > NewickReader::parse_named_tree( utils::InputStream& input_stream ) const
154 {
155  // Helper function for valid tree name chars.
156  auto is_valid_tree_name_char = [&]( char c ){
157  return utils::is_print(c)
158  && ! utils::is_space(c)
159  && c != ';'
160  && c != '('
161  && c != ')'
162  && c != '='
163  ;
164  };
165 
166  // Skip leading stuff.
167  while( input_stream ) {
168 
169  // Whitespaces.
170  utils::skip_while( input_stream, utils::is_space );
171 
172  // No input, return empty tree.
173  // We can never read an empty tree from an input, so this is useful to distungish
174  // whether we were able to read a tree from the input.
175  if( ! input_stream ) {
176  return {};
177  }
178 
179  // Skip comments.
180  if( *input_stream == '[' ) {
181  ++input_stream;
182  utils::read_until( input_stream, ']' );
183 
184  if( !input_stream ) {
185  throw std::runtime_error(
186  "Reached unexpected end of Newick tree at " + input_stream.at()
187  );
188  }
189  assert( *input_stream == ']' );
190  ++input_stream;
191 
192  continue;
193  }
194 
195  // If neither applies, we are done here.
196  break;
197  }
198  assert( input_stream );
199 
200  // Get the name of the current tree, if there is one.
201  std::string name = "";
202  if( *input_stream != '(' ) {
203 
204  // Distinguish between names in quotes, and those without.
205  // Names without quotes cannot contain certain chars, see is_valid_tree_name_char().
206  if( *input_stream == '"' || *input_stream == '\'' ) {
207  name = utils::parse_quoted_string( input_stream, false, true, false );
208  } else {
209  name = utils::read_while( input_stream, is_valid_tree_name_char );
210  }
211 
212  // Always allow white spaces...
213  utils::skip_while( input_stream, utils::is_space );
214 
215  // After a name, we expect an equals sign.
216  if( *input_stream != '=' ) {
217  throw std::runtime_error(
218  "Invalid character '" + std::string( 1, *input_stream ) + "' at "
219  + input_stream.at() + ". Expecting '='."
220  );
221  }
222 
223  assert( *input_stream == '=' );
224  ++input_stream;
225 
226  // After a name, there has to be something.
227  if( ! input_stream ) {
228  throw std::runtime_error( "Unexpected end of tree at " + input_stream.at() + "." );
229  }
230  }
231 
232  // Parse the tree and return it.
233  auto broker = parse_tree_to_broker_( input_stream );
234  auto tree = broker_to_tree_destructive( broker );
235  return { name, std::move( tree ) };
236 }
237 
238 // =================================================================================================
239 // Parse Trailing Input
240 // =================================================================================================
241 
242 void NewickReader::parse_trailing_input_( utils::InputStream& input_stream ) const
243 {
244  // Check for more data after the semicolon. We cannot do this check in the parsing function,
245  // as there are cases where we read a Newick tree as part of another file (e.g, Nexus or Jplace),
246  // where it is natural that there is more data after the tree finished.
247  Token ct;
248  ct.type = TokenType::kUnknown;
249  while( input_stream && ct.type != TokenType::kEnd ) {
250  ct = get_next_token_( input_stream );
251  if( ct.type != TokenType::kEnd && ct.type != TokenType::kComment ) {
252  throw std::runtime_error( "Tree contains more data after the semicolon at " + ct.at() );
253  }
254  }
255 }
256 
257 // =================================================================================================
258 // Token Lexing
259 // =================================================================================================
260 
261 NewickReader::Token NewickReader::get_next_token_( utils::InputStream& input_stream ) const
262 {
263  // Prepare result token.
264  Token result;
265 
266  // Shorthand.
267  auto& is = input_stream;
268 
269  // Helper function to distinguish valid chars in a Newick name string.
270  // According to http://evolution.genetics.washington.edu/phylip/newicktree.html :
271  // "A name can be any string of printable characters except blanks, colons, semicolons,
272  // parentheses, and square brackets." Well, they forgot to mention commas here.
273  // But we knew before that Newick is not a good format anyway...
274  // Also, if enable_tags_ is true, we do not allow {}, as those are used for tags.
275  auto is_valid_name_char = [&]( char c ){
276  return utils::is_print(c)
277  && ! utils::is_space(c)
278  && c != ':'
279  && c != ';'
280  && c != '('
281  && c != ')'
282  && c != '['
283  && c != ']'
284  && c != ','
285  && ( ! enable_tags_ || ( c != '{' && c != '}' ))
286  ;
287  };
288 
289  // Skip initial whitespace, then set the current position in the stream.
290  // This is where the token begins.
292  result.line = is.line();
293  result.column = is.column();
294 
295  // Find token type and text from the stream.
296  if( !is ) {
297  result.type = TokenType::kEnd;
298 
299  } else if( *is == '(' ) {
300  result.type = TokenType::kOpeningParenthesis;
301  ++is;
302 
303  } else if( *is == ')' ) {
304  result.type = TokenType::kClosingParenthesis;
305  ++is;
306 
307  } else if( *is == ',' ) {
308  result.type = TokenType::kComma;
309  ++is;
310 
311  } else if( *is == ';' ) {
312  result.type = TokenType::kSemicolon;
313  ++is;
314 
315  } else if( *is == '=' ) {
316  result.type = TokenType::kEquals;
317  ++is;
318 
319  } else if( *is == '[' ) {
320  result.type = TokenType::kComment;
321  ++is;
322  result.text = utils::read_until( is, ']' );
323 
324  if( !is ) {
325  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
326  }
327  assert( *is == ']' );
328  ++is;
329 
330  } else if( *is == ':' ) {
331  result.type = TokenType::kValue;
332  ++is;
333  result.text = utils::parse_number_string( is );
334 
335  } else if( *is == '{' && enable_tags_ ) {
336  result.type = TokenType::kTag;
337  ++is;
338  result.text = utils::read_until( is, '}' );
339 
340  if( !is ) {
341  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
342  }
343  assert( *is == '}' );
344  ++is;
345 
346  } else if( *is == '"' || *is == '\'' ) {
347  result.type = TokenType::kString;
348  result.text = utils::parse_quoted_string( is, false, true, false );
349 
350  } else if( is_valid_name_char( *is )) {
351  result.type = TokenType::kString;
352  result.text = utils::read_while( is, is_valid_name_char );
353 
354  } else {
355  result.type = TokenType::kUnknown;
356  }
357 
358  return result;
359 }
360 
361 // =================================================================================================
362 // Parse Tree into Broker
363 // =================================================================================================
364 
365 NewickBroker NewickReader::parse_tree_to_broker_( utils::InputStream& input_stream ) const
366 {
367  // Create result broker.
368  NewickBroker broker;
369 
370  // Create a node that is currently being populated with data.
371  // This is copied into the broker whenever we finish a tree node.
372  NewickBrokerElement node;
373 
374  // How deep is the current token nested in the tree?
375  int depth = 0;
376 
377  // Was it closed at some point? We want to avoid a tree like "()();" to be parsed!
378  bool closed = false;
379 
380  // Store current token, start with an invalid value to indicate problems.
381  Token ct;
382  ct.type = TokenType::kEnd;
383 
384  // Store previous token.
385  // In the beginning of the loop, we set pt to ct, so that in the first iteration we have
386  // pt == TokenType::kEnd. This is used as indicator that we are in the first iteration.
387  Token pt;
388 
389  // --------------------------------------------------------------
390  // Loop over lexer tokens and check if it...
391  // --------------------------------------------------------------
392 
393  while( input_stream ) {
394  // Init the previous token to what the current token (of the previous iteration) was.
395  // In the first iteration, this inits to the kEnd token.
396  // Then, get the next token.
397  pt = ct;
398  ct = get_next_token_( input_stream );
399 
400  // Treat some special error cases.
401  if( ct.type == TokenType::kUnknown ) {
402  throw std::runtime_error(
403  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
404  );
405  }
406  if( ct.type == TokenType::kEnd ) {
407  break;
408  }
409 
410  // ------------------------------------------------------
411  // is bracket '(' ==> begin of subtree
412  // ------------------------------------------------------
413 
414  if( ct.type == TokenType::kOpeningParenthesis ) {
415  if( pt.type != TokenType::kEnd && !(
416  pt.type == TokenType::kOpeningParenthesis ||
417  pt.type == TokenType::kComma ||
418  pt.type == TokenType::kComment
419  )) {
420  throw std::runtime_error(
421  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
422  );
423  }
424 
425  if (closed) {
426  throw std::runtime_error(
427  "Tree was already closed. Cannot reopen it with '(' at " + ct.at() + "."
428  );
429  }
430 
431  ++depth;
432  continue;
433  }
434 
435  // ------------------------------------------------------
436  // Prepare for all other tokens.
437  // ------------------------------------------------------
438 
439  // if we reach this, the previous condition is not fullfilled (otherwise, the
440  // `continue` statement just above would
441  // have been called). so we have a token other than '(', which means we should already
442  // be somewhere in the tree (or a comment). check, if that is true.
443  if( pt.type == TokenType::kEnd ) {
444 
445  // If it is a comment before the start of the tree, we cannot attach it to any node,
446  // so just skip it and reset the current token to end, so that the next iteration
447  // starts fresh.
448  if( ct.type == TokenType::kComment ) {
449  ct.type = TokenType::kEnd;
450  continue;
451  }
452 
453  throw std::runtime_error( "Tree does not start with '(' at " + ct.at() + "." );
454  }
455 
456  // if we reached this point in code, this means that ct != begin, so it is not the first
457  // iteration in this loop. this means that pt was already set in the loop header (at least
458  // once), which means it now points to a valid token.
459  assert( pt.type != TokenType::kEnd );
460 
461  // set up the node that will be filled with data now.
462  // We use depth == -1 as an indicator whether it is already initialized.
463  // If it is already initialized, this means we are adding more information to it, e.g.
464  // a branch length or a tag. so we do not need to create it.
465  // however, if this node is not initialized, this means we saw a token before that finished
466  // a node and pushed it to the stack (either closing bracket or comma), so we need to init
467  // it here.
468  if( node.depth == -1 ) {
469  node.depth = depth;
470  }
471 
472  // ------------------------------------------------------
473  // is symbol or string ==> label
474  // ------------------------------------------------------
475 
476  if( ct.type == TokenType::kString ) {
477  if (!(
478  pt.type == TokenType::kOpeningParenthesis ||
479  pt.type == TokenType::kClosingParenthesis ||
480  pt.type == TokenType::kComma ||
481  pt.type == TokenType::kComment
482  )) {
483  throw std::runtime_error(
484  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
485  );
486  }
487 
488  // populate the node
489  node.name = ct.text;
490  continue;
491  }
492 
493  // ------------------------------------------------------
494  // is number ==> branch length
495  // ------------------------------------------------------
496 
497  if( ct.type == TokenType::kValue ) {
498  if (!(
499  pt.type == TokenType::kOpeningParenthesis ||
500  pt.type == TokenType::kClosingParenthesis ||
501  pt.type == TokenType::kString ||
502  pt.type == TokenType::kComma ||
503  pt.type == TokenType::kComment
504  )) {
505  throw std::runtime_error(
506  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
507  );
508  }
509 
510  // populate the node
511  node.values.push_back( ct.text );
512  continue;
513  }
514 
515  // ------------------------------------------------------
516  // is tag {} ==> tag
517  // ------------------------------------------------------
518 
519  if( ct.type == TokenType::kTag ) {
520  // in some newick extensions, a tag has a semantic meaning that belongs to the
521  // current node/edge, thus we need to store it
522 
523  // populate the node
524  node.tags.push_back(ct.text);
525  continue;
526  }
527 
528  // ------------------------------------------------------
529  // is comment [] ==> comment
530  // ------------------------------------------------------
531 
532  if( ct.type == TokenType::kComment ) {
533  // in some newick extensions, a comment has a semantic meaning that belongs to the
534  // current node/edge, thus we need to store it
535 
536  // populate the node
537  node.comments.push_back(ct.text);
538  continue;
539  }
540 
541  // ------------------------------------------------------
542  // is comma ',' ==> next subtree
543  // ------------------------------------------------------
544 
545  if( ct.type == TokenType::kComma ) {
546  if (!(
547  pt.type == TokenType::kOpeningParenthesis ||
548  pt.type == TokenType::kClosingParenthesis ||
549  pt.type == TokenType::kString ||
550  pt.type == TokenType::kComma ||
551  pt.type == TokenType::kValue ||
552  pt.type == TokenType::kTag ||
553  pt.type == TokenType::kComment
554  )) {
555  throw std::runtime_error( "Invalid ',' at " + ct.at() + "." );
556  }
557 
558  // Store and finish the current node. Then, make a new, uninitialized one.
559  broker.push_top( node );
560  node = NewickBrokerElement();
561  continue;
562  }
563 
564  // ------------------------------------------------------
565  // is bracket ')' ==> end of subtree
566  // ------------------------------------------------------
567 
568  if( ct.type == TokenType::kClosingParenthesis ) {
569  if (depth == 0) {
570  throw std::runtime_error( "Too many ')' at " + ct.at() + "." );
571  }
572  if (!(
573  pt.type == TokenType::kOpeningParenthesis ||
574  pt.type == TokenType::kClosingParenthesis ||
575  pt.type == TokenType::kString ||
576  pt.type == TokenType::kComma ||
577  pt.type == TokenType::kValue ||
578  pt.type == TokenType::kTag ||
579  pt.type == TokenType::kComment
580  )) {
581  throw std::runtime_error( "Invalid ')' at " + ct.at() + ": '" + ct.text + "'." );
582  }
583 
584  // Store and finish the current node. Then, make a new, uninitialized one.
585  broker.push_top( node );
586  node = NewickBrokerElement();
587 
588  // decrease depth and check if this was the parenthesis that closed the tree
589  --depth;
590  if (depth == 0) {
591  closed = true;
592  }
593  continue;
594  }
595 
596  // ------------------------------------------------------
597  // is semicolon ';' ==> end of tree
598  // ------------------------------------------------------
599 
600  if( ct.type == TokenType::kSemicolon ) {
601  if (depth != 0) {
602  throw std::runtime_error(
603  "Not enough ')' in tree before closing it with ';' at " + ct.at() + "."
604  );
605  }
606  if (!(
607  pt.type == TokenType::kClosingParenthesis ||
608  pt.type == TokenType::kString ||
609  pt.type == TokenType::kValue ||
610  pt.type == TokenType::kTag ||
611  pt.type == TokenType::kComment
612  )) {
613  throw std::runtime_error( "Invalid ';' at " + ct.at() + ": '" + ct.text + "'." );
614  }
615 
616  // Store and finish the current node. Then, make a new, uninitialized one.
617  broker.push_top( node );
618  node = NewickBrokerElement();
619  break;
620  }
621 
622  // If we reach this part of the code, all checkings for token types are done.
623  // as we check for every type that NewickLexer yields, and we use a continue or break
624  // in each of them, we should never reach this point, unless we forgot a type!
625  assert(false);
626  }
627 
628  // Tree has to finish with semicolon. Particularly ct.type == TokenType::kEnd is not allowed
629  // to happen here!
630  if( ct.type != TokenType::kSemicolon ) {
631  throw std::runtime_error( "Tree does not finish with a semicolon." );
632  }
633 
634  return broker;
635 }
636 
637 // =================================================================================================
638 // Broker to Tree
639 // =================================================================================================
640 
642 {
643  // Prepare result and intermediate data.
644  Tree tree;
645  std::vector< TreeLink* > link_stack;
646  broker_to_tree_prepare_( broker, tree );
647 
648  // Iterate over all nodes of the tree broker.
649  for( auto b_itr = broker.cbegin(); b_itr != broker.cend(); ++b_itr ) {
650  broker_to_tree_element_( *b_itr, link_stack, tree );
651  }
652  assert(link_stack.empty());
653 
654  // Finish the work.
655  broker_to_tree_finish_( tree );
656  return tree;
657 }
658 
660 {
661  // Prepare result and intermediate data.
662  Tree tree;
663  std::vector< TreeLink* > link_stack;
664  broker_to_tree_prepare_( broker, tree );
665 
666  // Iterate over all nodes of the tree broker,
667  // then delete the element from the broker to save space.
668  while( ! broker.empty() ) {
669  broker_to_tree_element_( broker.top(), link_stack, tree );
670  broker.pop_top();
671  }
672  assert(link_stack.empty());
673 
674  // Finish the work.
675  broker_to_tree_finish_( tree );
676  return tree;
677 }
678 
679 void NewickReader::broker_to_tree_prepare_( NewickBroker const& broker, Tree& tree ) const
680 {
681  // we need the ranks (number of immediate children) of all nodes
682  broker.assign_ranks();
683 
684  // Call all prepare plugins
685  for( auto const& prepare_plugin : prepare_reading_plugins ) {
686  prepare_plugin( broker, tree );
687  }
688 }
689 
690 void NewickReader::broker_to_tree_element_(
691  NewickBrokerElement const& broker_node,
692  std::vector<TreeLink*>& link_stack,
693  Tree& tree
694 ) const {
695  // Shortcut to tree containers.
696  auto& links = tree.expose_link_container();
697  auto& nodes = tree.expose_node_container();
698  auto& edges = tree.expose_edge_container();
699 
700  // create the tree node for this broker node
701  auto cur_node_u = utils::make_unique< TreeNode >();
702  auto cur_node = cur_node_u.get();
703  cur_node->reset_index( nodes.size() );
704 
705  // Create data pointer, if there is a suitable function. Otherwise, data is
706  // simply a nullptr, i.e., there is no data.
708  create_node_data_plugin( *cur_node );
709  }
710 
711  // Call all node plugins.
712  for( auto const& node_plugin : element_to_node_plugins ) {
713  node_plugin( broker_node, *cur_node );
714  }
715 
716  // Add the node.
717  nodes.push_back(std::move(cur_node_u));
718 
719  // create the link that points towards the root.
720  // this link is created for every node, root, inner and leaves.
721  auto up_link_u = utils::make_unique< TreeLink >();
722  auto up_link = up_link_u.get();
723  up_link->reset_node( cur_node );
724  cur_node->reset_primary_link( up_link );
725  up_link->reset_index( links.size() );
726  links.push_back(std::move(up_link_u));
727 
728  // establish the link towards the root
729  if (link_stack.empty()) {
730  // if the link stack is empty, we are currently at the very beginning of this loop,
731  // which means we are at the root itself. in this case, make the "link towards the root"
732  // point to itself.
733  up_link->reset_outer( up_link );
734  } else {
735  // if we are however in some other node (leaf or inner, but not the root), we establish
736  // the link "upwards" to the root, and back from there.
737  up_link->reset_outer( link_stack.back() );
738  link_stack.back()->reset_outer( up_link );
739 
740  // also, create an edge that connects both nodes
741  auto up_edge = utils::make_unique< TreeEdge >(
742  edges.size(),
743  link_stack.back(),
744  up_link
745  );
746 
747  up_link->reset_edge( up_edge.get() );
748  link_stack.back()->reset_edge( up_edge.get() );
749 
750  // Create data pointer, if there is a suitable function. Otherwise, data is
751  // simply a nullptr, i.e., there is no data.
753  create_edge_data_plugin( *up_edge );
754  }
755 
756  // Call all edge plugins.
757  for( auto const& edge_plugin : element_to_edge_plugins ) {
758  edge_plugin( broker_node, *up_edge );
759  }
760 
761  // Add the edge.
762  edges.push_back(std::move(up_edge));
763 
764  // we can now delete the head of the stack, because we just estiablished its "downlink"
765  // and thus are done with it
766  link_stack.pop_back();
767  }
768 
769  // in the following, we create the links that will connect to the nodes' children.
770  // for leaf nodes, this makes the next pointer point to the node itself (the loop
771  // is never executed in this case, as leaves have rank 0).
772  // for inner nodes, we create as many "down" links as they have children. each of them
773  // is pushed to the stack, so that for the next broker nodes they are available as
774  // reciever for the "up" links.
775  // in summary, make all next pointers of a node point to each other in a circle.
776  auto prev_link = up_link;
777  for (int i = 0; i < broker_node.rank(); ++i) {
778  auto down_link = utils::make_unique< TreeLink >();
779  prev_link->reset_next( down_link.get() );
780  prev_link = down_link.get();
781 
782  down_link->reset_node( cur_node );
783  down_link->reset_index( links.size() );
784  link_stack.push_back(down_link.get());
785  links.push_back(std::move(down_link));
786  }
787  prev_link->reset_next( up_link );
788 }
789 
790 void NewickReader::broker_to_tree_finish_(
791  Tree& tree
792 ) const {
793  // Shortcut to tree containers.
794  auto& links = tree.expose_link_container();
795 
796  // we pushed elements to the link_stack for all children of the nodes and popped them when we
797  // were done processing those children, so there should be no elements left. this assumes that
798  // NewickBroker.assign_ranks() does its job properly!
799  // Already checked in the caller functions!
800  // assert(link_stack.empty());
801 
802  // now delete the uplink of the root, in order to make the tree fully unrooted.
803  // (we do that after the tree creation, as it is way easier this way)
804  assert( &links.front()->outer() == links.front().get() );
805  auto next = &links.front()->next();
806  while( &next->next() != links.front().get() ) {
807  next = &next->next();
808  }
809  next->reset_next( &next->next().next() );
810  assert( &next->next() == &links.front()->next() );
811  links.erase(links.begin());
812  for (size_t i = 0; i < links.size(); ++i) {
813  links[i]->reset_index(i);
814  }
815  next->node().reset_primary_link( &next->next() );
816 
817  // Lastly, make sure to correctly set the root link of the tree.
818  // Assert that the link is already the primary link of its node.
819  assert( links.front().get() == &links.front().get()->node().link() );
820  tree.reset_root_link( links.front().get() );
821 
822  // Call all finish plugins.
823  for( auto const& finish_plugin : finish_reading_plugins ) {
824  finish_plugin( tree );
825  }
826 }
827 
828 // =================================================================================================
829 // Settings
830 // =================================================================================================
831 
833 {
834  enable_tags_ = value;
835  return *this;
836 }
837 
839 {
840  return enable_tags_;
841 }
842 
844 {
845  stop_after_semicolon_ = value;
846  return *this;
847 }
848 
850 {
851  return stop_after_semicolon_;
852 }
853 
854 } // namespace tree
855 } // namespace genesis
std::vector< std::string > values
Numerical values associated with the node, i.e. branch lengths.
Definition: element.hpp:122
std::string parse_quoted_string(utils::InputStream &source, bool use_escapes, bool use_twin_quotes, bool include_qmarks)
Read a string in quotation marks from a stream and return it.
Definition: parser.cpp:116
const_iterator cend() const
Const version of end().
Definition: broker.cpp:262
void skip_while(InputStream &source, char criterion)
Lexing function that advances the stream while its current char equals the provided one...
Definition: scanner.hpp:153
void add(Tree const &tree, std::string const &name="")
Add a Tree with a name to the TreeSet.
Definition: tree_set.hpp:88
bool stop_after_semicolon() const
Return whether currently reading stops after the semicolon that finishes a Newick tree...
bool empty() const
Returns whether the stack is empty.
Definition: broker.cpp:173
Tree read(std::shared_ptr< utils::BaseInputSource > source) const
Read a single Tree from an input source containing a Newick tree.
Tree broker_to_tree_destructive(NewickBroker &broker) const
Build a Tree from a NewickBroker.
const_iterator cbegin() const
Const version of begin().
Definition: broker.cpp:257
Tree parse_single_tree(utils::InputStream &input_stream) const
Parse a single tree. Depending on stop_after_semicolon(), stop after the semicolon or continue until ...
void push_top(NewickBrokerElement const &node)
Definition: broker.cpp:51
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::pair< std::string, Tree > parse_named_tree(utils::InputStream &input_stream) const
Parse one named tree, i.e., a tree as described here.
Provides some valuable additions to STD.
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:156
Stores a Newick tree in an intermediate format that can be further processed into a Tree...
Definition: broker.hpp:106
std::string read_while(InputStream &source, char criterion)
Lexing function that reads from the stream while its current char equals the provided one...
Definition: scanner.hpp:216
NewickBrokerElement & top()
Returns a reference to the top node of the tree stack.
Definition: broker.cpp:213
std::string at() const
Return a textual representation of the current input position in the form "line:column".
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
constexpr bool is_print(char c) noexcept
Return whether a char is a printable character, according to isprint of the cctype header...
Definition: char.hpp:206
void assign_ranks() const
Iterate over the tree and assign ranks (= number of immediate children) to all nodes.
Definition: broker.cpp:85
Tree & reset_root_link(TreeLink *root_link)
Reset the link that is considered to be the root of the Tree.
Definition: tree/tree.cpp:184
std::vector< prepare_reading_function > prepare_reading_plugins
std::vector< std::string > comments
Arbitrary strings that can be attached to a node, e.g. in Newick format via "[]". ...
Definition: element.hpp:132
bool enable_tags() const
Return whether currently Newick tags are enabled.
create_edge_data_function create_edge_data_plugin
LinkContainerType & expose_link_container()
Get the container that stores all TreeLinks of the Tree.
Definition: tree/tree.cpp:192
std::vector< std::string > tags
Arbitrary strings that can be attached to a node, e.g. in Newick format via "{}". ...
Definition: element.hpp:127
Provides some commonly used string utility functions.
std::vector< finish_reading_function > finish_reading_plugins
Provides functions for accessing the file system.
Provides easy and fast logging functionality.
void parse_multiple_trees(utils::InputStream &input_stream, TreeSet &tree_set, std::string const &default_name) const
Parse until the end of the stream and add all Trees to the TreeSet.
std::string parse_number_string(utils::InputStream &source)
Read a general number string from an input stream.
Definition: parser.cpp:48
std::vector< element_to_edge_function > element_to_edge_plugins
std::string read_until(InputStream &source, char criterion)
Lexing function that reads from the stream until its current char equals the provided one...
Definition: scanner.hpp:254
create_node_data_function create_node_data_plugin
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
Header of Tree class.
long rank() const
Returns the rank (number of immediate children) of this node.
Definition: element.hpp:149
std::string name
Name of the node.
Definition: element.hpp:114
Tree broker_to_tree(NewickBroker const &broker) const
Build a Tree from a NewickBroker.
constexpr bool is_space(char c) noexcept
Return whether a char is some form of white space charater, so either space, tab, new line...
Definition: char.hpp:197
EdgeContainerType & expose_edge_container()
Get the container that stores all TreeEdges of the Tree.
Definition: tree/tree.cpp:202
Store the information for one element of a Newick tree.
Definition: element.hpp:60
NodeContainerType & expose_node_container()
Get the container that stores all TreeNodes of the Tree.
Definition: tree/tree.cpp:197
Stream interface for reading data from an InputSource, that keeps track of line and column counters...
long depth
Depth of the node in the tree, i.e. its distance from the root.
Definition: element.hpp:137
std::vector< element_to_node_function > element_to_node_plugins