A toolkit for working with phylogenetic data.
v0.22.1
tree/formats/newick/reader.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 #include "genesis/tree/tree.hpp"
36 
40 
45 
46 #include <cassert>
47 #include <cctype>
48 #include <deque>
49 #include <iostream>
50 #include <memory>
51 #include <sstream>
52 #include <stdexcept>
53 
54 namespace genesis {
55 namespace tree {
56 
57 // =================================================================================================
58 // Reading
59 // =================================================================================================
60 
61 Tree NewickReader::read( std::shared_ptr<utils::BaseInputSource> source ) const
62 {
63  utils::InputStream it( source );
64  return parse_single_tree( it );
65 }
66 
68  std::shared_ptr<utils::BaseInputSource> source,
69  TreeSet& target,
70  std::string const& default_name
71 ) const {
72  utils::InputStream it( source );
73  parse_multiple_trees( it, target, default_name );
74 }
75 
77  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
78  TreeSet& target,
79  std::string const& default_name
80 ) const {
81  for( auto const& source : sources ) {
82  read( source, target, default_name );
83  }
84 }
85 
87  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
88  std::string const& default_name
89 ) const {
90  TreeSet result;
91  for( auto const& source : sources ) {
92  read( source, result, default_name );
93  }
94  return result;
95 }
96 
97 // =================================================================================================
98 // Parse Single Tree
99 // =================================================================================================
100 
102 {
103  // Get name and tree, only use tree.
104  auto tree = parse_named_tree( input_stream ).second;
105 
106  // If we just read this tree, continue until end of stream.
107  if( ! stop_after_semicolon_ ) {
108  parse_trailing_input_( input_stream );
109  }
110 
111  // Return resulting tree.
112  return tree;
113 }
114 
115 // =================================================================================================
116 // Parse Multiple Trees
117 // =================================================================================================
118 
120  utils::InputStream& input_stream,
121  TreeSet& tree_set,
122  std::string const& default_name
123 ) const {
124  // Count how many unnamed trees we have seen.
125  size_t unnamed_ctr = 0;
126 
127  while( input_stream ) {
128 
129  // Get name and tree.
130  auto named_tree = parse_named_tree( input_stream );
131 
132  // If there are no trees left, we are done.
133  if( named_tree.first.empty() && named_tree.second.empty() ) {
134  return;
135  }
136 
137  // Fill in default name if needed.
138  if( named_tree.first.empty() ) {
139  named_tree.first = default_name + std::to_string( unnamed_ctr );
140  ++unnamed_ctr;
141  }
142 
143  // Store it in the TreeSet, without any copy steps.
144  tree_set.add( std::move( named_tree.second ), named_tree.first );
145  }
146 }
147 
148 // =================================================================================================
149 // Parse Named Tree
150 // =================================================================================================
151 
152 std::pair< std::string, Tree > NewickReader::parse_named_tree( utils::InputStream& input_stream ) const
153 {
154  // Helper function for valid tree name chars.
155  auto is_valid_tree_name_char = [&]( char c ){
156  return ::isprint(c)
157  && ! ::isspace(c)
158  && c != ';'
159  && c != '('
160  && c != ')'
161  && c != '='
162  ;
163  };
164 
165  // Skip leading stuff.
166  while( input_stream ) {
167 
168  // Whitespaces.
169  utils::skip_while( input_stream, ::isspace );
170 
171  // No input, return empty tree.
172  // We can never read an empty tree from an input, so this is useful to distungish
173  // whether we were able to read a tree from the input.
174  if( ! input_stream ) {
175  return {};
176  }
177 
178  // Skip comments.
179  if( *input_stream == '[' ) {
180  ++input_stream;
181  utils::read_until( input_stream, ']' );
182 
183  if( !input_stream ) {
184  throw std::runtime_error(
185  "Reached unexpected end of Newick tree at " + input_stream.at()
186  );
187  }
188  assert( *input_stream == ']' );
189  ++input_stream;
190 
191  continue;
192  }
193 
194  // If neither applies, we are done here.
195  break;
196  }
197  assert( input_stream );
198 
199  // Get the name of the current tree, if there is one.
200  std::string name = "";
201  if( *input_stream != '(' ) {
202 
203  // Distinguish between names in quotes, and those without.
204  // Names without quotes cannot contain certain chars, see is_valid_tree_name_char().
205  if( *input_stream == '"' || *input_stream == '\'' ) {
206  name = utils::parse_quoted_string( input_stream, false, true, false );
207  } else {
208  name = utils::read_while( input_stream, is_valid_tree_name_char );
209  }
210 
211  // Always allow white spaces...
212  utils::skip_while( input_stream, ::isspace );
213 
214  // After a name, we expect an equals sign.
215  if( *input_stream != '=' ) {
216  throw std::runtime_error(
217  "Invalid character '" + std::string( 1, *input_stream ) + "' at "
218  + input_stream.at() + ". Expecting '='."
219  );
220  }
221 
222  assert( *input_stream == '=' );
223  ++input_stream;
224 
225  // After a name, there has to be something.
226  if( ! input_stream ) {
227  throw std::runtime_error( "Unexpected end of tree at " + input_stream.at() + "." );
228  }
229  }
230 
231  // Parse the tree and return it.
232  auto broker = parse_tree_to_broker_( input_stream );
233  auto tree = broker_to_tree_destructive( broker );
234  return { name, std::move( tree ) };
235 }
236 
237 // =================================================================================================
238 // Parse Trailing Input
239 // =================================================================================================
240 
241 void NewickReader::parse_trailing_input_( utils::InputStream& input_stream ) const
242 {
243  // Check for more data after the semicolon. We cannot do this check in the parsing function,
244  // as there are cases where we read a Newick tree as part of another file (e.g, Nexus or Jplace),
245  // where it is natural that there is more data after the tree finished.
246  Token ct;
247  ct.type = TokenType::kUnknown;
248  while( input_stream && ct.type != TokenType::kEnd ) {
249  ct = get_next_token_( input_stream );
250  if( ct.type != TokenType::kEnd && ct.type != TokenType::kComment ) {
251  throw std::runtime_error( "Tree contains more data after the semicolon at " + ct.at() );
252  }
253  }
254 }
255 
256 // =================================================================================================
257 // Token Lexing
258 // =================================================================================================
259 
260 NewickReader::Token NewickReader::get_next_token_( utils::InputStream& input_stream ) const
261 {
262  // Prepare result token.
263  Token result;
264 
265  // Shorthand.
266  auto& is = input_stream;
267 
268  // Helper function to distinguish valid chars in a Newick name string.
269  // According to http://evolution.genetics.washington.edu/phylip/newicktree.html :
270  // "A name can be any string of printable characters except blanks, colons, semicolons,
271  // parentheses, and square brackets." Well, they forgot to mention commas here.
272  // But we knew before that Newick is not a good format anyway...
273  // Also, if enable_tags_ is true, we do not allow {}, as those are used for tags.
274  auto is_valid_name_char = [&]( char c ){
275  return ::isprint(c)
276  && ! ::isspace(c)
277  && c != ':'
278  && c != ';'
279  && c != '('
280  && c != ')'
281  && c != '['
282  && c != ']'
283  && c != ','
284  && ( ! enable_tags_ || ( c != '{' && c != '}' ))
285  ;
286  };
287 
288  // Skip initial whitespace, then set the current position in the stream.
289  // This is where the token begins.
290  utils::skip_while( is, ::isspace );
291  result.line = is.line();
292  result.column = is.column();
293 
294  // Find token type and text from the stream.
295  if( !is ) {
296  result.type = TokenType::kEnd;
297 
298  } else if( *is == '(' ) {
299  result.type = TokenType::kOpeningParenthesis;
300  ++is;
301 
302  } else if( *is == ')' ) {
303  result.type = TokenType::kClosingParenthesis;
304  ++is;
305 
306  } else if( *is == ',' ) {
307  result.type = TokenType::kComma;
308  ++is;
309 
310  } else if( *is == ';' ) {
311  result.type = TokenType::kSemicolon;
312  ++is;
313 
314  } else if( *is == '=' ) {
315  result.type = TokenType::kEquals;
316  ++is;
317 
318  } else if( *is == '[' ) {
319  result.type = TokenType::kComment;
320  ++is;
321  result.text = utils::read_until( is, ']' );
322 
323  if( !is ) {
324  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
325  }
326  assert( *is == ']' );
327  ++is;
328 
329  } else if( *is == ':' ) {
330  result.type = TokenType::kValue;
331  ++is;
332  result.text = utils::parse_number_string( is );
333 
334  } else if( *is == '{' && enable_tags_ ) {
335  result.type = TokenType::kTag;
336  ++is;
337  result.text = utils::read_until( is, '}' );
338 
339  if( !is ) {
340  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
341  }
342  assert( *is == '}' );
343  ++is;
344 
345  } else if( *is == '"' || *is == '\'' ) {
346  result.type = TokenType::kString;
347  result.text = utils::parse_quoted_string( is, false, true, false );
348 
349  } else if( is_valid_name_char( *is )) {
350  result.type = TokenType::kString;
351  result.text = utils::read_while( is, is_valid_name_char );
352 
353  } else {
354  result.type = TokenType::kUnknown;
355  }
356 
357  return result;
358 }
359 
360 // =================================================================================================
361 // Parse Tree into Broker
362 // =================================================================================================
363 
364 NewickBroker NewickReader::parse_tree_to_broker_( utils::InputStream& input_stream ) const
365 {
366  // Create result broker.
367  NewickBroker broker;
368 
369  // Create a node that is currently being populated with data.
370  // This is copied into the broker whenever we finish a tree node.
371  NewickBrokerElement node;
372 
373  // How deep is the current token nested in the tree?
374  int depth = 0;
375 
376  // Was it closed at some point? We want to avoid a tree like "()();" to be parsed!
377  bool closed = false;
378 
379  // Store current token, start with an invalid value to indicate problems.
380  Token ct;
381  ct.type = TokenType::kEnd;
382 
383  // Store previous token.
384  // In the beginning of the loop, we set pt to ct, so that in the first iteration we have
385  // pt == TokenType::kEnd. This is used as indicator that we are in the first iteration.
386  Token pt;
387 
388  // --------------------------------------------------------------
389  // Loop over lexer tokens and check if it...
390  // --------------------------------------------------------------
391 
392  while( input_stream ) {
393  // Init the previous token to what the current token (of the previous iteration) was.
394  // In the first iteration, this inits to the kEnd token.
395  // Then, get the next token.
396  pt = ct;
397  ct = get_next_token_( input_stream );
398 
399  // Treat some special error cases.
400  if( ct.type == TokenType::kUnknown ) {
401  throw std::runtime_error(
402  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
403  );
404  }
405  if( ct.type == TokenType::kEnd ) {
406  break;
407  }
408 
409  // ------------------------------------------------------
410  // is bracket '(' ==> begin of subtree
411  // ------------------------------------------------------
412 
413  if( ct.type == TokenType::kOpeningParenthesis ) {
414  if( pt.type != TokenType::kEnd && !(
415  pt.type == TokenType::kOpeningParenthesis ||
416  pt.type == TokenType::kComma ||
417  pt.type == TokenType::kComment
418  )) {
419  throw std::runtime_error(
420  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
421  );
422  }
423 
424  if (closed) {
425  throw std::runtime_error(
426  "Tree was already closed. Cannot reopen it with '(' at " + ct.at() + "."
427  );
428  }
429 
430  ++depth;
431  continue;
432  }
433 
434  // ------------------------------------------------------
435  // Prepare for all other tokens.
436  // ------------------------------------------------------
437 
438  // if we reach this, the previous condition is not fullfilled (otherwise, the
439  // `continue` statement just above would
440  // have been called). so we have a token other than '(', which means we should already
441  // be somewhere in the tree (or a comment). check, if that is true.
442  if( pt.type == TokenType::kEnd ) {
443 
444  // If it is a comment before the start of the tree, we cannot attach it to any node,
445  // so just skip it and reset the current token to end, so that the next iteration
446  // starts fresh.
447  if( ct.type == TokenType::kComment ) {
448  ct.type = TokenType::kEnd;
449  continue;
450  }
451 
452  throw std::runtime_error( "Tree does not start with '(' at " + ct.at() + "." );
453  }
454 
455  // if we reached this point in code, this means that ct != begin, so it is not the first
456  // iteration in this loop. this means that pt was already set in the loop header (at least
457  // once), which means it now points to a valid token.
458  assert( pt.type != TokenType::kEnd );
459 
460  // set up the node that will be filled with data now.
461  // We use depth == -1 as an indicator whether it is already initialized.
462  // If it is already initialized, this means we are adding more information to it, e.g.
463  // a branch length or a tag. so we do not need to create it.
464  // however, if this node is not initialized, this means we saw a token before that finished
465  // a node and pushed it to the stack (either closing bracket or comma), so we need to init
466  // it here.
467  if( node.depth == -1 ) {
468  node.depth = depth;
469  }
470 
471  // ------------------------------------------------------
472  // is symbol or string ==> label
473  // ------------------------------------------------------
474 
475  if( ct.type == TokenType::kString ) {
476  if (!(
477  pt.type == TokenType::kOpeningParenthesis ||
478  pt.type == TokenType::kClosingParenthesis ||
479  pt.type == TokenType::kComma ||
480  pt.type == TokenType::kComment
481  )) {
482  throw std::runtime_error(
483  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
484  );
485  }
486 
487  // populate the node
488  node.name = ct.text;
489  continue;
490  }
491 
492  // ------------------------------------------------------
493  // is number ==> branch length
494  // ------------------------------------------------------
495 
496  if( ct.type == TokenType::kValue ) {
497  if (!(
498  pt.type == TokenType::kOpeningParenthesis ||
499  pt.type == TokenType::kClosingParenthesis ||
500  pt.type == TokenType::kString ||
501  pt.type == TokenType::kComma ||
502  pt.type == TokenType::kComment
503  )) {
504  throw std::runtime_error(
505  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
506  );
507  }
508 
509  // populate the node
510  node.values.push_back( ct.text );
511  continue;
512  }
513 
514  // ------------------------------------------------------
515  // is tag {} ==> tag
516  // ------------------------------------------------------
517 
518  if( ct.type == TokenType::kTag ) {
519  // in some newick extensions, a tag has a semantic meaning that belongs to the
520  // current node/edge, thus we need to store it
521 
522  // populate the node
523  node.tags.push_back(ct.text);
524  continue;
525  }
526 
527  // ------------------------------------------------------
528  // is comment [] ==> comment
529  // ------------------------------------------------------
530 
531  if( ct.type == TokenType::kComment ) {
532  // in some newick extensions, a comment has a semantic meaning that belongs to the
533  // current node/edge, thus we need to store it
534 
535  // populate the node
536  node.comments.push_back(ct.text);
537  continue;
538  }
539 
540  // ------------------------------------------------------
541  // is comma ',' ==> next subtree
542  // ------------------------------------------------------
543 
544  if( ct.type == TokenType::kComma ) {
545  if (!(
546  pt.type == TokenType::kOpeningParenthesis ||
547  pt.type == TokenType::kClosingParenthesis ||
548  pt.type == TokenType::kString ||
549  pt.type == TokenType::kComma ||
550  pt.type == TokenType::kValue ||
551  pt.type == TokenType::kTag ||
552  pt.type == TokenType::kComment
553  )) {
554  throw std::runtime_error( "Invalid ',' at " + ct.at() + "." );
555  }
556 
557  // Store and finish the current node. Then, make a new, uninitialized one.
558  broker.push_top( node );
559  node = NewickBrokerElement();
560  continue;
561  }
562 
563  // ------------------------------------------------------
564  // is bracket ')' ==> end of subtree
565  // ------------------------------------------------------
566 
567  if( ct.type == TokenType::kClosingParenthesis ) {
568  if (depth == 0) {
569  throw std::runtime_error( "Too many ')' at " + ct.at() + "." );
570  }
571  if (!(
572  pt.type == TokenType::kOpeningParenthesis ||
573  pt.type == TokenType::kClosingParenthesis ||
574  pt.type == TokenType::kString ||
575  pt.type == TokenType::kComma ||
576  pt.type == TokenType::kValue ||
577  pt.type == TokenType::kTag ||
578  pt.type == TokenType::kComment
579  )) {
580  throw std::runtime_error( "Invalid ')' at " + ct.at() + ": '" + ct.text + "'." );
581  }
582 
583  // Store and finish the current node. Then, make a new, uninitialized one.
584  broker.push_top( node );
585  node = NewickBrokerElement();
586 
587  // decrease depth and check if this was the parenthesis that closed the tree
588  --depth;
589  if (depth == 0) {
590  closed = true;
591  }
592  continue;
593  }
594 
595  // ------------------------------------------------------
596  // is semicolon ';' ==> end of tree
597  // ------------------------------------------------------
598 
599  if( ct.type == TokenType::kSemicolon ) {
600  if (depth != 0) {
601  throw std::runtime_error(
602  "Not enough ')' in tree before closing it with ';' at " + ct.at() + "."
603  );
604  }
605  if (!(
606  pt.type == TokenType::kClosingParenthesis ||
607  pt.type == TokenType::kString ||
608  pt.type == TokenType::kValue ||
609  pt.type == TokenType::kTag ||
610  pt.type == TokenType::kComment
611  )) {
612  throw std::runtime_error( "Invalid ';' at " + ct.at() + ": '" + ct.text + "'." );
613  }
614 
615  // Store and finish the current node. Then, make a new, uninitialized one.
616  broker.push_top( node );
617  node = NewickBrokerElement();
618  break;
619  }
620 
621  // If we reach this part of the code, all checkings for token types are done.
622  // as we check for every type that NewickLexer yields, and we use a continue or break
623  // in each of them, we should never reach this point, unless we forgot a type!
624  assert(false);
625  }
626 
627  // Tree has to finish with semicolon. Particularly ct.type == TokenType::kEnd is not allowed
628  // to happen here!
629  if( ct.type != TokenType::kSemicolon ) {
630  throw std::runtime_error( "Tree does not finish with a semicolon." );
631  }
632 
633  return broker;
634 }
635 
636 // =================================================================================================
637 // Broker to Tree
638 // =================================================================================================
639 
641 {
642  // Prepare result and intermediate data.
643  Tree tree;
644  std::vector< TreeLink* > link_stack;
645  broker_to_tree_prepare_( broker, tree );
646 
647  // Iterate over all nodes of the tree broker.
648  for( auto b_itr = broker.cbegin(); b_itr != broker.cend(); ++b_itr ) {
649  broker_to_tree_element_( *b_itr, link_stack, tree );
650  }
651  assert(link_stack.empty());
652 
653  // Finish the work.
654  broker_to_tree_finish_( tree );
655  return tree;
656 }
657 
659 {
660  // Prepare result and intermediate data.
661  Tree tree;
662  std::vector< TreeLink* > link_stack;
663  broker_to_tree_prepare_( broker, tree );
664 
665  // Iterate over all nodes of the tree broker,
666  // then delete the element from the broker to save space.
667  while( ! broker.empty() ) {
668  broker_to_tree_element_( broker.top(), link_stack, tree );
669  broker.pop_top();
670  }
671  assert(link_stack.empty());
672 
673  // Finish the work.
674  broker_to_tree_finish_( tree );
675  return tree;
676 }
677 
678 void NewickReader::broker_to_tree_prepare_( NewickBroker const& broker, Tree& tree ) const
679 {
680  // we need the ranks (number of immediate children) of all nodes
681  broker.assign_ranks();
682 
683  // Call all prepare plugins
684  for( auto const& prepare_plugin : prepare_reading_plugins ) {
685  prepare_plugin( broker, tree );
686  }
687 }
688 
689 void NewickReader::broker_to_tree_element_(
690  NewickBrokerElement const& broker_node,
691  std::vector<TreeLink*>& link_stack,
692  Tree& tree
693 ) const {
694  // Shortcut to tree containers.
695  auto& links = tree.expose_link_container();
696  auto& nodes = tree.expose_node_container();
697  auto& edges = tree.expose_edge_container();
698 
699  // create the tree node for this broker node
700  auto cur_node_u = utils::make_unique< TreeNode >();
701  auto cur_node = cur_node_u.get();
702  cur_node->reset_index( nodes.size() );
703 
704  // Create data pointer, if there is a suitable function. Otherwise, data is
705  // simply a nullptr, i.e., there is no data.
707  create_node_data_plugin( *cur_node );
708  }
709 
710  // Call all node plugins.
711  for( auto const& node_plugin : element_to_node_plugins ) {
712  node_plugin( broker_node, *cur_node );
713  }
714 
715  // Add the node.
716  nodes.push_back(std::move(cur_node_u));
717 
718  // create the link that points towards the root.
719  // this link is created for every node, root, inner and leaves.
720  auto up_link_u = utils::make_unique< TreeLink >();
721  auto up_link = up_link_u.get();
722  up_link->reset_node( cur_node );
723  cur_node->reset_primary_link( up_link );
724  up_link->reset_index( links.size() );
725  links.push_back(std::move(up_link_u));
726 
727  // establish the link towards the root
728  if (link_stack.empty()) {
729  // if the link stack is empty, we are currently at the very beginning of this loop,
730  // which means we are at the root itself. in this case, make the "link towards the root"
731  // point to itself.
732  up_link->reset_outer( up_link );
733  } else {
734  // if we are however in some other node (leaf or inner, but not the root), we establish
735  // the link "upwards" to the root, and back from there.
736  up_link->reset_outer( link_stack.back() );
737  link_stack.back()->reset_outer( up_link );
738 
739  // also, create an edge that connects both nodes
740  auto up_edge = utils::make_unique< TreeEdge >(
741  edges.size(),
742  link_stack.back(),
743  up_link
744  );
745 
746  up_link->reset_edge( up_edge.get() );
747  link_stack.back()->reset_edge( up_edge.get() );
748 
749  // Create data pointer, if there is a suitable function. Otherwise, data is
750  // simply a nullptr, i.e., there is no data.
752  create_edge_data_plugin( *up_edge );
753  }
754 
755  // Call all edge plugins.
756  for( auto const& edge_plugin : element_to_edge_plugins ) {
757  edge_plugin( broker_node, *up_edge );
758  }
759 
760  // Add the edge.
761  edges.push_back(std::move(up_edge));
762 
763  // we can now delete the head of the stack, because we just estiablished its "downlink"
764  // and thus are done with it
765  link_stack.pop_back();
766  }
767 
768  // in the following, we create the links that will connect to the nodes' children.
769  // for leaf nodes, this makes the next pointer point to the node itself (the loop
770  // is never executed in this case, as leaves have rank 0).
771  // for inner nodes, we create as many "down" links as they have children. each of them
772  // is pushed to the stack, so that for the next broker nodes they are available as
773  // reciever for the "up" links.
774  // in summary, make all next pointers of a node point to each other in a circle.
775  auto prev_link = up_link;
776  for (int i = 0; i < broker_node.rank(); ++i) {
777  auto down_link = utils::make_unique< TreeLink >();
778  prev_link->reset_next( down_link.get() );
779  prev_link = down_link.get();
780 
781  down_link->reset_node( cur_node );
782  down_link->reset_index( links.size() );
783  link_stack.push_back(down_link.get());
784  links.push_back(std::move(down_link));
785  }
786  prev_link->reset_next( up_link );
787 }
788 
789 void NewickReader::broker_to_tree_finish_(
790  Tree& tree
791 ) const {
792  // Shortcut to tree containers.
793  auto& links = tree.expose_link_container();
794 
795  // we pushed elements to the link_stack for all children of the nodes and popped them when we
796  // were done processing those children, so there should be no elements left. this assumes that
797  // NewickBroker.assign_ranks() does its job properly!
798  // Already checked in the caller functions!
799  // assert(link_stack.empty());
800 
801  // now delete the uplink of the root, in order to make the tree fully unrooted.
802  // (we do that after the tree creation, as it is way easier this way)
803  assert( &links.front()->outer() == links.front().get() );
804  auto next = &links.front()->next();
805  while( &next->next() != links.front().get() ) {
806  next = &next->next();
807  }
808  next->reset_next( &next->next().next() );
809  assert( &next->next() == &links.front()->next() );
810  links.erase(links.begin());
811  for (size_t i = 0; i < links.size(); ++i) {
812  links[i]->reset_index(i);
813  }
814  next->node().reset_primary_link( &next->next() );
815 
816  // Lastly, make sure to correctly set the root link of the tree.
817  // Assert that the link is already the primary link of its node.
818  assert( links.front().get() == &links.front().get()->node().link() );
819  tree.reset_root_link( links.front().get() );
820 
821  // Call all finish plugins.
822  for( auto const& finish_plugin : finish_reading_plugins ) {
823  finish_plugin( tree );
824  }
825 }
826 
827 // =================================================================================================
828 // Settings
829 // =================================================================================================
830 
832 {
833  enable_tags_ = value;
834  return *this;
835 }
836 
838 {
839  return enable_tags_;
840 }
841 
843 {
844  stop_after_semicolon_ = value;
845  return *this;
846 }
847 
849 {
850  return stop_after_semicolon_;
851 }
852 
853 } // namespace tree
854 } // namespace genesis
std::vector< std::string > values
Numerical values associated with the node, i.e. branch lengths.
Definition: element.hpp:122
std::string parse_quoted_string(utils::InputStream &source, bool use_escapes, bool use_twin_quotes, bool include_qmarks)
Read a string in quotation marks from a stream and return it.
Definition: parser.cpp:116
const_iterator cend() const
Const version of end().
Definition: broker.cpp:262
void skip_while(InputStream &source, char criterion)
Lexing function that advances the stream while its current char equals the provided one...
Definition: scanner.hpp:151
void add(Tree const &tree, std::string const &name="")
Add a Tree with a name to the TreeSet.
Definition: tree_set.hpp:88
bool stop_after_semicolon() const
Return whether currently reading stops after the semicolon that finishes a Newick tree...
bool empty() const
Returns whether the stack is empty.
Definition: broker.cpp:173
Tree read(std::shared_ptr< utils::BaseInputSource > source) const
Read a single Tree from an input source containing a Newick tree.
Tree broker_to_tree_destructive(NewickBroker &broker) const
Build a Tree from a NewickBroker.
const_iterator cbegin() const
Const version of begin().
Definition: broker.cpp:257
Tree parse_single_tree(utils::InputStream &input_stream) const
Parse a single tree. Depending on stop_after_semicolon(), stop after the semicolon or continue until ...
void push_top(NewickBrokerElement const &node)
Definition: broker.cpp:51
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:384
std::pair< std::string, Tree > parse_named_tree(utils::InputStream &input_stream) const
Parse one named tree, i.e., a tree as described here.
Provides some valuable additions to STD.
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:156
Stores a Newick tree in an intermediate format that can be further processed into a Tree...
Definition: broker.hpp:106
std::string read_while(InputStream &source, char criterion)
Lexing function that reads from the stream while its current char equals the provided one...
Definition: scanner.hpp:214
NewickBrokerElement & top()
Returns a reference to the top node of the tree stack.
Definition: broker.cpp:213
std::string at() const
Return a textual representation of the current input position in the form "line:column".
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
void assign_ranks() const
Iterate over the tree and assign ranks (= number of immediate children) to all nodes.
Definition: broker.cpp:85
Tree & reset_root_link(TreeLink *root_link)
Reset the link that is considered to be the root of the Tree.
Definition: tree/tree.cpp:184
std::vector< prepare_reading_function > prepare_reading_plugins
std::vector< std::string > comments
Arbitrary strings that can be attached to a node, e.g. in Newick format via "[]". ...
Definition: element.hpp:132
bool enable_tags() const
Return whether currently Newick tags are enabled.
create_edge_data_function create_edge_data_plugin
LinkContainerType & expose_link_container()
Get the container that stores all TreeLinks of the Tree.
Definition: tree/tree.cpp:192
std::vector< std::string > tags
Arbitrary strings that can be attached to a node, e.g. in Newick format via "{}". ...
Definition: element.hpp:127
Provides some commonly used string utility functions.
std::vector< finish_reading_function > finish_reading_plugins
Provides functions for accessing the file system.
Provides easy and fast logging functionality.
void parse_multiple_trees(utils::InputStream &input_stream, TreeSet &tree_set, std::string const &default_name) const
Parse until the end of the stream and add all Trees to the TreeSet.
std::string parse_number_string(utils::InputStream &source)
Read a general number string from an input stream.
Definition: parser.cpp:48
std::vector< element_to_edge_function > element_to_edge_plugins
std::string read_until(InputStream &source, char criterion)
Lexing function that reads from the stream until its current char equals the provided one...
Definition: scanner.hpp:252
create_node_data_function create_node_data_plugin
Header of Tree class.
long rank() const
Returns the rank (number of immediate children) of this node.
Definition: element.hpp:149
std::string name
Name of the node.
Definition: element.hpp:114
Tree broker_to_tree(NewickBroker const &broker) const
Build a Tree from a NewickBroker.
EdgeContainerType & expose_edge_container()
Get the container that stores all TreeEdges of the Tree.
Definition: tree/tree.cpp:202
Store the information for one element of a Newick tree.
Definition: element.hpp:60
NodeContainerType & expose_node_container()
Get the container that stores all TreeNodes of the Tree.
Definition: tree/tree.cpp:197
Stream interface for reading data from an InputSource, that keeps track of line and column counters...
long depth
Depth of the node in the tree, i.e. its distance from the root.
Definition: element.hpp:137
std::vector< element_to_node_function > element_to_node_plugins