A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tree/formats/newick/reader.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 #include "genesis/tree/tree.hpp"
36 
40 
45 
46 #include <cassert>
47 #include <deque>
48 #include <memory>
49 #include <iostream>
50 #include <sstream>
51 #include <stdexcept>
52 
53 namespace genesis {
54 namespace tree {
55 
56 // =================================================================================================
57 // Reading a single Tree
58 // =================================================================================================
59 
60 Tree NewickReader::from_stream( std::istream& input_stream ) const
61 {
62  utils::InputStream it( utils::make_unique< utils::StreamInputSource >( input_stream ));
63  return parse_single_tree( it );
64 }
65 
66 Tree NewickReader::from_file( std::string const& filename ) const
67 {
68  utils::InputStream it( utils::make_unique< utils::FileInputSource >( filename ));
69  return parse_single_tree( it );
70 }
71 
72 Tree NewickReader::from_string( std::string const& tree_string ) const
73 {
74  utils::InputStream it( utils::make_unique< utils::StringInputSource >( tree_string ));
75  return parse_single_tree( it );
76 }
77 
78 // =================================================================================================
79 // Reading into a TreeSet
80 // =================================================================================================
81 
83  std::istream& input_stream,
85  std::string const& default_name
86 ) const {
87  utils::InputStream it( utils::make_unique< utils::StreamInputSource >( input_stream ));
88  parse_multiple_trees( it, tree_set, default_name );
89 }
90 
92  std::string const& filename,
94  std::string const& default_name
95 ) const {
96  utils::InputStream it( utils::make_unique< utils::FileInputSource >( filename ));
97  parse_multiple_trees( it, tree_set, default_name );
98 }
99 
101  std::string const& tree_string,
102  TreeSet& tree_set,
103  std::string const& default_name
104 ) const {
105  utils::InputStream it( utils::make_unique< utils::StringInputSource >( tree_string ));
106  parse_multiple_trees( it, tree_set, default_name );
107 }
108 
109 // =================================================================================================
110 // Reading multiple input sources
111 // =================================================================================================
112 
114  std::vector<std::string> const& filenames,
116 ) const {
117  for( auto const& fn : filenames ) {
118  from_file( fn, tree_set );
119  }
120 }
121 
123  std::vector<std::string> const& tree_strings,
124  TreeSet& tree_set,
125  std::string const& default_name
126 ) const {
127  for( auto const& ts : tree_strings ) {
128  from_string (ts, tree_set, default_name);
129  }
130 }
131 
132 // =================================================================================================
133 // Parse Single Tree
134 // =================================================================================================
135 
137 {
138  // Get name and tree, only use tree.
139  auto tree = parse_named_tree( input_stream ).second;
140 
141  // If we just read this tree, continue until end of stream.
142  if( ! stop_at_semicolon_ ) {
143  parse_trailing_input_( input_stream );
144  }
145 
146  // Return resulting tree.
147  return tree;
148 }
149 
150 // =================================================================================================
151 // Parse Multiple Trees
152 // =================================================================================================
153 
155  utils::InputStream& input_stream,
156  TreeSet& tree_set,
157  std::string const& default_name
158 ) const {
159  // Count how many unnamed trees we have seen.
160  size_t unnamed_ctr = 0;
161 
162  while( input_stream ) {
163 
164  // Get name and tree.
165  auto named_tree = parse_named_tree( input_stream );
166 
167  // If there are no trees left, we are done.
168  if( named_tree.first.empty() && named_tree.second.empty() ) {
169  return;
170  }
171 
172  // Fill in default name if needed.
173  if( named_tree.first.empty() ) {
174  named_tree.first = default_name + std::to_string( unnamed_ctr );
175  ++unnamed_ctr;
176  }
177 
178  // Store it in the TreeSet, without any copy steps.
179  tree_set.add( std::move( named_tree.first ), std::move( named_tree.second ));
180  }
181 }
182 
183 // =================================================================================================
184 // Parse Named Tree
185 // =================================================================================================
186 
187 std::pair< std::string, Tree > NewickReader::parse_named_tree( utils::InputStream& input_stream ) const
188 {
189  // Helper function for valid tree name chars.
190  auto is_valid_tree_name_char = [&]( char c ){
191  return isprint(c)
192  && ! isspace(c)
193  && c != ';'
194  && c != '('
195  && c != ')'
196  && c != '='
197  ;
198  };
199 
200  // Skip leading stuff.
201  while( input_stream ) {
202 
203  // Whitespaces.
204  utils::skip_while( input_stream, isspace );
205 
206  // No input, return empty tree.
207  // We can never read an empty tree from an input, so this is useful to distungish
208  // whether we were able to read a tree from the input.
209  if( ! input_stream ) {
210  return {};
211  }
212 
213  // Skip comments.
214  if( *input_stream == '[' ) {
215  ++input_stream;
216  utils::read_until( input_stream, ']' );
217 
218  if( !input_stream ) {
219  throw std::runtime_error(
220  "Reached unexpected end of Newick tree at " + input_stream.at()
221  );
222  }
223  assert( *input_stream == ']' );
224  ++input_stream;
225 
226  continue;
227  }
228 
229  // If neither applies, we are done here.
230  break;
231  }
232  assert( input_stream );
233 
234  // Get the name of the current tree, if there is one.
235  std::string name = "";
236  if( *input_stream != '(' ) {
237 
238  // Distinguish between names in quotes, and those without.
239  // Names without quotes cannot contain certain chars, see is_valid_tree_name_char().
240  if( *input_stream == '"' || *input_stream == '\'' ) {
241  name = utils::parse_quoted_string( input_stream, false, true, false );
242  } else {
243  name = utils::read_while( input_stream, is_valid_tree_name_char );
244  }
245 
246  // Always allow white spaces...
247  utils::skip_while( input_stream, isspace );
248 
249  // After a name, we expect an equals sign.
250  if( *input_stream != '=' ) {
251  throw std::runtime_error(
252  "Invalid character '" + std::string( 1, *input_stream ) + "' at "
253  + input_stream.at() + ". Expecting '='."
254  );
255  }
256 
257  assert( *input_stream == '=' );
258  ++input_stream;
259 
260  // After a name, there has to be something.
261  if( ! input_stream ) {
262  throw std::runtime_error( "Unexpected end of tree at " + input_stream.at() + "." );
263  }
264  }
265 
266  // Parse the tree and return it.
267  return { name, broker_to_tree_( parse_tree_to_broker_( input_stream )) };
268 }
269 
270 // =================================================================================================
271 // Parse Trailing Input
272 // =================================================================================================
273 
274 void NewickReader::parse_trailing_input_( utils::InputStream& input_stream ) const
275 {
276  // Check for more data after the semicolon. We cannot do this check in the parsing function,
277  // as there are cases where we read a Newick tree as part of another file (e.g, Nexus or Jplace),
278  // where it is natural that there is more data after the tree finished.
279  Token ct;
280  ct.type = TokenType::kUnknown;
281  while( input_stream && ct.type != TokenType::kEnd ) {
282  ct = get_next_token_( input_stream );
283  if( ct.type != TokenType::kEnd && ct.type != TokenType::kComment ) {
284  throw std::runtime_error( "Tree contains more data after the semicolon at " + ct.at() );
285  }
286  }
287 }
288 
289 // =================================================================================================
290 // Token Lexing
291 // =================================================================================================
292 
293 NewickReader::Token NewickReader::get_next_token_( utils::InputStream& input_stream ) const
294 {
295  // Prepare result token.
296  Token result;
297 
298  // Shorthand.
299  auto& is = input_stream;
300 
301  // Helper function to distinguish valid chars in a Newick name string.
302  // According to http://evolution.genetics.washington.edu/phylip/newicktree.html :
303  // "A name can be any string of printable characters except blanks, colons, semicolons,
304  // parentheses, and square brackets." Well, they forgot to mention commas here.
305  // But we knew before that Newick is not a good format anyway...
306  // Also, if enable_tags_ is true, we do not allow {}, as those are used for tags.
307  auto is_valid_name_char = [&]( char c ){
308  return isprint(c)
309  && ! isspace(c)
310  && c != ':'
311  && c != ';'
312  && c != '('
313  && c != ')'
314  && c != '['
315  && c != ']'
316  && c != ','
317  && ( ! enable_tags_ || ( c != '{' && c != '}' ))
318  ;
319  };
320 
321  // Skip initial whitespace, then set the current position in the stream.
322  // This is where the token begins.
323  utils::skip_while( is, isspace );
324  result.line = is.line();
325  result.column = is.column();
326 
327  // Find token type and text from the stream.
328  if( !is ) {
329  result.type = TokenType::kEnd;
330 
331  } else if( *is == '(' ) {
332  result.type = TokenType::kOpeningParenthesis;
333  ++is;
334 
335  } else if( *is == ')' ) {
336  result.type = TokenType::kClosingParenthesis;
337  ++is;
338 
339  } else if( *is == ',' ) {
340  result.type = TokenType::kComma;
341  ++is;
342 
343  } else if( *is == ';' ) {
344  result.type = TokenType::kSemicolon;
345  ++is;
346 
347  } else if( *is == '=' ) {
348  result.type = TokenType::kEquals;
349  ++is;
350 
351  } else if( *is == '[' ) {
352  result.type = TokenType::kComment;
353  ++is;
354  result.text = utils::read_until( is, ']' );
355 
356  if( !is ) {
357  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
358  }
359  assert( *is == ']' );
360  ++is;
361 
362  } else if( *is == ':' ) {
363  result.type = TokenType::kValue;
364  ++is;
365  result.text = utils::parse_number_string( is );
366 
367  } else if( *is == '{' && enable_tags_ ) {
368  result.type = TokenType::kTag;
369  ++is;
370  result.text = utils::read_until( is, '}' );
371 
372  if( !is ) {
373  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
374  }
375  assert( *is == '}' );
376  ++is;
377 
378  } else if( *is == '"' || *is == '\'' ) {
379  result.type = TokenType::kString;
380  result.text = utils::parse_quoted_string( is, false, true, false );
381 
382  } else if( is_valid_name_char( *is )) {
383  result.type = TokenType::kString;
384  result.text = utils::read_while( is, is_valid_name_char );
385 
386  } else {
387  result.type = TokenType::kUnknown;
388  }
389 
390  return result;
391 }
392 
393 // =================================================================================================
394 // Parse Tree into Broker
395 // =================================================================================================
396 
397 NewickBroker NewickReader::parse_tree_to_broker_( utils::InputStream& input_stream ) const
398 {
399  // Create result broker.
400  NewickBroker broker;
401 
402  // Create a node that is currently being populated with data.
403  // This is copied into the broker whenever we finish a tree node.
404  NewickBrokerElement node;
405 
406  // How deep is the current token nested in the tree?
407  int depth = 0;
408 
409  // Was it closed at some point? We want to avoid a tree like "()();" to be parsed!
410  bool closed = false;
411 
412  // Store current token, start with an invalid value to indicate problems.
413  Token ct;
414  ct.type = TokenType::kEnd;
415 
416  // Store previous token.
417  // In the beginning of the loop, we set pt to ct, so that in the first iteration we have
418  // pt == TokenType::kEnd. This is used as indicator that we are in the first iteration.
419  Token pt;
420 
421  // --------------------------------------------------------------
422  // Loop over lexer tokens and check if it...
423  // --------------------------------------------------------------
424 
425  while( input_stream ) {
426  // Init the previous token to what the current token (of the previous iteration) was.
427  // In the first iteration, this inits to the kEnd token.
428  // Then, get the next token.
429  pt = ct;
430  ct = get_next_token_( input_stream );
431 
432  // Treat some special error cases.
433  if( ct.type == TokenType::kUnknown ) {
434  throw std::runtime_error(
435  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
436  );
437  }
438  if( ct.type == TokenType::kEnd ) {
439  break;
440  }
441 
442  // ------------------------------------------------------
443  // is bracket '(' ==> begin of subtree
444  // ------------------------------------------------------
445 
446  if( ct.type == TokenType::kOpeningParenthesis ) {
447  if( pt.type != TokenType::kEnd && !(
448  pt.type == TokenType::kOpeningParenthesis ||
449  pt.type == TokenType::kComma ||
450  pt.type == TokenType::kComment
451  )) {
452  throw std::runtime_error(
453  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
454  );
455  }
456 
457  if (closed) {
458  throw std::runtime_error(
459  "Tree was already closed. Cannot reopen it with '(' at " + ct.at() + "."
460  );
461  }
462 
463  ++depth;
464  continue;
465  }
466 
467  // ------------------------------------------------------
468  // Prepare for all other tokens.
469  // ------------------------------------------------------
470 
471  // if we reach this, the previous condition is not fullfilled (otherwise, the
472  // `continue` statement just above would
473  // have been called). so we have a token other than '(', which means we should already
474  // be somewhere in the tree (or a comment). check, if that is true.
475  if( pt.type == TokenType::kEnd ) {
476 
477  // If it is a comment before the start of the tree, we cannot attach it to any node,
478  // so just skip it and reset the current token to end, so that the next iteration
479  // starts fresh.
480  if( ct.type == TokenType::kComment ) {
481  ct.type = TokenType::kEnd;
482  continue;
483  }
484 
485  throw std::runtime_error( "Tree does not start with '(' at " + ct.at() + "." );
486  }
487 
488  // if we reached this point in code, this means that ct != begin, so it is not the first
489  // iteration in this loop. this means that pt was already set in the loop header (at least
490  // once), which means it now points to a valid token.
491  assert( pt.type != TokenType::kEnd );
492 
493  // set up the node that will be filled with data now.
494  // We use depth == -1 as an indicator whether it is already initialized.
495  // If it is already initialized, this means we are adding more information to it, e.g.
496  // a branch length or a tag. so we do not need to create it.
497  // however, if this node is not initialized, this means we saw a token before that finished
498  // a node and pushed it to the stack (either closing bracket or comma), so we need to init
499  // it here.
500  if( node.depth == -1 ) {
501  node.depth = depth;
502  }
503 
504  // ------------------------------------------------------
505  // is symbol or string ==> label
506  // ------------------------------------------------------
507 
508  if( ct.type == TokenType::kString ) {
509  if (!(
510  pt.type == TokenType::kOpeningParenthesis ||
511  pt.type == TokenType::kClosingParenthesis ||
512  pt.type == TokenType::kComma ||
513  pt.type == TokenType::kComment
514  )) {
515  throw std::runtime_error(
516  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
517  );
518  }
519 
520  // populate the node
521  node.name = ct.text;
522  continue;
523  }
524 
525  // ------------------------------------------------------
526  // is number ==> branch length
527  // ------------------------------------------------------
528 
529  if( ct.type == TokenType::kValue ) {
530  if (!(
531  pt.type == TokenType::kOpeningParenthesis ||
532  pt.type == TokenType::kClosingParenthesis ||
533  pt.type == TokenType::kString ||
534  pt.type == TokenType::kComma ||
535  pt.type == TokenType::kComment
536  )) {
537  throw std::runtime_error(
538  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
539  );
540  }
541 
542  // populate the node
543  node.values.push_back( ct.text );
544  continue;
545  }
546 
547  // ------------------------------------------------------
548  // is tag {} ==> tag
549  // ------------------------------------------------------
550 
551  if( ct.type == TokenType::kTag ) {
552  // in some newick extensions, a tag has a semantic meaning that belongs to the
553  // current node/edge, thus we need to store it
554 
555  // populate the node
556  node.tags.push_back(ct.text);
557  continue;
558  }
559 
560  // ------------------------------------------------------
561  // is comment [] ==> comment
562  // ------------------------------------------------------
563 
564  if( ct.type == TokenType::kComment ) {
565  // in some newick extensions, a comment has a semantic meaning that belongs to the
566  // current node/edge, thus we need to store it
567 
568  // populate the node
569  node.comments.push_back(ct.text);
570  continue;
571  }
572 
573  // ------------------------------------------------------
574  // is comma ',' ==> next subtree
575  // ------------------------------------------------------
576 
577  if( ct.type == TokenType::kComma ) {
578  if (!(
579  pt.type == TokenType::kOpeningParenthesis ||
580  pt.type == TokenType::kClosingParenthesis ||
581  pt.type == TokenType::kString ||
582  pt.type == TokenType::kComma ||
583  pt.type == TokenType::kValue ||
584  pt.type == TokenType::kTag ||
585  pt.type == TokenType::kComment
586  )) {
587  throw std::runtime_error( "Invalid ',' at " + ct.at() + "." );
588  }
589 
590  // Store and finish the current node. Then, make a new, uninitialized one.
591  broker.push_top( node );
592  node = NewickBrokerElement();
593  continue;
594  }
595 
596  // ------------------------------------------------------
597  // is bracket ')' ==> end of subtree
598  // ------------------------------------------------------
599 
600  if( ct.type == TokenType::kClosingParenthesis ) {
601  if (depth == 0) {
602  throw std::runtime_error( "Too many ')' at " + ct.at() + "." );
603  }
604  if (!(
605  pt.type == TokenType::kOpeningParenthesis ||
606  pt.type == TokenType::kClosingParenthesis ||
607  pt.type == TokenType::kString ||
608  pt.type == TokenType::kComma ||
609  pt.type == TokenType::kValue ||
610  pt.type == TokenType::kTag ||
611  pt.type == TokenType::kComment
612  )) {
613  throw std::runtime_error( "Invalid ')' at " + ct.at() + ": '" + ct.text + "'." );
614  }
615 
616  // Store and finish the current node. Then, make a new, uninitialized one.
617  broker.push_top( node );
618  node = NewickBrokerElement();
619 
620  // decrease depth and check if this was the parenthesis that closed the tree
621  --depth;
622  if (depth == 0) {
623  closed = true;
624  }
625  continue;
626  }
627 
628  // ------------------------------------------------------
629  // is semicolon ';' ==> end of tree
630  // ------------------------------------------------------
631 
632  if( ct.type == TokenType::kSemicolon ) {
633  if (depth != 0) {
634  throw std::runtime_error(
635  "Not enough ')' in tree before closing it with ';' at " + ct.at() + "."
636  );
637  }
638  if (!(
639  pt.type == TokenType::kClosingParenthesis ||
640  pt.type == TokenType::kString ||
641  pt.type == TokenType::kValue ||
642  pt.type == TokenType::kTag ||
643  pt.type == TokenType::kComment
644  )) {
645  throw std::runtime_error( "Invalid ';' at " + ct.at() + ": '" + ct.text + "'." );
646  }
647 
648  // Store and finish the current node. Then, make a new, uninitialized one.
649  broker.push_top( node );
650  node = NewickBrokerElement();
651  break;
652  }
653 
654  // If we reach this part of the code, all checkings for token types are done.
655  // as we check for every type that NewickLexer yields, and we use a continue or break
656  // in each of them, we should never reach this point, unless we forgot a type!
657  assert(false);
658  }
659 
660  // Tree has to finish with semicolon. Particularly ct.type == TokenType::kEnd is not allowed
661  // to happen here!
662  if( ct.type != TokenType::kSemicolon ) {
663  throw std::runtime_error( "Tree does not finish with a semicolon." );
664  }
665 
666  return broker;
667 }
668 
669 // =================================================================================================
670 // Broker to Tree
671 // =================================================================================================
672 
673 Tree NewickReader::broker_to_tree_ (
674  NewickBroker const& broker
675 ) const {
676  Tree tree;
677 
678  auto& links = tree.expose_link_container();
679  auto& nodes = tree.expose_node_container();
680  auto& edges = tree.expose_edge_container();
681 
682  std::vector< TreeLink* > link_stack;
683 
684  // we need the ranks (number of immediate children) of all nodes
685  broker.assign_ranks();
686 
687  // Call all prepare plugins
688  for( auto const& prepare_plugin : prepare_reading_plugins ) {
689  prepare_plugin( broker, tree );
690  }
691 
692  // iterate over all nodes of the tree broker
693  for (auto b_itr = broker.cbegin(); b_itr != broker.cend(); ++b_itr) {
694  NewickBrokerElement const& broker_node = *b_itr;
695 
696  // create the tree node for this broker node
697  auto cur_node_u = utils::make_unique< TreeNode >();
698  auto cur_node = cur_node_u.get();
699  cur_node->reset_index( nodes.size() );
700 
701  // Create data pointer, if there is a suitable function. Otherwise, data is
702  // simply a nullptr, i.e., there is no data.
704  create_node_data_plugin( *cur_node );
705  }
706 
707  // Call all node plugins.
708  for( auto const& node_plugin : element_to_node_plugins ) {
709  node_plugin( broker_node, *cur_node );
710  }
711 
712  // Add the node.
713  nodes.push_back(std::move(cur_node_u));
714 
715  // create the link that points towards the root.
716  // this link is created for every node, root, inner and leaves.
717  auto up_link_u = utils::make_unique< TreeLink >();
718  auto up_link = up_link_u.get();
719  up_link->reset_node( cur_node );
720  cur_node->reset_primary_link( up_link );
721  up_link->reset_index( links.size() );
722  links.push_back(std::move(up_link_u));
723 
724  // establish the link towards the root
725  if (link_stack.empty()) {
726  // if the link stack is empty, we are currently at the very beginning of this loop,
727  // which means we are at the root itself. in this case, make the "link towards the root"
728  // point to itself.
729  up_link->reset_outer( up_link );
730  } else {
731  // if we are however in some other node (leaf or inner, but not the root), we establish
732  // the link "upwards" to the root, and back from there.
733  up_link->reset_outer( link_stack.back() );
734  link_stack.back()->reset_outer( up_link );
735 
736  // also, create an edge that connects both nodes
737  auto up_edge = utils::make_unique< TreeEdge >(
738  edges.size(),
739  link_stack.back(),
740  up_link
741  );
742 
743  up_link->reset_edge( up_edge.get() );
744  link_stack.back()->reset_edge( up_edge.get() );
745 
746  // Create data pointer, if there is a suitable function. Otherwise, data is
747  // simply a nullptr, i.e., there is no data.
749  create_edge_data_plugin( *up_edge );
750  }
751 
752  // Call all edge plugins.
753  for( auto const& edge_plugin : element_to_edge_plugins ) {
754  edge_plugin( broker_node, *up_edge );
755  }
756 
757  // Add the edge.
758  edges.push_back(std::move(up_edge));
759 
760  // we can now delete the head of the stack, because we just estiablished its "downlink"
761  // and thus are done with it
762  link_stack.pop_back();
763  }
764 
765  // in the following, we create the links that will connect to the nodes' children.
766  // for leaf nodes, this makes the next pointer point to the node itself (the loop
767  // is never executed in this case, as leaves have rank 0).
768  // for inner nodes, we create as many "down" links as they have children. each of them
769  // is pushed to the stack, so that for the next broker nodes they are available as
770  // reciever for the "up" links.
771  // in summary, make all next pointers of a node point to each other in a circle.
772  auto prev_link = up_link;
773  for (int i = 0; i < broker_node.rank(); ++i) {
774  auto down_link = utils::make_unique< TreeLink >();
775  prev_link->reset_next( down_link.get() );
776  prev_link = down_link.get();
777 
778  down_link->reset_node( cur_node );
779  down_link->reset_index( links.size() );
780  link_stack.push_back(down_link.get());
781  links.push_back(std::move(down_link));
782  }
783  prev_link->reset_next( up_link );
784  }
785 
786  // we pushed elements to the link_stack for all children of the nodes and popped them when we
787  // were done processing those children, so there should be no elements left. this assumes that
788  // NewickBroker.assign_ranks() does its job properly!
789  assert(link_stack.empty());
790 
791  // now delete the uplink of the root, in order to make the tree fully unrooted.
792  // (we do that after the tree creation, as it is way easier this way)
793  assert( &links.front()->outer() == links.front().get() );
794  auto next = &links.front()->next();
795  while( &next->next() != links.front().get() ) {
796  next = &next->next();
797  }
798  next->reset_next( &next->next().next() );
799  assert( &next->next() == &links.front()->next() );
800  links.erase(links.begin());
801  for (size_t i = 0; i < links.size(); ++i) {
802  links[i]->reset_index(i);
803  }
804  next->node().reset_primary_link( &next->next() );
805 
806  // Call all finish plugins.
807  for( auto const& finish_plugin : finish_reading_plugins ) {
808  finish_plugin( broker, tree );
809  }
810 
811  return tree;
812 }
813 
814 // =================================================================================================
815 // Settings
816 // =================================================================================================
817 
819 {
820  enable_tags_ = value;
821  return *this;
822 }
823 
825 {
826  return enable_tags_;
827 }
828 
830 {
831  stop_at_semicolon_ = value;
832  return *this;
833 }
834 
836 {
837  return stop_at_semicolon_;
838 }
839 
840 } // namespace tree
841 } // namespace genesis
std::string parse_quoted_string(utils::InputStream &source, bool use_escapes, bool use_twin_quotes, bool include_qmarks)
Read a string in quotation marks from a stream and return it.
Definition: parser.cpp:116
void skip_while(InputStream &source, char criterion)
Lexing function that advances the stream while its current char equals the provided one...
Definition: scanner.hpp:151
std::pair< std::string, Tree > parse_named_tree(utils::InputStream &input_stream) const
Parse one named tree, i.e., a tree as described here.
bool enable_tags() const
Return whether currently Newick tags are enabled.
std::string at() const
Return a textual representation of the current input position in the form "line:column".
Tree from_string(std::string const &tree_string) const
Read a Tree from a string containing a Newick tree.
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:373
Provides some valuable additions to STD.
Tree parse_single_tree(utils::InputStream &input_stream) const
Parse a single tree. Depending on stop_at_semicolon(), stop after the semicolon or continue until the...
std::string read_while(InputStream &source, char criterion)
Lexing function that reads from the stream while its current char equals the provided one...
Definition: scanner.hpp:214
Tree from_stream(std::istream &input_stream) const
Read a Tree from an input stream containing a Newick tree.
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
std::vector< prepare_reading_function > prepare_reading_plugins
create_edge_data_function create_edge_data_plugin
Tree from_file(std::string const &filename) const
Read a Tree from a file containing a Newick tree.
void parse_multiple_trees(utils::InputStream &input_stream, TreeSet &tree_set, std::string const &default_name) const
Parse until the end of the stream and add all Trees to the TreeSet.
Provides some commonly used string utility functions.
std::vector< finish_reading_function > finish_reading_plugins
Provides functions for accessing the file system.
Provides easy and fast logging functionality.
std::string parse_number_string(utils::InputStream &source)
Read a general number string from an input stream.
Definition: parser.cpp:48
std::vector< element_to_edge_function > element_to_edge_plugins
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:156
std::string read_until(InputStream &source, char criterion)
Lexing function that reads from the stream until its current char equals the provided one...
Definition: scanner.hpp:252
create_node_data_function create_node_data_plugin
Header of Tree class.
void from_strings(std::vector< std::string > const &tree_strings, TreeSet &tree_set, std::string const &default_name="") const
Fill a TreeSet from a list of strings containing Newick trees.
void from_files(std::vector< std::string > const &filenames, TreeSet &tree_set) const
Fill a TreeSet from a list of files containing Newick trees.
void add(std::string const &name, Tree const &tree)
Add a Tree with a name to the TreeSet.
Definition: tree_set.cpp:55
bool stop_at_semicolon() const
Return whether currently reading stops after the semicolon that finishes a Newick tree...
Stream interface for reading data from an InputSource, that keeps track of line and column counters...
std::vector< element_to_node_function > element_to_node_plugins