A library for working with phylogenetic and population genetic data.
v0.27.0
tree/formats/newick/reader.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
35 #include "genesis/tree/tree.hpp"
36 
40 
46 
47 #include <cassert>
48 #include <cctype>
49 #include <deque>
50 #include <iostream>
51 #include <memory>
52 #include <sstream>
53 #include <stdexcept>
54 
55 namespace genesis {
56 namespace tree {
57 
58 // =================================================================================================
59 // Reading
60 // =================================================================================================
61 
62 Tree NewickReader::read( std::shared_ptr<utils::BaseInputSource> source ) const
63 {
64  utils::InputStream it( source );
65  return parse_single_tree( it );
66 }
67 
69  std::shared_ptr<utils::BaseInputSource> source,
70  TreeSet& target,
71  std::string const& default_name
72 ) const {
73  utils::InputStream it( source );
74  parse_multiple_trees( it, target, default_name );
75 }
76 
78  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
79  TreeSet& target,
80  std::string const& default_name
81 ) const {
82  for( auto const& source : sources ) {
83  read( source, target, default_name );
84  }
85 }
86 
88  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
89  std::string const& default_name
90 ) const {
91  TreeSet result;
92  for( auto const& source : sources ) {
93  read( source, result, default_name );
94  }
95  return result;
96 }
97 
98 // =================================================================================================
99 // Parse Single Tree
100 // =================================================================================================
101 
103 {
104  // Get name and tree, only use tree.
105  auto tree = parse_named_tree( input_stream ).second;
106 
107  // If we just read this tree, continue until end of stream.
108  if( ! stop_after_semicolon_ ) {
109  parse_trailing_input_( input_stream );
110  }
111 
112  // Return resulting tree.
113  return tree;
114 }
115 
116 // =================================================================================================
117 // Parse Multiple Trees
118 // =================================================================================================
119 
121  utils::InputStream& input_stream,
122  TreeSet& tree_set,
123  std::string const& default_name
124 ) const {
125  // Count how many unnamed trees we have seen.
126  size_t unnamed_ctr = 0;
127 
128  while( input_stream ) {
129 
130  // Get name and tree.
131  auto named_tree = parse_named_tree( input_stream );
132 
133  // If there are no trees left, we are done.
134  if( named_tree.first.empty() && named_tree.second.empty() ) {
135  return;
136  }
137 
138  // Fill in default name if needed.
139  if( named_tree.first.empty() ) {
140  named_tree.first = default_name + std::to_string( unnamed_ctr );
141  ++unnamed_ctr;
142  }
143 
144  // Store it in the TreeSet, without any copy steps.
145  tree_set.add( std::move( named_tree.second ), named_tree.first );
146  }
147 }
148 
149 // =================================================================================================
150 // Parse Named Tree
151 // =================================================================================================
152 
153 std::pair< std::string, Tree > NewickReader::parse_named_tree( utils::InputStream& input_stream ) const
154 {
155  // Helper function for valid tree name chars.
156  auto is_valid_tree_name_char = [&]( char c ){
157  return utils::is_print(c)
158  && ! utils::is_space(c)
159  && c != ';'
160  && c != '('
161  && c != ')'
162  && c != '='
163  ;
164  };
165 
166  // Skip leading stuff.
167  while( input_stream ) {
168 
169  // Whitespaces.
170  utils::skip_while( input_stream, utils::is_space );
171 
172  // No input, return empty tree.
173  // We can never read an empty tree from an input, so this is useful to distungish
174  // whether we were able to read a tree from the input.
175  if( ! input_stream ) {
176  return {};
177  }
178 
179  // Skip comments.
180  if( *input_stream == '[' ) {
181  ++input_stream;
182  utils::read_until( input_stream, ']' );
183 
184  if( !input_stream ) {
185  throw std::runtime_error(
186  "Reached unexpected end of Newick tree at " + input_stream.at()
187  );
188  }
189  assert( *input_stream == ']' );
190  ++input_stream;
191 
192  continue;
193  }
194 
195  // If neither applies, we are done here.
196  break;
197  }
198  assert( input_stream );
199 
200  // Get the name of the current tree, if there is one.
201  std::string name = "";
202  if( *input_stream != '(' ) {
203 
204  // Distinguish between names in quotes, and those without.
205  // Names without quotes cannot contain certain chars, see is_valid_tree_name_char().
206  if( *input_stream == '"' || *input_stream == '\'' ) {
207  name = utils::parse_quoted_string( input_stream, false, true, false );
208  } else {
209  name = utils::read_while( input_stream, is_valid_tree_name_char );
210  }
211 
212  // Always allow white spaces...
213  utils::skip_while( input_stream, utils::is_space );
214 
215  // After a name, we expect an equals sign.
216  if( *input_stream != '=' ) {
217  throw std::runtime_error(
218  "Invalid character '" + std::string( 1, *input_stream ) + "' at "
219  + input_stream.at() + ". Expecting '='."
220  );
221  }
222 
223  assert( *input_stream == '=' );
224  ++input_stream;
225 
226  // After a name, there has to be something.
227  if( ! input_stream ) {
228  throw std::runtime_error( "Unexpected end of tree at " + input_stream.at() + "." );
229  }
230  }
231 
232  // Parse the tree and return it.
233  auto broker = parse_tree_to_broker_( input_stream );
234  auto tree = broker_to_tree_destructive( broker );
235  return { name, std::move( tree ) };
236 }
237 
238 // =================================================================================================
239 // Parse Trailing Input
240 // =================================================================================================
241 
242 void NewickReader::parse_trailing_input_( utils::InputStream& input_stream ) const
243 {
244  // Check for more data after the semicolon. We cannot do this check in the parsing function,
245  // as there are cases where we read a Newick tree as part of another file (e.g, Nexus or Jplace),
246  // where it is natural that there is more data after the tree finished.
247  Token ct;
248  ct.type = TokenType::kUnknown;
249  while( input_stream && ct.type != TokenType::kEnd ) {
250  ct = get_next_token_( input_stream );
251  if( ct.type != TokenType::kEnd && ct.type != TokenType::kComment ) {
252  throw std::runtime_error( "Tree contains more data after the semicolon at " + ct.at() );
253  }
254  }
255 }
256 
257 // =================================================================================================
258 // Token Lexing
259 // =================================================================================================
260 
261 NewickReader::Token NewickReader::get_next_token_( utils::InputStream& input_stream ) const
262 {
263  // Prepare result token.
264  Token result;
265 
266  // Shorthand.
267  auto& is = input_stream;
268 
269  // Helper function to distinguish valid chars in a Newick name string.
270  // According to http://evolution.genetics.washington.edu/phylip/newicktree.html :
271  // "A name can be any string of printable characters except blanks, colons, semicolons,
272  // parentheses, and square brackets." Well, they forgot to mention commas here.
273  // But we knew before that Newick is not a good format anyway...
274  // Also, if enable_tags_ is true, we do not allow {}, as those are used for tags.
275  auto is_valid_name_char = [&]( char c ){
276  return utils::is_print(c)
277  && ! utils::is_space(c)
278  && c != ':'
279  && c != ';'
280  && c != '('
281  && c != ')'
282  && c != '['
283  && c != ']'
284  && c != ','
285  && ( ! enable_tags_ || ( c != '{' && c != '}' ))
286  ;
287  };
288 
289  // Skip initial whitespace, then set the current position in the stream.
290  // This is where the token begins.
292  result.line = is.line();
293  result.column = is.column();
294 
295  // Find token type and text from the stream.
296  if( !is ) {
297  result.type = TokenType::kEnd;
298 
299  } else if( *is == '(' ) {
300  result.type = TokenType::kOpeningParenthesis;
301  ++is;
302 
303  } else if( *is == ')' ) {
304  result.type = TokenType::kClosingParenthesis;
305  ++is;
306 
307  } else if( *is == ',' ) {
308  result.type = TokenType::kComma;
309  ++is;
310 
311  } else if( *is == ';' ) {
312  result.type = TokenType::kSemicolon;
313  ++is;
314 
315  } else if( *is == '=' ) {
316  result.type = TokenType::kEquals;
317  ++is;
318 
319  } else if( *is == '[' ) {
320  result.type = TokenType::kComment;
321  ++is;
322  result.text = utils::read_until( is, ']' );
323 
324  if( !is ) {
325  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
326  }
327  assert( *is == ']' );
328  ++is;
329 
330  } else if( *is == ':' ) {
331  result.type = TokenType::kValue;
332  ++is;
333  result.text = utils::parse_number_string( is );
334 
335  } else if( *is == '{' && enable_tags_ ) {
336  result.type = TokenType::kTag;
337  ++is;
338  result.text = utils::read_until( is, '}' );
339 
340  if( !is ) {
341  throw std::runtime_error( "Reached unexpected end of Newick tree at " + is.at() );
342  }
343  assert( *is == '}' );
344  ++is;
345 
346  } else if( *is == '"' || *is == '\'' ) {
347  result.type = TokenType::kString;
348  result.text = utils::parse_quoted_string( is, false, true, false );
349 
350  } else if( is_valid_name_char( *is )) {
351  result.type = TokenType::kString;
352  result.text = utils::read_while( is, is_valid_name_char );
353 
354  } else {
355  result.type = TokenType::kUnknown;
356  }
357 
358  return result;
359 }
360 
361 // =================================================================================================
362 // Parse Tree into Broker
363 // =================================================================================================
364 
365 NewickBroker NewickReader::parse_tree_to_broker_( utils::InputStream& input_stream ) const
366 {
367  // Create result broker.
368  NewickBroker broker;
369 
370  // Create a node that is currently being populated with data.
371  // This is copied into the broker whenever we finish a tree node.
372  NewickBrokerElement node;
373 
374  // How deep is the current token nested in the tree?
375  int depth = 0;
376 
377  // Was it closed at some point? We want to avoid a tree like "()();" to be parsed!
378  bool closed = false;
379 
380  // Store current token, start with an invalid value to indicate problems.
381  Token ct;
382  ct.type = TokenType::kEnd;
383 
384  // Store previous token.
385  // In the beginning of the loop, we set pt to ct, so that in the first iteration we have
386  // pt == TokenType::kEnd. This is used as indicator that we are in the first iteration.
387  Token pt;
388 
389  // --------------------------------------------------------------
390  // Loop over lexer tokens and check if it...
391  // --------------------------------------------------------------
392 
393  while( input_stream ) {
394  // Init the previous token to what the current token (of the previous iteration) was.
395  // In the first iteration, this inits to the kEnd token.
396  // Then, get the next token.
397  pt = ct;
398  ct = get_next_token_( input_stream );
399 
400  // Treat some special error cases.
401  if( ct.type == TokenType::kUnknown ) {
402  throw std::runtime_error(
403  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
404  );
405  }
406  if( ct.type == TokenType::kEnd ) {
407  break;
408  }
409 
410  // ------------------------------------------------------
411  // is bracket '(' ==> begin of subtree
412  // ------------------------------------------------------
413 
414  if( ct.type == TokenType::kOpeningParenthesis ) {
415  if( pt.type != TokenType::kEnd && !(
416  pt.type == TokenType::kOpeningParenthesis ||
417  pt.type == TokenType::kComma ||
418  pt.type == TokenType::kComment
419  )) {
420  throw std::runtime_error(
421  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
422  );
423  }
424 
425  if (closed) {
426  throw std::runtime_error(
427  "Tree was already closed. Cannot reopen it with '(' at " + ct.at() + "."
428  );
429  }
430 
431  ++depth;
432  continue;
433  }
434 
435  // ------------------------------------------------------
436  // Prepare for all other tokens.
437  // ------------------------------------------------------
438 
439  // if we reach this, the previous condition is not fullfilled (otherwise, the
440  // `continue` statement just above would
441  // have been called). so we have a token other than '(', which means we should already
442  // be somewhere in the tree (or a comment). check, if that is true.
443  if( pt.type == TokenType::kEnd ) {
444 
445  // If it is a comment before the start of the tree, we cannot attach it to any node,
446  // so just skip it and reset the current token to end, so that the next iteration
447  // starts fresh.
448  if( ct.type == TokenType::kComment ) {
449  ct.type = TokenType::kEnd;
450  continue;
451  }
452 
453  throw std::runtime_error( "Tree does not start with '(' at " + ct.at() + "." );
454  }
455 
456  // if we reached this point in code, this means that ct != begin, so it is not the first
457  // iteration in this loop. this means that pt was already set in the loop header (at least
458  // once), which means it now points to a valid token.
459  assert( pt.type != TokenType::kEnd );
460 
461  // set up the node that will be filled with data now.
462  // We use depth == -1 as an indicator whether it is already initialized.
463  // If it is already initialized, this means we are adding more information to it, e.g.
464  // a branch length or a tag. so we do not need to create it.
465  // however, if this node is not initialized, this means we saw a token before that finished
466  // a node and pushed it to the stack (either closing bracket or comma), so we need to init
467  // it here.
468  if( node.depth == -1 ) {
469  node.depth = depth;
470  }
471 
472  // ------------------------------------------------------
473  // is symbol or string ==> label
474  // ------------------------------------------------------
475 
476  if( ct.type == TokenType::kString ) {
477  if (!(
478  pt.type == TokenType::kOpeningParenthesis ||
479  pt.type == TokenType::kClosingParenthesis ||
480  pt.type == TokenType::kComma ||
481  pt.type == TokenType::kComment
482  )) {
483  throw std::runtime_error(
484  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
485  );
486  }
487 
488  // populate the node
489  node.name = ct.text;
490  continue;
491  }
492 
493  // ------------------------------------------------------
494  // is number ==> branch length
495  // ------------------------------------------------------
496 
497  if( ct.type == TokenType::kValue ) {
498  if (!(
499  pt.type == TokenType::kOpeningParenthesis ||
500  pt.type == TokenType::kClosingParenthesis ||
501  pt.type == TokenType::kString ||
502  pt.type == TokenType::kValue ||
503  pt.type == TokenType::kComma ||
504  pt.type == TokenType::kComment
505  )) {
506  throw std::runtime_error(
507  "Invalid characters at " + ct.at() + ": '" + ct.text + "'."
508  );
509  }
510 
511  // populate the node
512  node.values.push_back( ct.text );
513  continue;
514  }
515 
516  // ------------------------------------------------------
517  // is tag {} ==> tag
518  // ------------------------------------------------------
519 
520  if( ct.type == TokenType::kTag ) {
521  // in some newick extensions, a tag has a semantic meaning that belongs to the
522  // current node/edge, thus we need to store it
523 
524  // populate the node
525  node.tags.push_back(ct.text);
526  continue;
527  }
528 
529  // ------------------------------------------------------
530  // is comment [] ==> comment
531  // ------------------------------------------------------
532 
533  if( ct.type == TokenType::kComment ) {
534  // in some newick extensions, a comment has a semantic meaning that belongs to the
535  // current node/edge, thus we need to store it
536 
537  // populate the node
538  node.comments.push_back(ct.text);
539  continue;
540  }
541 
542  // ------------------------------------------------------
543  // is comma ',' ==> next subtree
544  // ------------------------------------------------------
545 
546  if( ct.type == TokenType::kComma ) {
547  if (!(
548  pt.type == TokenType::kOpeningParenthesis ||
549  pt.type == TokenType::kClosingParenthesis ||
550  pt.type == TokenType::kString ||
551  pt.type == TokenType::kComma ||
552  pt.type == TokenType::kValue ||
553  pt.type == TokenType::kTag ||
554  pt.type == TokenType::kComment
555  )) {
556  throw std::runtime_error( "Invalid ',' at " + ct.at() + "." );
557  }
558 
559  // Store and finish the current node. Then, make a new, uninitialized one.
560  broker.push_top( node );
561  node = NewickBrokerElement();
562  continue;
563  }
564 
565  // ------------------------------------------------------
566  // is bracket ')' ==> end of subtree
567  // ------------------------------------------------------
568 
569  if( ct.type == TokenType::kClosingParenthesis ) {
570  if (depth == 0) {
571  throw std::runtime_error( "Too many ')' at " + ct.at() + "." );
572  }
573  if (!(
574  pt.type == TokenType::kOpeningParenthesis ||
575  pt.type == TokenType::kClosingParenthesis ||
576  pt.type == TokenType::kString ||
577  pt.type == TokenType::kComma ||
578  pt.type == TokenType::kValue ||
579  pt.type == TokenType::kTag ||
580  pt.type == TokenType::kComment
581  )) {
582  throw std::runtime_error( "Invalid ')' at " + ct.at() + ": '" + ct.text + "'." );
583  }
584 
585  // Store and finish the current node. Then, make a new, uninitialized one.
586  broker.push_top( node );
587  node = NewickBrokerElement();
588 
589  // decrease depth and check if this was the parenthesis that closed the tree
590  --depth;
591  if (depth == 0) {
592  closed = true;
593  }
594  continue;
595  }
596 
597  // ------------------------------------------------------
598  // is semicolon ';' ==> end of tree
599  // ------------------------------------------------------
600 
601  if( ct.type == TokenType::kSemicolon ) {
602  if (depth != 0) {
603  throw std::runtime_error(
604  "Not enough ')' in tree before closing it with ';' at " + ct.at() + "."
605  );
606  }
607  if (!(
608  pt.type == TokenType::kClosingParenthesis ||
609  pt.type == TokenType::kString ||
610  pt.type == TokenType::kValue ||
611  pt.type == TokenType::kTag ||
612  pt.type == TokenType::kComment
613  )) {
614  throw std::runtime_error( "Invalid ';' at " + ct.at() + ": '" + ct.text + "'." );
615  }
616 
617  // Store and finish the current node. Then, make a new, uninitialized one.
618  broker.push_top( node );
619  node = NewickBrokerElement();
620  break;
621  }
622 
623  // If we reach this part of the code, all checkings for token types are done.
624  // as we check for every type that NewickLexer yields, and we use a continue or break
625  // in each of them, we should never reach this point, unless we forgot a type!
626  assert(false);
627  }
628 
629  // Tree has to finish with semicolon. Particularly ct.type == TokenType::kEnd is not allowed
630  // to happen here!
631  if( ct.type != TokenType::kSemicolon ) {
632  throw std::runtime_error( "Tree does not finish with a semicolon." );
633  }
634 
635  return broker;
636 }
637 
638 // =================================================================================================
639 // Broker to Tree
640 // =================================================================================================
641 
643 {
644  // Prepare result and intermediate data.
645  Tree tree;
646  std::vector< TreeLink* > link_stack;
647  broker_to_tree_prepare_( broker, tree );
648 
649  // Iterate over all nodes of the tree broker.
650  for( auto b_itr = broker.cbegin(); b_itr != broker.cend(); ++b_itr ) {
651  broker_to_tree_element_( *b_itr, link_stack, tree );
652  }
653  assert(link_stack.empty());
654 
655  // Finish the work.
656  broker_to_tree_finish_( tree );
657  return tree;
658 }
659 
661 {
662  // Prepare result and intermediate data.
663  Tree tree;
664  std::vector< TreeLink* > link_stack;
665  broker_to_tree_prepare_( broker, tree );
666 
667  // Iterate over all nodes of the tree broker,
668  // then delete the element from the broker to save space.
669  while( ! broker.empty() ) {
670  broker_to_tree_element_( broker.top(), link_stack, tree );
671  broker.pop_top();
672  }
673  assert(link_stack.empty());
674 
675  // Finish the work.
676  broker_to_tree_finish_( tree );
677  return tree;
678 }
679 
680 void NewickReader::broker_to_tree_prepare_( NewickBroker const& broker, Tree& tree ) const
681 {
682  // we need the ranks (number of immediate children) of all nodes
683  broker.assign_ranks();
684 
685  // Call all prepare plugins
686  for( auto const& prepare_plugin : prepare_reading_plugins ) {
687  prepare_plugin( broker, tree );
688  }
689 }
690 
691 void NewickReader::broker_to_tree_element_(
692  NewickBrokerElement const& broker_node,
693  std::vector<TreeLink*>& link_stack,
694  Tree& tree
695 ) const {
696  // Shortcut to tree containers.
697  auto& links = tree.expose_link_container();
698  auto& nodes = tree.expose_node_container();
699  auto& edges = tree.expose_edge_container();
700 
701  // create the tree node for this broker node
702  auto cur_node_u = utils::make_unique< TreeNode >();
703  auto cur_node = cur_node_u.get();
704  cur_node->reset_index( nodes.size() );
705 
706  // Create data pointer, if there is a suitable function. Otherwise, data is
707  // simply a nullptr, i.e., there is no data.
709  create_node_data_plugin( *cur_node );
710  }
711 
712  // Call all node plugins.
713  for( auto const& node_plugin : element_to_node_plugins ) {
714  node_plugin( broker_node, *cur_node );
715  }
716 
717  // Add the node.
718  nodes.push_back(std::move(cur_node_u));
719 
720  // create the link that points towards the root.
721  // this link is created for every node, root, inner and leaves.
722  auto up_link_u = utils::make_unique< TreeLink >();
723  auto up_link = up_link_u.get();
724  up_link->reset_node( cur_node );
725  cur_node->reset_primary_link( up_link );
726  up_link->reset_index( links.size() );
727  links.push_back(std::move(up_link_u));
728 
729  // establish the link towards the root
730  if (link_stack.empty()) {
731  // if the link stack is empty, we are currently at the very beginning of this loop,
732  // which means we are at the root itself. in this case, make the "link towards the root"
733  // point to itself.
734  up_link->reset_outer( up_link );
735  } else {
736  // if we are however in some other node (leaf or inner, but not the root), we establish
737  // the link "upwards" to the root, and back from there.
738  up_link->reset_outer( link_stack.back() );
739  link_stack.back()->reset_outer( up_link );
740 
741  // also, create an edge that connects both nodes
742  auto up_edge = utils::make_unique< TreeEdge >(
743  edges.size(),
744  link_stack.back(),
745  up_link
746  );
747 
748  up_link->reset_edge( up_edge.get() );
749  link_stack.back()->reset_edge( up_edge.get() );
750 
751  // Create data pointer, if there is a suitable function. Otherwise, data is
752  // simply a nullptr, i.e., there is no data.
754  create_edge_data_plugin( *up_edge );
755  }
756 
757  // Call all edge plugins.
758  for( auto const& edge_plugin : element_to_edge_plugins ) {
759  edge_plugin( broker_node, *up_edge );
760  }
761 
762  // Add the edge.
763  edges.push_back(std::move(up_edge));
764 
765  // we can now delete the head of the stack, because we just estiablished its "downlink"
766  // and thus are done with it
767  link_stack.pop_back();
768  }
769 
770  // in the following, we create the links that will connect to the nodes' children.
771  // for leaf nodes, this makes the next pointer point to the node itself (the loop
772  // is never executed in this case, as leaves have rank 0).
773  // for inner nodes, we create as many "down" links as they have children. each of them
774  // is pushed to the stack, so that for the next broker nodes they are available as
775  // reciever for the "up" links.
776  // in summary, make all next pointers of a node point to each other in a circle.
777  auto prev_link = up_link;
778  for (int i = 0; i < broker_node.rank(); ++i) {
779  auto down_link = utils::make_unique< TreeLink >();
780  prev_link->reset_next( down_link.get() );
781  prev_link = down_link.get();
782 
783  down_link->reset_node( cur_node );
784  down_link->reset_index( links.size() );
785  link_stack.push_back(down_link.get());
786  links.push_back(std::move(down_link));
787  }
788  prev_link->reset_next( up_link );
789 }
790 
791 void NewickReader::broker_to_tree_finish_(
792  Tree& tree
793 ) const {
794  // Shortcut to tree containers.
795  auto& links = tree.expose_link_container();
796 
797  // we pushed elements to the link_stack for all children of the nodes and popped them when we
798  // were done processing those children, so there should be no elements left. this assumes that
799  // NewickBroker.assign_ranks() does its job properly!
800  // Already checked in the caller functions!
801  // assert(link_stack.empty());
802 
803  // now delete the uplink of the root, in order to make the tree fully unrooted.
804  // (we do that after the tree creation, as it is way easier this way)
805  assert( &links.front()->outer() == links.front().get() );
806  auto next = &links.front()->next();
807  while( &next->next() != links.front().get() ) {
808  next = &next->next();
809  }
810  next->reset_next( &next->next().next() );
811  assert( &next->next() == &links.front()->next() );
812  links.erase(links.begin());
813  for (size_t i = 0; i < links.size(); ++i) {
814  links[i]->reset_index(i);
815  }
816  next->node().reset_primary_link( &next->next() );
817 
818  // Lastly, make sure to correctly set the root link of the tree.
819  // Assert that the link is already the primary link of its node.
820  assert( links.front().get() == &links.front().get()->node().link() );
821  tree.reset_root_link( links.front().get() );
822 
823  // Call all finish plugins.
824  for( auto const& finish_plugin : finish_reading_plugins ) {
825  finish_plugin( tree );
826  }
827 }
828 
829 // =================================================================================================
830 // Settings
831 // =================================================================================================
832 
834 {
835  enable_tags_ = value;
836  return *this;
837 }
838 
840 {
841  return enable_tags_;
842 }
843 
845 {
846  stop_after_semicolon_ = value;
847  return *this;
848 }
849 
851 {
852  return stop_after_semicolon_;
853 }
854 
855 } // namespace tree
856 } // namespace genesis
genesis::utils::InputStream::at
std::string at() const
Return a textual representation of the current input position in the form "line:column".
Definition: input_stream.hpp:481
genesis::tree::TreeSet::add
void add(Tree const &tree, std::string const &name="")
Add a Tree with a name to the TreeSet.
Definition: tree_set.hpp:88
genesis::tree::NewickReader::create_node_data_plugin
create_node_data_function create_node_data_plugin
Definition: tree/formats/newick/reader.hpp:313
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:81
parser.hpp
broker.hpp
genesis::tree::NewickReader::parse_named_tree
std::pair< std::string, Tree > parse_named_tree(utils::InputStream &input_stream) const
Parse one named tree, i.e., a tree as described here.
Definition: tree/formats/newick/reader.cpp:153
genesis::utils::read_while
std::string read_while(InputStream &source, char criterion)
Lexing function that reads from the stream while its current char equals the provided one....
Definition: scanner.hpp:216
genesis::tree::NewickReader
Definition: tree/formats/newick/reader.hpp:67
fs.hpp
Provides functions for accessing the file system.
genesis::tree::NewickReader::finish_reading_plugins
std::vector< finish_reading_function > finish_reading_plugins
Definition: tree/formats/newick/reader.hpp:311
genesis::tree::NewickBroker::top
NewickBrokerElement & top()
Returns a reference to the top node of the tree stack.
Definition: broker.cpp:213
genesis::tree::NewickReader::create_edge_data_plugin
create_edge_data_function create_edge_data_plugin
Definition: tree/formats/newick/reader.hpp:314
genesis::placement::tree_set
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:156
tree.hpp
Header of Tree class.
tree_set.hpp
std.hpp
Provides some valuable additions to STD.
genesis::tree::NewickReader::stop_after_semicolon
bool stop_after_semicolon() const
Return whether currently reading stops after the semicolon that finishes a Newick tree.
Definition: tree/formats/newick/reader.cpp:850
genesis::tree::NewickBroker::cbegin
const_iterator cbegin() const
Const version of begin().
Definition: broker.cpp:257
genesis::tree::NewickReader::parse_multiple_trees
void parse_multiple_trees(utils::InputStream &input_stream, TreeSet &tree_set, std::string const &default_name) const
Parse until the end of the stream and add all Trees to the TreeSet.
Definition: tree/formats/newick/reader.cpp:120
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: functions/genome_locus.hpp:48
genesis::tree::TreeSet
Definition: tree_set.hpp:48
string.hpp
Provides some commonly used string utility functions.
input_stream.hpp
genesis::tree::NewickReader::element_to_edge_plugins
std::vector< element_to_edge_function > element_to_edge_plugins
Definition: tree/formats/newick/reader.hpp:317
genesis::utils::is_space
constexpr bool is_space(char c) noexcept
Return whether a char is some form of white space charater, so either space, tab, new line,...
Definition: char.hpp:200
genesis::tree::Tree
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
genesis::tree::NewickReader::prepare_reading_plugins
std::vector< prepare_reading_function > prepare_reading_plugins
Definition: tree/formats/newick/reader.hpp:310
logging.hpp
Provides easy and fast logging functionality.
genesis::utils::is_print
constexpr bool is_print(char c) noexcept
Return whether a char is a printable character, according to isprint of the cctype header,...
Definition: char.hpp:209
genesis::tree::NewickBroker::pop_top
void pop_top()
Definition: broker.cpp:71
genesis::utils::skip_while
void skip_while(InputStream &source, char criterion)
Lexing function that advances the stream while its current char equals the provided one.
Definition: scanner.hpp:153
genesis::utils::read_until
std::string read_until(InputStream &source, char criterion)
Lexing function that reads from the stream until its current char equals the provided one....
Definition: scanner.hpp:254
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::NewickReader::enable_tags
bool enable_tags() const
Return whether currently Newick tags are enabled.
Definition: tree/formats/newick/reader.cpp:839
genesis::tree::NewickReader::broker_to_tree_destructive
Tree broker_to_tree_destructive(NewickBroker &broker) const
Build a Tree from a NewickBroker.
Definition: tree/formats/newick/reader.cpp:660
genesis::tree::NewickBroker
Stores a Newick tree in an intermediate format that can be further processed into a Tree.
Definition: broker.hpp:106
char.hpp
genesis::tree::NewickBroker::assign_ranks
void assign_ranks() const
Iterate over the tree and assign ranks (= number of immediate children) to all nodes.
Definition: broker.cpp:85
reader.hpp
genesis::tree::NewickReader::element_to_node_plugins
std::vector< element_to_node_function > element_to_node_plugins
Definition: tree/formats/newick/reader.hpp:316
genesis::tree::NewickBroker::empty
bool empty() const
Returns whether the stack is empty.
Definition: broker.cpp:173
scanner.hpp
genesis::utils::parse_quoted_string
std::string parse_quoted_string(utils::InputStream &source, bool use_escapes, bool use_twin_quotes, bool include_qmarks)
Read a string in quotation marks from a stream and return it.
Definition: parser.cpp:116
genesis::tree::NewickReader::read
Tree read(std::shared_ptr< utils::BaseInputSource > source) const
Read a single Tree from an input source containing a Newick tree.
Definition: tree/formats/newick/reader.cpp:62
genesis::tree::NewickReader::broker_to_tree
Tree broker_to_tree(NewickBroker const &broker) const
Build a Tree from a NewickBroker.
Definition: tree/formats/newick/reader.cpp:642
genesis::utils::parse_number_string
std::string parse_number_string(utils::InputStream &source)
Read a general number string from an input stream.
Definition: parser.cpp:48
genesis::tree::NewickReader::parse_single_tree
Tree parse_single_tree(utils::InputStream &input_stream) const
Parse a single tree. Depending on stop_after_semicolon(), stop after the semicolon or continue until ...
Definition: tree/formats/newick/reader.cpp:102
genesis::tree::NewickBroker::cend
const_iterator cend() const
Const version of end().
Definition: broker.cpp:262