// Shared pointer to the input source that is to be read.
69 std::shared_ptr<utils::BaseInputSource> source,
// NOTE(review): default_name is presumably the fallback name for trees that carry none — confirm.
71 std::string
const& default_name
// Several input sources; each one is processed in turn below.
78 std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
// Passed through unchanged to every per-source read() call.
80 std::string
const& default_name
// Delegate to the single-source read() for each source, using the same target.
82 for(
auto const& source : sources ) {
83 read( source, target, default_name );
// Several input sources; each one is processed in turn below.
88 std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
// Passed through unchanged to every per-source read() call.
89 std::string
const& default_name
// Delegate to the single-source read() for each source, collecting into `result`.
92 for(
auto const& source : sources ) {
93 read( source, result, default_name );
// The rest of the stream is only handed to parse_trailing_input_() when
// stop_after_semicolon_ is not set.
108 if( ! stop_after_semicolon_ ) {
109 parse_trailing_input_( input_stream );
// NOTE(review): default_name is presumably used to name trees that have no name of their own — confirm.
123 std::string
const& default_name
// Counter starting at zero; NOTE(review): presumably numbers the unnamed trees — its use is not visible here.
126 size_t unnamed_ctr = 0;
// Keep going as long as the stream is in a good state.
128 while( input_stream ) {
// Case: neither a name nor any tree content was obtained.
134 if( named_tree.first.empty() && named_tree.second.empty() ) {
// Case: the tree came without a name.
139 if( named_tree.first.empty() ) {
// Move the tree into the set, stored together with its name.
145 tree_set.
add( std::move( named_tree.second ), named_tree.first );
// Predicate on a single char. NOTE(review): its body is not visible here;
// presumably it decides which chars may occur in an unquoted tree name.
156 auto is_valid_tree_name_char = [&](
char c ){
// Loop while the stream still has data.
167 while( input_stream ) {
// The stream ran out of data.
175 if( ! input_stream ) {
// An opening square bracket was found.
180 if( *input_stream ==
'[' ) {
// The stream ended before a closing bracket: report the position and fail.
184 if( !input_stream ) {
185 throw std::runtime_error(
186 "Reached unexpected end of Newick tree at " + input_stream.
at()
// Sanity check: the stream now sits on the closing bracket.
189 assert( *input_stream ==
']' );
// Sanity check: there is still input at this point.
198 assert( input_stream );
// Name of the tree, initially empty.
201 std::string name =
"";
// The current char is not an opening parenthesis.
// NOTE(review): presumably this means a name precedes the tree — confirm.
202 if( *input_stream !=
'(' ) {
// The current char is a double or single quote.
206 if( *input_stream ==
'"' || *input_stream ==
'\'' ) {
// An equals sign is required here; any other char is reported with its position.
216 if( *input_stream !=
'=' ) {
217 throw std::runtime_error(
218 "Invalid character '" + std::string( 1, *input_stream ) +
"' at "
219 + input_stream.
at() +
". Expecting '='."
// Sanity check: the stream sits on the equals sign.
223 assert( *input_stream ==
'=' );
// The input ended where more was expected: fail with the position.
227 if( ! input_stream ) {
228 throw std::runtime_error(
"Unexpected end of tree at " + input_stream.
at() +
"." );
// Parse the tree text from the stream into a broker.
233 auto broker = parse_tree_to_broker_( input_stream );
// Hand back the name together with the tree; the tree is moved, not copied.
235 return { name, std::move( tree ) };
// Start with an unknown token type so that the loop condition below holds initially.
248 ct.type = TokenType::kUnknown;
// Read tokens until the stream is exhausted or an end token shows up.
249 while( input_stream && ct.type != TokenType::kEnd ) {
250 ct = get_next_token_( input_stream );
// Only end and comment tokens are tolerated here; anything else is an error.
251 if( ct.type != TokenType::kEnd && ct.type != TokenType::kComment ) {
252 throw std::runtime_error(
"Tree contains more data after the semicolon at " + ct.at() );
// Determines the next token in the input stream: records the stream position
// in `result` and classifies the token by the char found at that position.
261 NewickReader::Token NewickReader::get_next_token_( utils::InputStream& input_stream )
const
// Short alias for the input stream.
267 auto& is = input_stream;
// Predicate for chars that may appear in a name.
// NOTE(review): only part of its body is visible here.
275 auto is_valid_name_char = [&](
char c ){
// With tags enabled, curly braces are not valid name chars.
285 && ( ! enable_tags_ || ( c !=
'{' && c !=
'}' ))
// Remember where in the input the token is located.
292 result.line = is.line();
293 result.column = is.column();
// NOTE(review): the condition for this branch is not visible here — presumably end of input.
297 result.type = TokenType::kEnd;
299 }
else if( *is ==
'(' ) {
// Opening parenthesis.
300 result.type = TokenType::kOpeningParenthesis;
303 }
else if( *is ==
')' ) {
// Closing parenthesis.
304 result.type = TokenType::kClosingParenthesis;
307 }
else if( *is ==
',' ) {
// Comma.
308 result.type = TokenType::kComma;
311 }
else if( *is ==
';' ) {
// Semicolon.
312 result.type = TokenType::kSemicolon;
315 }
else if( *is ==
'=' ) {
// Equals sign.
316 result.type = TokenType::kEquals;
319 }
else if( *is ==
'[' ) {
// A square bracket starts a comment token.
320 result.type = TokenType::kComment;
// The input ended inside the comment.
325 throw std::runtime_error(
"Reached unexpected end of Newick tree at " + is.at() );
// Sanity check: the stream sits on the closing bracket.
327 assert( *is ==
']' );
330 }
else if( *is ==
':' ) {
// A colon starts a value token.
331 result.type = TokenType::kValue;
335 }
else if( *is ==
'{' && enable_tags_ ) {
// A curly brace starts a tag token, but only if tags are enabled.
336 result.type = TokenType::kTag;
// The input ended inside the tag.
341 throw std::runtime_error(
"Reached unexpected end of Newick tree at " + is.at() );
// Sanity check: the stream sits on the closing brace.
343 assert( *is ==
'}' );
346 }
else if( *is ==
'"' || *is ==
'\'' ) {
// A double or single quote starts a string token.
347 result.type = TokenType::kString;
350 }
else if( is_valid_name_char( *is )) {
// A valid name char also starts a string token.
351 result.type = TokenType::kString;
// NOTE(review): presumably the fallback when no case above matched — the branch head is not visible here.
355 result.type = TokenType::kUnknown;
// Reads tokens from the input stream one at a time, checks each against the
// token seen before it, and pushes the resulting elements to a NewickBroker.
// Throws std::runtime_error with the token position on malformed input.
365 NewickBroker NewickReader::parse_tree_to_broker_( utils::InputStream& input_stream )
const
// Element that is being filled; it is pushed to the broker and reset further below.
372 NewickBrokerElement node;
// The current token starts out with the end type.
382 ct.type = TokenType::kEnd;
// Process tokens while the stream has data.
393 while( input_stream ) {
// Fetch the next token into ct.
// NOTE(review): pt is presumably the token before it — its assignment is not visible here.
398 ct = get_next_token_( input_stream );
// Tokens that could not be classified are rejected.
401 if( ct.type == TokenType::kUnknown ) {
402 throw std::runtime_error(
403 "Invalid characters at " + ct.at() +
": '" + ct.text +
"'."
// End token.
406 if( ct.type == TokenType::kEnd ) {
// Opening parenthesis.
414 if( ct.type == TokenType::kOpeningParenthesis ) {
// Unless pt is of end type, it has to be an opening parenthesis,
// a comma, or a comment; otherwise the input is rejected.
415 if( pt.type != TokenType::kEnd && !(
416 pt.type == TokenType::kOpeningParenthesis ||
417 pt.type == TokenType::kComma ||
418 pt.type == TokenType::kComment
420 throw std::runtime_error(
421 "Invalid characters at " + ct.at() +
": '" + ct.text +
"'."
// NOTE(review): the condition guarding this error is not visible here.
426 throw std::runtime_error(
427 "Tree was already closed. Cannot reopen it with '(' at " + ct.at() +
"."
// pt is of end type.
443 if( pt.type == TokenType::kEnd ) {
// A comment in this situation gets its type replaced by the end type.
448 if( ct.type == TokenType::kComment ) {
449 ct.type = TokenType::kEnd;
// NOTE(review): the condition guarding this error is not visible here.
453 throw std::runtime_error(
"Tree does not start with '(' at " + ct.at() +
"." );
// Sanity check: pt is not of end type from here on.
459 assert( pt.type != TokenType::kEnd );
// The depth of the current element is -1.
// NOTE(review): presumably meaning "not yet set" — confirm.
468 if( node.depth == -1 ) {
// String token.
476 if( ct.type == TokenType::kString ) {
// NOTE(review): this looks like the list of token types allowed before a
// string — the enclosing condition is not visible here.
478 pt.type == TokenType::kOpeningParenthesis ||
479 pt.type == TokenType::kClosingParenthesis ||
480 pt.type == TokenType::kComma ||
481 pt.type == TokenType::kComment
483 throw std::runtime_error(
484 "Invalid characters at " + ct.at() +
": '" + ct.text +
"'."
// Value token.
497 if( ct.type == TokenType::kValue ) {
// NOTE(review): this looks like the list of token types allowed before a
// value — the enclosing condition is not visible here.
499 pt.type == TokenType::kOpeningParenthesis ||
500 pt.type == TokenType::kClosingParenthesis ||
501 pt.type == TokenType::kString ||
502 pt.type == TokenType::kValue ||
503 pt.type == TokenType::kComma ||
504 pt.type == TokenType::kComment
506 throw std::runtime_error(
507 "Invalid characters at " + ct.at() +
": '" + ct.text +
"'."
// Append the token text to the values of the current element.
512 node.values.push_back( ct.text );
// Tag token: append its text to the tags of the current element.
520 if( ct.type == TokenType::kTag ) {
525 node.tags.push_back(ct.text);
// Comment token: append its text to the comments of the current element.
533 if( ct.type == TokenType::kComment ) {
538 node.comments.push_back(ct.text);
// Comma.
546 if( ct.type == TokenType::kComma ) {
// NOTE(review): this looks like the list of token types allowed before a
// comma — the enclosing condition is not visible here.
548 pt.type == TokenType::kOpeningParenthesis ||
549 pt.type == TokenType::kClosingParenthesis ||
550 pt.type == TokenType::kString ||
551 pt.type == TokenType::kComma ||
552 pt.type == TokenType::kValue ||
553 pt.type == TokenType::kTag ||
554 pt.type == TokenType::kComment
556 throw std::runtime_error(
"Invalid ',' at " + ct.at() +
"." );
// Push the current element to the broker and continue with a fresh one.
560 broker.push_top( node );
561 node = NewickBrokerElement();
// Closing parenthesis.
569 if( ct.type == TokenType::kClosingParenthesis ) {
// NOTE(review): the condition guarding this error is not visible here.
571 throw std::runtime_error(
"Too many ')' at " + ct.at() +
"." );
// NOTE(review): this looks like the list of token types allowed before a
// closing parenthesis — the enclosing condition is not visible here.
574 pt.type == TokenType::kOpeningParenthesis ||
575 pt.type == TokenType::kClosingParenthesis ||
576 pt.type == TokenType::kString ||
577 pt.type == TokenType::kComma ||
578 pt.type == TokenType::kValue ||
579 pt.type == TokenType::kTag ||
580 pt.type == TokenType::kComment
582 throw std::runtime_error(
"Invalid ')' at " + ct.at() +
": '" + ct.text +
"'." );
// Push the current element to the broker and continue with a fresh one.
586 broker.push_top( node );
587 node = NewickBrokerElement();
// Semicolon.
601 if( ct.type == TokenType::kSemicolon ) {
// NOTE(review): the condition guarding this error is not visible here.
603 throw std::runtime_error(
604 "Not enough ')' in tree before closing it with ';' at " + ct.at() +
"."
// NOTE(review): this looks like the list of token types allowed before a
// semicolon — the enclosing condition is not visible here.
608 pt.type == TokenType::kClosingParenthesis ||
609 pt.type == TokenType::kString ||
610 pt.type == TokenType::kValue ||
611 pt.type == TokenType::kTag ||
612 pt.type == TokenType::kComment
614 throw std::runtime_error(
"Invalid ';' at " + ct.at() +
": '" + ct.text +
"'." );
// Push the current element to the broker and continue with a fresh one.
618 broker.push_top( node );
619 node = NewickBrokerElement();
// The last token has to be a semicolon.
631 if( ct.type != TokenType::kSemicolon ) {
632 throw std::runtime_error(
"Tree does not finish with a semicolon." );
// Stack of links that is shared between the per-element calls below.
646 std::vector< TreeLink* > link_stack;
// Preparation step, run once before any element is processed.
647 broker_to_tree_prepare_( broker, tree );
// Walk over the broker elements from cbegin() to cend() and process each one.
650 for(
auto b_itr = broker.
cbegin(); b_itr != broker.
cend(); ++b_itr ) {
651 broker_to_tree_element_( *b_itr, link_stack, tree );
// Sanity check: no link is left on the stack once all elements are done.
653 assert(link_stack.empty());
// Final step on the tree.
656 broker_to_tree_finish_( tree );
// Stack of links that is shared between the per-element calls below.
664 std::vector< TreeLink* > link_stack;
// Preparation step, run once before any element is processed.
665 broker_to_tree_prepare_( broker, tree );
// Process the top element of the broker until the broker is empty.
// NOTE(review): the removal of the processed element is not visible here.
669 while( ! broker.
empty() ) {
670 broker_to_tree_element_( broker.
top(), link_stack, tree );
// Sanity check: no link is left on the stack once all elements are done.
673 assert(link_stack.empty());
// Final step on the tree.
676 broker_to_tree_finish_( tree );
// Preparation step of the broker-to-tree conversion.
680 void NewickReader::broker_to_tree_prepare_(
NewickBroker const& broker,
Tree& tree )
const
// Call the prepare plugin with the broker and the tree.
// NOTE(review): whether the call is guarded by a check is not visible here.
687 prepare_plugin( broker, tree );
// Turns one broker element into a tree node with its links (and an edge),
// and stores them in the containers of the tree.
691 void NewickReader::broker_to_tree_element_(
// The broker element to convert.
692 NewickBrokerElement
const& broker_node,
// Stack of links; this function reads its back, pops from it and pushes to it.
693 std::vector<TreeLink*>& link_stack,
// Direct access to the containers of the tree.
697 auto& links = tree.expose_link_container();
698 auto& nodes = tree.expose_node_container();
699 auto& edges = tree.expose_edge_container();
// Create the node; its index is the current size of the node container.
// The raw pointer is kept for wiring after ownership has been moved away.
702 auto cur_node_u = utils::make_unique< TreeNode >();
703 auto cur_node = cur_node_u.get();
704 cur_node->reset_index( nodes.size() );
// Call the node plugin with the broker element and the new node.
714 node_plugin( broker_node, *cur_node );
// Hand ownership of the node to the tree.
718 nodes.push_back(std::move(cur_node_u));
// Create the link `up_link`, connect it with the node as its primary link,
// index it by the current size of the link container, and store it.
722 auto up_link_u = utils::make_unique< TreeLink >();
723 auto up_link = up_link_u.get();
724 up_link->reset_node( cur_node );
725 cur_node->reset_primary_link( up_link );
726 up_link->reset_index( links.size() );
727 links.push_back(std::move(up_link_u));
// Nothing on the stack: the outer link of up_link is set to itself.
730 if (link_stack.empty()) {
734 up_link->reset_outer( up_link );
// Connect up_link and the link on top of the stack as each other's outer link.
738 up_link->reset_outer( link_stack.back() );
739 link_stack.back()->reset_outer( up_link );
// Create the edge; both links are set to refer to it.
742 auto up_edge = utils::make_unique< TreeEdge >(
748 up_link->reset_edge( up_edge.get() );
749 link_stack.back()->reset_edge( up_edge.get() );
// Call the edge plugin with the broker element and the new edge.
759 edge_plugin( broker_node, *up_edge );
// Hand ownership of the edge to the tree.
763 edges.push_back(std::move(up_edge));
// The link on top of the stack is done.
767 link_stack.pop_back();
// Create as many further links as the rank of the broker element, chained
// via next starting at up_link. Each is set to the current node, indexed,
// pushed to the stack, and stored in the tree.
777 auto prev_link = up_link;
778 for (
int i = 0; i < broker_node.rank(); ++i) {
779 auto down_link = utils::make_unique< TreeLink >();
780 prev_link->reset_next( down_link.get() );
781 prev_link = down_link.get();
783 down_link->reset_node( cur_node );
784 down_link->reset_index( links.size() );
785 link_stack.push_back(down_link.get());
786 links.push_back(std::move(down_link));
// Close the chain: the last link points back to up_link.
788 prev_link->reset_next( up_link );
// Final step of the broker-to-tree conversion: removes the first link from
// the link container, renumbers the links, and sets the root link.
791 void NewickReader::broker_to_tree_finish_(
// Direct access to the link container of the tree.
795 auto& links = tree.expose_link_container();
// Sanity check: the outer link of the first link is that link itself.
805 assert( &links.front()->outer() == links.front().get() );
// Follow the next pointers until reaching the link whose next is the first link.
806 auto next = &links.front()->next();
807 while( &next->next() != links.front().get() ) {
808 next = &next->next();
// Let that link skip the first link.
810 next->reset_next( &next->next().next() );
811 assert( &next->next() == &links.front()->next() );
// Remove the first link from the container.
812 links.erase(links.begin());
// Indices have shifted after the erase, so assign them anew.
813 for (
size_t i = 0; i < links.size(); ++i) {
814 links[i]->reset_index(i);
// Give the node of that link a new primary link.
816 next->node().reset_primary_link( &next->next() );
// Sanity check: the first link is the link that its own node reports via link().
820 assert( links.front().get() == &links.front().get()->node().link() );
// Use the first link as the root link of the tree.
821 tree.reset_root_link( links.front().get() );
// Call the finish plugin with the tree.
825 finish_plugin( tree );
// Store the given value as the enable_tags_ setting.
835 enable_tags_ = value;
// Store the given value as the stop_after_semicolon_ setting.
846 stop_after_semicolon_ = value;
// Return the current stop_after_semicolon_ setting.
852 return stop_after_semicolon_;