A library for working with phylogenetic and population genetic data.
v0.32.0
tree/formats/newick/reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_FORMATS_NEWICK_READER_H_
2 #define GENESIS_TREE_FORMATS_NEWICK_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <iosfwd>
37 #include <functional>
38 #include <string>
39 #include <utility>
40 #include <vector>
41 
42 namespace genesis {
43 
44 // =================================================================================================
45 // Forward declarations
46 // =================================================================================================
47 
48 namespace utils {
49  class InputStream;
50 }
51 
52 namespace tree {
53 
54 class Tree;
55 class TreeNode;
56 class TreeEdge;
57 class TreeLink;
58 class TreeSet;
59 
60 class NewickBroker;
61 struct NewickBrokerElement;
62 
63 // =================================================================================================
64 // Newick Reader
65 // =================================================================================================
66 
68 {
69 public:
70 
71  // -------------------------------------------------------------------------
72  // Typedefs and Enums
73  // -------------------------------------------------------------------------
74 
/**
 * @brief Callback type for hooks that receive the NewickBroker (read-only) and the Tree.
 *
 * Per the generated documentation of this header, it allows some preparatory work with the
 * NewickBroker and the Tree before the actual reading. It is the element type of
 * `prepare_reading_plugins`.
 */
82  using prepare_reading_function = std::function< void(
83  NewickBroker const& broker, Tree& tree
84  ) >;
85 
/**
 * @brief Callback type for hooks that receive the (mutable) Tree only.
 *
 * Per the generated documentation of this header, it allows some finalizing work with the
 * Tree after the actual tree reading. It is the element type of `finish_reading_plugins`.
 */
92  using finish_reading_function = std::function< void(
93  Tree& tree
94  ) >;
95 
/**
 * @brief Function type used to create the data pointer for each TreeNode.
 *
 * The callback receives the node by mutable reference and returns nothing.
 */
105  using create_node_data_function = std::function< void( TreeNode& node ) >;
106 
/**
 * @brief Function type used to create the data pointer for each TreeEdge.
 *
 * The callback receives the edge by mutable reference and returns nothing.
 */
116  using create_edge_data_function = std::function< void( TreeEdge& edge ) >;
117 
/**
 * @brief Function type that translates from a NewickBrokerElement to a TreeNode.
 *
 * The element is passed read-only, the node by mutable reference. It is the element type
 * of `element_to_node_plugins`.
 */
124  using element_to_node_function = std::function< void(
125  NewickBrokerElement const& element, TreeNode& node
126  ) >;
127 
/**
 * @brief Function type that translates from a NewickBrokerElement to a TreeEdge.
 *
 * The element is passed read-only, the edge by mutable reference. It is the element type
 * of `element_to_edge_plugins`.
 */
134  using element_to_edge_function = std::function< void(
135  NewickBrokerElement const& element, TreeEdge& edge
136  ) >;
137 
138 private:
139 
/**
 * @brief Kinds of token stored in Token::type.
 *
 * NOTE(review): the exact lexical meaning of each enumerator is defined by the tokenizer
 * implementation, which is not part of this header - confirm there before relying on it.
 */
enum class TokenType
{
    kUnknown,
    kOpeningParenthesis,
    kClosingParenthesis,
    kComma,
    kSemicolon,
    kEquals,
    kComment,
    kValue,
    kTag,
    kString,
    kEnd
};

/**
 * @brief A single token: its type, its text, and a line/column position.
 *
 * A default-constructed Token has type TokenType::kEnd, empty text, and position 0:0.
 */
struct Token
{
    TokenType   type = TokenType::kEnd;
    std::string text;

    // Zero-initialized so that at() never reads indeterminate values, even on a Token
    // that was default-constructed and not filled in yet.
    size_t line   = 0;
    size_t column = 0;

    /**
     * @brief Return the stored position formatted as "<line>:<column>".
     */
    std::string at() const
    {
        return std::to_string( line ) + ":" + std::to_string( column );
    }
};
167 
168  // -------------------------------------------------------------------------
169  // Constructor and Rule of Five
170  // -------------------------------------------------------------------------
171 
172 public:
173 
/**
 * @brief Default constructor and virtual destructor, both compiler-generated.
 *
 * The destructor is virtual, so instances can be destroyed through a base-class pointer.
 */
174  NewickReader() = default;
175  virtual ~NewickReader() = default;
176 
/**
 * @brief Copy and move constructors, both compiler-generated (member-wise copy/move).
 */
177  NewickReader(NewickReader const&) = default;
178  NewickReader(NewickReader&&) = default;
179 
/**
 * @brief Copy and move assignment, both compiler-generated (member-wise copy/move).
 */
180  NewickReader& operator= (NewickReader const&) = default;
181  NewickReader& operator= (NewickReader&&) = default;
182 
183  // -------------------------------------------------------------------------
184  // Reading
185  // -------------------------------------------------------------------------
186 
/**
 * @brief Read a single Tree from an input source containing a Newick tree.
 *
 * @param source Shared pointer to the input source to read from.
 * @return The Tree that was read.
 */
193  Tree read( std::shared_ptr<utils::BaseInputSource> source ) const;
194 
/**
 * @brief Overload of read() that takes one input source and a TreeSet to fill.
 *
 * @param source       Shared pointer to the input source to read from.
 * @param target       TreeSet passed by mutable reference; nothing is returned.
 * @param default_name Defaults to the empty string.
 *
 * NOTE(review): presumably every tree found in `source` is added to `target`, and
 * `default_name` is used for trees that carry no name of their own - confirm against the
 * implementation in reader.cpp.
 */
219  void read(
220  std::shared_ptr<utils::BaseInputSource> source,
221  TreeSet& target,
222  std::string const& default_name = ""
223  ) const;
224 
/**
 * @brief Overload of read() that takes several input sources and a TreeSet to fill.
 *
 * @param sources      Vector of shared pointers to input sources, taken by value.
 * @param target       TreeSet passed by mutable reference; nothing is returned.
 * @param default_name Defaults to the empty string.
 *
 * NOTE(review): presumably the trees of all `sources` are added to `target` - confirm
 * against the implementation in reader.cpp.
 */
231  void read(
232  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
233  TreeSet& target,
234  std::string const& default_name = ""
235  ) const;
236 
/**
 * @brief Overload of read() that takes several input sources and returns a TreeSet.
 *
 * @param sources      Vector of shared pointers to input sources, taken by value.
 * @param default_name Defaults to the empty string.
 * @return A TreeSet by value.
 *
 * NOTE(review): presumably the returned set holds the trees of all `sources` - confirm
 * against the implementation in reader.cpp.
 */
243  TreeSet read(
244  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
245  std::string const& default_name = ""
246  ) const;
247 
248  // -------------------------------------------------------------------------
249  // Settings
250  // -------------------------------------------------------------------------
251 
/**
 * @brief Setter counterpart of enable_tags() const.
 *
 * @param value New setting.
 * @return A NewickReader reference (presumably `*this`, to allow chaining - confirm in
 *         reader.cpp).
 */
271  NewickReader& enable_tags( bool value );
272 
/**
 * @brief Return whether currently Newick tags are enabled.
 */
278  bool enable_tags() const;
279 
/**
 * @brief Setter counterpart of stop_after_semicolon() const.
 *
 * @param value New setting.
 * @return A NewickReader reference (presumably `*this`, to allow chaining - confirm in
 *         reader.cpp).
 */
296  NewickReader& stop_after_semicolon( bool value );
297 
/**
 * @brief Return whether currently reading stops after the semicolon that finishes a
 * Newick tree.
 */
304  bool stop_after_semicolon() const;
305 
306  // -------------------------------------------------------------------------
307  // Plugin Functions
308  // -------------------------------------------------------------------------
309 
/**
 * @brief Public lists of callbacks of type prepare_reading_function and
 * finish_reading_function, respectively.
 *
 * NOTE(review): presumably invoked before and after a tree is built - the call sites are
 * not part of this header; confirm in reader.cpp.
 */
310  std::vector<prepare_reading_function> prepare_reading_plugins;
311  std::vector<finish_reading_function> finish_reading_plugins;
312 
315 
/**
 * @brief Public lists of callbacks of type element_to_node_function and
 * element_to_edge_function, respectively.
 *
 * NOTE(review): the generated documentation also lists `create_node_data_plugin` and
 * `create_edge_data_plugin` (header lines 313-314); those two declarations are missing
 * from this listing.
 */
316  std::vector<element_to_node_function> element_to_node_plugins;
317  std::vector<element_to_edge_function> element_to_edge_plugins;
318 
319  // -------------------------------------------------------------------------
320  // Parsing Functions
321  // -------------------------------------------------------------------------
322 
/**
 * @brief Parse a single tree from the given input stream.
 *
 * Per the generated documentation, the behavior after the tree depends on
 * stop_after_semicolon(): either stop after the semicolon or continue reading.
 *
 * @param input_stream Stream to parse from, taken by mutable reference.
 * @return The parsed Tree.
 */
327  Tree parse_single_tree( utils::InputStream& input_stream ) const;
328 
333  utils::InputStream& input_stream,
334  TreeSet& tree_set,
335  std::string const& default_name
336  ) const;
337 
/**
 * @brief Parse one named tree from the given input stream.
 *
 * @param input_stream Stream to parse from, taken by mutable reference.
 * @return A pair of a string and a Tree (presumably the tree's name and the tree itself -
 *         confirm in reader.cpp).
 */
342  std::pair< std::string, Tree > parse_named_tree( utils::InputStream& input_stream ) const;
343 
/**
 * @brief Build a Tree from a NewickBroker.
 *
 * The broker is taken by const reference. The generated documentation also lists a
 * `broker_to_tree_destructive( NewickBroker& )` variant, whose declaration is missing from
 * this listing.
 */
350  Tree broker_to_tree( NewickBroker const& broker ) const;
351 
359 
360  // -------------------------------------------------------------------------
361  // Internal Member Functions
362  // -------------------------------------------------------------------------
363 
364 private:
365 
/**
 * @brief Private helper taking the broker (read-only) and the tree (mutable).
 *
 * NOTE(review): judging by its name and signature, this is the preparation step of the
 * broker-to-tree conversion and likely runs `prepare_reading_plugins` - the implementation
 * is not visible here; confirm in reader.cpp.
 */
371  void broker_to_tree_prepare_(
372  NewickBroker const& broker,
373  Tree& tree
374  ) const;
375 
/**
 * @brief Private helper taking one broker element (read-only), a stack of TreeLink
 * pointers (mutable) and the tree (mutable).
 *
 * NOTE(review): judging by its name and signature, this converts a single
 * NewickBrokerElement into tree structure - the implementation is not visible here;
 * confirm in reader.cpp, including who owns the pointers held in `link_stack`.
 */
381  void broker_to_tree_element_(
382  NewickBrokerElement const& broker_node,
383  std::vector<TreeLink*>& link_stack,
384  Tree& tree
385  ) const;
386 
/**
 * @brief Private helper taking the tree by mutable reference.
 *
 * NOTE(review): judging by its name, this is the final step of the broker-to-tree
 * conversion and likely runs `finish_reading_plugins` - the implementation is not visible
 * here; confirm in reader.cpp.
 */
392  void broker_to_tree_finish_(
393  Tree& tree
394  ) const;
395 
/**
 * @brief Private helper operating on the given input stream; returns nothing.
 *
 * NOTE(review): judging by its name, this handles whatever input follows a parsed tree -
 * the implementation is not visible here; confirm in reader.cpp.
 */
399  void parse_trailing_input_( utils::InputStream& input_stream ) const;
400 
/**
 * @brief Private helper that returns a Token obtained from the given input stream.
 *
 * NOTE(review): which characters map to which TokenType is decided by the implementation,
 * which is not visible here; confirm in reader.cpp.
 */
404  Token get_next_token_( utils::InputStream& input_stream ) const;
405 
/**
 * @brief Private helper that returns a NewickBroker built from the given input stream.
 *
 * NOTE(review): judging by its name, this parses one tree into the intermediate broker
 * representation - the implementation is not visible here; confirm in reader.cpp.
 */
409  NewickBroker parse_tree_to_broker_( utils::InputStream& input_stream ) const;
410 
411  // -------------------------------------------------------------------------
412  // Member Data
413  // -------------------------------------------------------------------------
414 
// Settings flags; both default to false.
// NOTE(review): presumably these back enable_tags() and stop_after_semicolon() - the
// accessor implementations are not visible here; confirm in reader.cpp.
415  bool enable_tags_ = false;
416  bool stop_after_semicolon_ = false;
417 
418 };
419 
420 } // namespace tree
421 } // namespace genesis
422 
423 #endif // include guard
genesis::tree::NewickReader::create_node_data_plugin
create_node_data_function create_node_data_plugin
Definition: tree/formats/newick/reader.hpp:313
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
genesis::tree::NewickReader::element_to_node_function
std::function< void(NewickBrokerElement const &element, TreeNode &node) > element_to_node_function
Function type that translates from a NewickBrokerElement to a TreeNode.
Definition: tree/formats/newick/reader.hpp:126
genesis::tree::NewickReader::parse_named_tree
std::pair< std::string, Tree > parse_named_tree(utils::InputStream &input_stream) const
Parse one named tree, i.e., a tree as described in the documentation of read(); returns the tree together with its name.
Definition: tree/formats/newick/reader.cpp:153
genesis::tree::NewickReader::create_edge_data_function
std::function< void(TreeEdge &edge) > create_edge_data_function
Function type used to create the data pointer for each TreeEdge.
Definition: tree/formats/newick/reader.hpp:116
genesis::tree::NewickReader
Definition: tree/formats/newick/reader.hpp:67
genesis::tree::NewickReader::finish_reading_plugins
std::vector< finish_reading_function > finish_reading_plugins
Definition: tree/formats/newick/reader.hpp:311
genesis::tree::NewickReader::create_edge_data_plugin
create_edge_data_function create_edge_data_plugin
Definition: tree/formats/newick/reader.hpp:314
genesis::placement::tree_set
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:156
genesis::tree::NewickReader::operator=
NewickReader & operator=(NewickReader const &)=default
genesis::tree::NewickReader::prepare_reading_function
std::function< void(NewickBroker const &broker, Tree &tree) > prepare_reading_function
Function type that allows doing some preparatory work with the NewickBroker and Tree before the actual tree reading begins.
Definition: tree/formats/newick/reader.hpp:84
genesis::tree::NewickReader::stop_after_semicolon
bool stop_after_semicolon() const
Return whether currently reading stops after the semicolon that finishes a Newick tree.
Definition: tree/formats/newick/reader.cpp:850
genesis::tree::NewickReader::element_to_edge_function
std::function< void(NewickBrokerElement const &element, TreeEdge &edge) > element_to_edge_function
Function type that translates from a NewickBrokerElement to a TreeEdge.
Definition: tree/formats/newick/reader.hpp:136
genesis::tree::NewickReader::parse_multiple_trees
void parse_multiple_trees(utils::InputStream &input_stream, TreeSet &tree_set, std::string const &default_name) const
Parse until the end of the stream and add all Trees to the TreeSet.
Definition: tree/formats/newick/reader.cpp:120
input_source.hpp
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis::tree::TreeSet
Definition: tree_set.hpp:48
genesis::tree::NewickReader::create_node_data_function
std::function< void(TreeNode &node) > create_node_data_function
Function type used to create the data pointer for each TreeNode.
Definition: tree/formats/newick/reader.hpp:105
genesis::tree::NewickReader::element_to_edge_plugins
std::vector< element_to_edge_function > element_to_edge_plugins
Definition: tree/formats/newick/reader.hpp:317
genesis::tree::Tree
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
genesis::tree::NewickReader::prepare_reading_plugins
std::vector< prepare_reading_function > prepare_reading_plugins
Definition: tree/formats/newick/reader.hpp:310
genesis::tree::NewickReader::finish_reading_function
std::function< void(Tree &tree) > finish_reading_function
Function type that allows doing some finalizing work with the Tree after the actual tree reading has finished.
Definition: tree/formats/newick/reader.hpp:94
genesis::tree::NewickReader::NewickReader
NewickReader()=default
genesis::tree::TreeEdge
Definition: edge.hpp:60
genesis::tree::TreeNode
Definition: tree/tree/node.hpp:58
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::tree::NewickReader::enable_tags
bool enable_tags() const
Return whether currently Newick tags are enabled.
Definition: tree/formats/newick/reader.cpp:839
genesis::tree::NewickReader::broker_to_tree_destructive
Tree broker_to_tree_destructive(NewickBroker &broker) const
Build a Tree from a NewickBroker.
Definition: tree/formats/newick/reader.cpp:660
genesis::tree::NewickBroker
Stores a Newick tree in an intermediate format that can be further processed into a Tree.
Definition: broker.hpp:106
genesis::tree::NewickReader::element_to_node_plugins
std::vector< element_to_node_function > element_to_node_plugins
Definition: tree/formats/newick/reader.hpp:316
genesis::tree::NewickReader::~NewickReader
virtual ~NewickReader()=default
genesis::tree::NewickReader::read
Tree read(std::shared_ptr< utils::BaseInputSource > source) const
Read a single Tree from an input source containing a Newick tree.
Definition: tree/formats/newick/reader.cpp:62
genesis::tree::NewickReader::broker_to_tree
Tree broker_to_tree(NewickBroker const &broker) const
Build a Tree from a NewickBroker.
Definition: tree/formats/newick/reader.cpp:642
genesis::tree::NewickBrokerElement
Store the information for one element of a Newick tree.
Definition: element.hpp:60
genesis::tree::NewickReader::parse_single_tree
Tree parse_single_tree(utils::InputStream &input_stream) const
Parse a single tree. Depending on stop_after_semicolon(), stop after the semicolon or continue until the end of the input.
Definition: tree/formats/newick/reader.cpp:102