A toolkit for working with phylogenetic data.
v0.24.0
tree/formats/newick/reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_FORMATS_NEWICK_READER_H_
2 #define GENESIS_TREE_FORMATS_NEWICK_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <iosfwd>
37 #include <functional>
38 #include <string>
39 #include <utility>
40 #include <vector>
41 
42 namespace genesis {
43 
44 // =================================================================================================
45 // Forward declarations
46 // =================================================================================================
47 
48 namespace utils {
49  class InputStream;
50 }
51 
52 namespace tree {
53 
54 class Tree;
55 class TreeNode;
56 class TreeEdge;
57 class TreeLink;
58 class TreeSet;
59 
60 class NewickBroker;
61 struct NewickBrokerElement;
62 
63 // =================================================================================================
64 // Newick Reader
65 // =================================================================================================
66 
68 {
69 public:
70 
71  // -------------------------------------------------------------------------
72  // Typedefs and Enums
73  // -------------------------------------------------------------------------
74 
/**
 * @brief Function type for preparatory work on the NewickBroker and the Tree ahead of the
 * actual reading.
 *
 * The callable receives the broker by const reference and the tree by non-const reference.
 */
82  using prepare_reading_function = std::function< void(
83  NewickBroker const& broker, Tree& tree
84  ) >;
85 
/**
 * @brief Function type for finalizing work on the Tree after the actual tree reading.
 *
 * The callable receives the tree by non-const reference.
 */
92  using finish_reading_function = std::function< void(
93  Tree& tree
94  ) >;
95 
/**
 * @brief Function type used to create the data pointer for each TreeNode.
 */
105  using create_node_data_function = std::function< void( TreeNode& node ) >;
106 
/**
 * @brief Function type used to create the data pointer for each TreeEdge.
 */
116  using create_edge_data_function = std::function< void( TreeEdge& edge ) >;
117 
/**
 * @brief Function type that translates from a NewickBrokerElement to a TreeNode.
 *
 * The element is passed by const reference, the node by non-const reference.
 */
124  using element_to_node_function = std::function< void(
125  NewickBrokerElement const& element, TreeNode& node
126  ) >;
127 
/**
 * @brief Function type that translates from a NewickBrokerElement to a TreeEdge.
 *
 * The element is passed by const reference, the edge by non-const reference.
 */
134  using element_to_edge_function = std::function< void(
135  NewickBrokerElement const& element, TreeEdge& edge
136  ) >;
137 
138 private:
139 
/**
 * @brief Categories that a Token can be tagged with.
 */
enum class TokenType
{
    kUnknown,
    kOpeningParenthesis,
    kClosingParenthesis,
    kComma,
    kSemicolon,
    kEquals,
    kComment,
    kValue,
    kTag,
    kString,
    kEnd
};

/**
 * @brief A single token: its category, its text, and the position it is associated with.
 *
 * A default-constructed Token has type TokenType::kEnd, empty text, and position 0:0.
 */
struct Token
{
    /** @brief Category of the token; defaults to TokenType::kEnd. */
    TokenType type = TokenType::kEnd;

    /** @brief Text content of the token. */
    std::string text;

    // Zero-initialized so that a default-constructed Token never carries indeterminate
    // position values, which at() would otherwise read.
    // NOTE(review): whether real positions are 0- or 1-based is decided by whoever fills them.
    size_t line = 0;
    size_t column = 0;

    /**
     * @brief Return the position formatted as "<line>:<column>".
     */
    std::string at() const
    {
        return std::to_string( line ) + ":" + std::to_string( column );
    }
};
167 
168  // -------------------------------------------------------------------------
169  // Constructor and Rule of Five
170  // -------------------------------------------------------------------------
171 
172 public:
173 
/**
 * @brief Special member functions: all six are explicitly defaulted.
 *
 * The destructor is declared virtual; copy and move construction and assignment are
 * compiler-generated.
 */
174  NewickReader() = default;
175  virtual ~NewickReader() = default;
176 
177  NewickReader(NewickReader const&) = default;
178  NewickReader(NewickReader&&) = default;
179 
180  NewickReader& operator= (NewickReader const&) = default;
181  NewickReader& operator= (NewickReader&&) = default;
182 
183  // -------------------------------------------------------------------------
184  // Reading
185  // -------------------------------------------------------------------------
186 
/**
 * @brief Overload taking a single input source and returning a Tree by value.
 *
 * NOTE(review): neither std::shared_ptr nor utils::BaseInputSource is declared by an include or
 * forward declaration visible in this listing (listing lines 27-34 are not shown) — confirm
 * that the header brings both into scope.
 */
193  Tree read( std::shared_ptr<utils::BaseInputSource> source ) const;
194 
/**
 * @brief Overload taking a single input source and a TreeSet @p target by non-const reference.
 *
 * @param default_name Optional, defaults to the empty string. NOTE(review): presumably the name
 *                     given to trees that carry no name in the input — confirm in the definition.
 */
219  void read(
220  std::shared_ptr<utils::BaseInputSource> source,
221  TreeSet& target,
222  std::string const& default_name = ""
223  ) const;
224 
/**
 * @brief Overload taking a vector of input sources and a TreeSet @p target by non-const reference.
 *
 * @param default_name Optional, defaults to the empty string (same caveat as the overload above
 *                     in the original header: semantics are not visible from this declaration).
 */
231  void read(
232  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
233  TreeSet& target,
234  std::string const& default_name = ""
235  ) const;
236 
/**
 * @brief Overload taking a vector of input sources and returning a TreeSet by value.
 *
 * @param default_name Optional, defaults to the empty string.
 */
243  TreeSet read(
244  std::vector<std::shared_ptr<utils::BaseInputSource>> sources,
245  std::string const& default_name = ""
246  ) const;
247 
248  // -------------------------------------------------------------------------
249  // Settings
250  // -------------------------------------------------------------------------
251 
/**
 * @brief Setter overload for the enable_tags option.
 *
 * Returns a NewickReader reference (presumably `*this`, to allow chaining — confirm in the
 * definition). The member `enable_tags_` in this class is initialized to `false`.
 */
271  NewickReader& enable_tags( bool value );
272 
/**
 * @brief Getter overload for the enable_tags option; const, returns a bool.
 */
278  bool enable_tags() const;
279 
/**
 * @brief Setter overload for the stop_after_semicolon option.
 *
 * Returns a NewickReader reference (presumably `*this`, to allow chaining — confirm in the
 * definition). The member `stop_after_semicolon_` in this class is initialized to `false`.
 */
296  NewickReader& stop_after_semicolon( bool value );
297 
/**
 * @brief Getter overload for the stop_after_semicolon option; const, returns a bool.
 */
304  bool stop_after_semicolon() const;
305 
306  // -------------------------------------------------------------------------
307  // Plugin Functions
308  // -------------------------------------------------------------------------
309 
/**
 * @brief Public containers of callbacks, one vector per function type declared in this class.
 *
 * NOTE(review): this listing jumps from line 312 to 315. The symbol index of the same page
 * mentions `create_node_data_plugin` and `create_edge_data_plugin`, which are presumably
 * declared on the two lines not shown — confirm against the real header.
 */
310  std::vector<prepare_reading_function> prepare_reading_plugins;
311  std::vector<finish_reading_function> finish_reading_plugins;
312 
315 
316  std::vector<element_to_node_function> element_to_node_plugins;
317  std::vector<element_to_edge_function> element_to_edge_plugins;
318 
319  // -------------------------------------------------------------------------
320  // Parsing Functions
321  // -------------------------------------------------------------------------
322 
/**
 * @brief Takes a utils::InputStream by non-const reference and returns a Tree by value.
 */
327  Tree parse_single_tree( utils::InputStream& input_stream ) const;
328 
/**
 * @brief Takes a utils::InputStream and a TreeSet, both by non-const reference, plus a
 * @p default_name string (no default value on this function, unlike the read() overloads).
 */
332  void parse_multiple_trees(
333  utils::InputStream& input_stream,
334  TreeSet& tree_set,
335  std::string const& default_name
336  ) const;
337 
/**
 * @brief Takes a utils::InputStream by non-const reference and returns a (string, Tree) pair.
 *
 * NOTE(review): presumably the string is the tree's name — confirm in the definition.
 */
342  std::pair< std::string, Tree > parse_named_tree( utils::InputStream& input_stream ) const;
343 
/**
 * @brief Returns a Tree from a NewickBroker that is passed by const reference.
 */
350  Tree broker_to_tree( NewickBroker const& broker ) const;
351 
/**
 * @brief Returns a Tree from a NewickBroker that is passed by non-const reference.
 *
 * Unlike broker_to_tree(), the signature permits modifying @p broker.
 */
358  Tree broker_to_tree_destructive( NewickBroker& broker ) const;
359 
360  // -------------------------------------------------------------------------
361  // Internal Member Functions
362  // -------------------------------------------------------------------------
363 
364 private:
365 
/**
 * @brief Private helper with the same parameter list as prepare_reading_function.
 *
 * NOTE(review): presumably the place where prepare_reading_plugins are run — confirm.
 */
371  void broker_to_tree_prepare_(
372  NewickBroker const& broker,
373  Tree& tree
374  ) const;
375 
/**
 * @brief Private helper taking one NewickBrokerElement (const), a stack of TreeLink pointers
 * and the Tree, the latter two by non-const reference.
 */
381  void broker_to_tree_element_(
382  NewickBrokerElement const& broker_node,
383  std::vector<TreeLink*>& link_stack,
384  Tree& tree
385  ) const;
386 
/**
 * @brief Private helper with the same parameter list as finish_reading_function.
 *
 * NOTE(review): presumably the place where finish_reading_plugins are run — confirm.
 */
392  void broker_to_tree_finish_(
393  Tree& tree
394  ) const;
395 
/**
 * @brief Private helper operating on the input stream; returns nothing.
 */
399  void parse_trailing_input_( utils::InputStream& input_stream ) const;
400 
/**
 * @brief Private helper returning a Token by value from the input stream.
 */
404  Token get_next_token_( utils::InputStream& input_stream ) const;
405 
/**
 * @brief Private helper returning a NewickBroker by value from the input stream.
 */
409  NewickBroker parse_tree_to_broker_( utils::InputStream& input_stream ) const;
410 
411  // -------------------------------------------------------------------------
412  // Member Data
413  // -------------------------------------------------------------------------
414 
// Boolean settings of the reader; both are initialized to false.
// NOTE(review): presumably the backing fields of enable_tags() and stop_after_semicolon().
415  bool enable_tags_ = false;
416  bool stop_after_semicolon_ = false;
417 
418 };
419 
420 } // namespace tree
421 } // namespace genesis
422 
423 #endif // include guard
std::function< void(TreeNode &node) > create_node_data_function
Function type used to create the data pointer for each TreeNode.
std::function< void(Tree &tree) > finish_reading_function
Function type that allows to do some finalizing work with the Tree after the actual tree reading has finished.
std::function< void(NewickBrokerElement const &element, TreeEdge &edge) > element_to_edge_function
Function type that translates from a NewickBrokerElement to a TreeEdge.
std::function< void(NewickBrokerElement const &element, TreeNode &node) > element_to_node_function
Function type that translates from a NewickBrokerElement to a TreeNode.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:156
Stores a Newick tree in an intermediate format that can be further processed into a Tree.
Definition: broker.hpp:106
std::function< void(NewickBroker const &broker, Tree &tree) > prepare_reading_function
Function type that allows to do some preparatory work with the NewickBroker and Tree before the actual tree reading begins.
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:97
std::vector< prepare_reading_function > prepare_reading_plugins
create_edge_data_function create_edge_data_plugin
std::vector< finish_reading_function > finish_reading_plugins
std::vector< element_to_edge_function > element_to_edge_plugins
std::function< void(TreeEdge &edge) > create_edge_data_function
Function type used to create the data pointer for each TreeEdge.
create_node_data_function create_node_data_plugin
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
Store the information for one element of a Newick tree.
Definition: element.hpp:60
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
std::vector< element_to_node_function > element_to_node_plugins