A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
tree/formats/newick/reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_TREE_FORMATS_NEWICK_READER_H_
2 #define GENESIS_TREE_FORMATS_NEWICK_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <iosfwd>
35 #include <functional>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
40 namespace genesis {
41 
42 // =================================================================================================
43 // Forward declarations
44 // =================================================================================================
45 
46 namespace utils {
47  class InputStream;
48 }
49 
50 namespace tree {
51 
52 class Tree;
53 class TreeNode;
54 class TreeEdge;
55 class TreeSet;
56 
57 class NewickBroker;
58 struct NewickBrokerElement;
59 
60 // =================================================================================================
61 // Newick Reader
62 // =================================================================================================
63 
65 {
66 public:
67 
68  // -------------------------------------------------------------------------
69  // Typedefs and Enums
70  // -------------------------------------------------------------------------
71 
// Callback type run with the NewickBroker (read-only) and the Tree (mutable)
// to do preparatory work before reading.
79  using prepare_reading_function = std::function< void(
80  NewickBroker const& broker, Tree& tree
81  ) >;
82 
// Callback type run with the NewickBroker (read-only) and the Tree (mutable)
// to do finalizing work after reading.
89  using finish_reading_function = std::function< void(
90  NewickBroker const& broker, Tree& tree
91  ) >;
92 
// Callback type used to create the data pointer for each TreeNode.
102  using create_node_data_function = std::function< void( TreeNode& node ) >;
103 
// Callback type used to create the data pointer for each TreeEdge.
113  using create_edge_data_function = std::function< void( TreeEdge& edge ) >;
114 
// Callback type that translates from a NewickBrokerElement (read-only)
// to a TreeNode (mutable).
121  using element_to_node_function = std::function< void(
122  NewickBrokerElement const& element, TreeNode& node
123  ) >;
124 
// Callback type that translates from a NewickBrokerElement (read-only)
// to a TreeEdge (mutable).
131  using element_to_edge_function = std::function< void(
132  NewickBrokerElement const& element, TreeEdge& edge
133  ) >;
134 
135 private:
136 
/**
 * @brief Kinds of token used by the reader's tokenizer.
 *
 * NOTE(review): the listing this was taken from skips two numbered lines right
 * after kUnknown, so enumerators may be missing here - verify against the
 * real header before relying on this list.
 */
enum class TokenType
{
    kUnknown,
    kComma,
    kSemicolon,
    kEquals,
    kComment,
    kValue,
    kTag,
    kString,
    kEnd
};

/**
 * @brief A single token: its kind, its text, and a line/column position.
 *
 * All members have defined initial values, so a default-constructed Token
 * can be inspected (including via at()) without reading indeterminate data.
 */
struct Token
{
    /// Kind of the token; a default-constructed Token is of kind kEnd.
    TokenType type = TokenType::kEnd;

    /// Text stored with the token; empty by default.
    std::string text;

    /// Line counter for this token; 0 until explicitly set.
    size_t line = 0;

    /// Column counter for this token; 0 until explicitly set.
    size_t column = 0;

    /**
     * @brief Format the stored position as "line:column".
     *
     * @return The decimal line and column numbers, joined by a colon.
     */
    std::string at() const
    {
        return std::to_string( line ) + ":" + std::to_string( column );
    }
};
164 
165  // -------------------------------------------------------------------------
166  // Constructor and Rule of Five
167  // -------------------------------------------------------------------------
168 
169 public:
170 
// Default constructor and virtual destructor, both compiler-generated.
171  NewickReader() = default;
172  virtual ~NewickReader() = default;
173 
// Copy and move construction are compiler-generated.
174  NewickReader(NewickReader const&) = default;
175  NewickReader(NewickReader&&) = default;
176 
// Copy and move assignment are compiler-generated.
177  NewickReader& operator= (NewickReader const&) = default;
178  NewickReader& operator= (NewickReader&&) = default;
179 
180  // -------------------------------------------------------------------------
181  // Reading a single Tree
182  // -------------------------------------------------------------------------
183 
// Read a Tree from an input stream containing a Newick tree.
187  Tree from_stream( std::istream& input_stream ) const;
188 
// Read a Tree from a file containing a Newick tree.
192  Tree from_file( std::string const& filename ) const;
193 
// Read a Tree from a string containing a Newick tree.
197  Tree from_string( std::string const& tree_string ) const;
198 
199  // -------------------------------------------------------------------------
200  // Reading into a TreeSet
201  // -------------------------------------------------------------------------
202 
// Overload of from_stream() that takes a TreeSet by mutable reference instead
// of returning a Tree. default_name is optional and defaults to "".
// NOTE(review): presumably the trees read are added to tree_set and
// default_name is used for trees without a name - confirm in the implementation.
210  void from_stream(
211  std::istream& input_stream,
212  TreeSet& tree_set,
213  std::string const& default_name = ""
214  ) const;
215 
// Overload of from_file() that takes a TreeSet by mutable reference instead
// of returning a Tree. default_name is optional and defaults to "".
// NOTE(review): same assumptions as for the stream overload - confirm.
223  void from_file(
224  std::string const& filename,
225  TreeSet& tree_set,
226  std::string const& default_name = ""
227  ) const;
228 
// Overload of from_string() that takes a TreeSet by mutable reference instead
// of returning a Tree. default_name is optional and defaults to "".
// NOTE(review): same assumptions as for the stream overload - confirm.
250  void from_string(
251  std::string const& tree_string,
252  TreeSet& tree_set,
253  std::string const& default_name = ""
254  ) const;
255 
256  // -------------------------------------------------------------------------
257  // Reading multiple input sources
258  // -------------------------------------------------------------------------
259 
// Fill a TreeSet from a list of files containing Newick trees.
// Unlike from_strings(), this overload takes no default_name parameter.
265  void from_files(
266  std::vector<std::string> const& filenames,
267  TreeSet& tree_set
268  ) const;
269 
// Fill a TreeSet from a list of strings containing Newick trees.
// default_name is optional and defaults to "".
273  void from_strings(
274  std::vector<std::string> const& tree_strings,
275  TreeSet& tree_set,
276  std::string const& default_name = ""
277  ) const;
278 
279  // -------------------------------------------------------------------------
280  // Settings
281  // -------------------------------------------------------------------------
282 
// Setter for the Newick tags option.
// NOTE(review): returns NewickReader&, presumably *this to allow chaining - confirm.
302  NewickReader& enable_tags( bool value );
303 
// Return whether Newick tags are currently enabled.
309  bool enable_tags() const;
310 
// Setter for the stop-at-semicolon option.
// NOTE(review): returns NewickReader&, presumably *this to allow chaining - confirm.
327  NewickReader& stop_at_semicolon( bool value );
328 
// Return whether reading currently stops after the semicolon that finishes a Newick tree.
335  bool stop_at_semicolon() const;
336 
337  // -------------------------------------------------------------------------
338  // Plugin Functions
339  // -------------------------------------------------------------------------
340 
// Public lists of callbacks of the prepare/finish reading function types.
// NOTE(review): presumably invoked before and after a tree is built - confirm.
341  std::vector<prepare_reading_function> prepare_reading_plugins;
342  std::vector<finish_reading_function> finish_reading_plugins;
343 
// NOTE(review): listing lines 344-345 are absent from this extract; the page's
// cross-reference section names create_node_data_plugin and
// create_edge_data_plugin, which likely belong here - confirm.
346 
// Public lists of callbacks that translate a NewickBrokerElement to a
// TreeNode or a TreeEdge, respectively.
347  std::vector<element_to_node_function> element_to_node_plugins;
348  std::vector<element_to_edge_function> element_to_edge_plugins;
349 
350  // -------------------------------------------------------------------------
351  // Parsing Functions
352  // -------------------------------------------------------------------------
353 
// Parse a single tree from the given utils::InputStream. Where parsing stops
// depends on stop_at_semicolon().
358  Tree parse_single_tree( utils::InputStream& input_stream ) const;
359 
// parse_multiple_trees: parse until the end of the stream and add all Trees
// to the TreeSet.
// NOTE(review): the opening line of this declaration (listing line 363,
// `void parse_multiple_trees(`) is absent from this extract; only the
// parameter list and the trailing `) const;` are visible below.
364  utils::InputStream& input_stream,
365  TreeSet& tree_set,
366  std::string const& default_name
367  ) const;
368 
// Parse one named tree and return it as a (string, Tree) pair.
// NOTE(review): presumably the string is the tree's name - confirm.
373  std::pair< std::string, Tree > parse_named_tree( utils::InputStream& input_stream ) const;
374 
375  // -------------------------------------------------------------------------
376  // Internal Member Functions
377  // -------------------------------------------------------------------------
378 
379 private:
380 
// Private helper operating on the input stream; returns nothing.
// NOTE(review): by its name, handles input remaining after a tree - confirm.
384  void parse_trailing_input_( utils::InputStream& input_stream ) const;
385 
// Private helper that produces the next Token from the input stream.
389  Token get_next_token_( utils::InputStream& input_stream ) const;
390 
// Private helper that reads from the input stream and returns a NewickBroker.
394  NewickBroker parse_tree_to_broker_( utils::InputStream& input_stream ) const;
395 
// Private helper that builds a Tree from a NewickBroker (taken read-only).
399  Tree broker_to_tree_( NewickBroker const& broker ) const;
400 
401  // -------------------------------------------------------------------------
402  // Member Data
403  // -------------------------------------------------------------------------
404 
// Option flags, both off by default.
// NOTE(review): presumably the backing fields for enable_tags() and
// stop_at_semicolon() - confirm in the implementation file.
405  bool enable_tags_ = false;
406  bool stop_at_semicolon_ = false;
407 
408 };
409 
410 } // namespace tree
411 } // namespace genesis
412 
413 #endif // include guard
std::function< void(TreeNode &node) > create_node_data_function
Function type used to create the data pointer for each TreeNode.
std::pair< std::string, Tree > parse_named_tree(utils::InputStream &input_stream) const
Parse one named tree, i.e., a tree as described here.
std::function< void(NewickBrokerElement const &element, TreeEdge &edge) > element_to_edge_function
Function type that translates from a NewickBrokerElement to a TreeEdge.
NewickReader & operator=(NewickReader const &)=default
bool enable_tags() const
Return whether Newick tags are currently enabled.
std::function< void(NewickBrokerElement const &element, TreeNode &node) > element_to_node_function
Function type that translates from a NewickBrokerElement to a TreeNode.
Tree from_string(std::string const &tree_string) const
Read a Tree from a string containing a Newick tree.
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:373
Tree parse_single_tree(utils::InputStream &input_stream) const
Parse a single tree. Depending on stop_at_semicolon(), stop after the semicolon or continue until the...
Stores a Newick tree in an intermediate format that can be further processed into a Tree...
Definition: broker.hpp:106
std::function< void(NewickBroker const &broker, Tree &tree) > prepare_reading_function
Function type that allows doing some preparatory work with the NewickBroker and Tree before the actual ...
Tree from_stream(std::istream &input_stream) const
Read a Tree from an input stream containing a Newick tree.
Class for representing phylogenetic trees.
Definition: tree/tree.hpp:95
std::vector< prepare_reading_function > prepare_reading_plugins
create_edge_data_function create_edge_data_plugin
Tree from_file(std::string const &filename) const
Read a Tree from a file containing a Newick tree.
void parse_multiple_trees(utils::InputStream &input_stream, TreeSet &tree_set, std::string const &default_name) const
Parse until the end of the stream and add all Trees to the TreeSet.
std::vector< finish_reading_function > finish_reading_plugins
std::vector< element_to_edge_function > element_to_edge_plugins
tree::TreeSet tree_set(SampleSet const &sample_set)
Return a TreeSet containing all the trees of the SampleSet.
Definition: sample_set.cpp:156
std::function< void(TreeEdge &edge) > create_edge_data_function
Function type used to create the data pointer for each TreeEdge.
create_node_data_function create_node_data_plugin
std::function< void(NewickBroker const &broker, Tree &tree) > finish_reading_function
Function type that allows doing some finalizing work with the NewickBroker and Tree after the actual ...
void from_strings(std::vector< std::string > const &tree_strings, TreeSet &tree_set, std::string const &default_name="") const
Fill a TreeSet from a list of strings containing Newick trees.
void from_files(std::vector< std::string > const &filenames, TreeSet &tree_set) const
Fill a TreeSet from a list of files containing Newick trees.
virtual ~NewickReader()=default
bool stop_at_semicolon() const
Return whether reading currently stops after the semicolon that finishes a Newick tree...
Store the information for one element of a Newick tree.
Definition: element.hpp:60
Stream interface for reading data from an InputSource, that keeps track of line and column counters...
std::vector< element_to_node_function > element_to_node_plugins