A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
phylip_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <iosfwd>
37 #include <string>
38 
39 namespace genesis {
40 
41 // =================================================================================================
42 // Forward Declarations
43 // =================================================================================================
44 
45 namespace utils {
46 
47 class InputStream; // Forward declaration. Stream interface for reading data from an InputSource that keeps track of line and column counters.
48 
49 } // namespace utils
50 
51 namespace sequence {
52 
53 class SequenceSet; // Forward declaration. Stores a set of Sequences.
54 class Sequence; // Forward declaration; the full definition is not part of this header.
55 
56 // =================================================================================================
57 // Phylip Reader
58 // =================================================================================================
59 
89 {
90 public:
91 
92  // ---------------------------------------------------------------------
93  // Types and Enums
94  // ---------------------------------------------------------------------
95 
99  struct Header // Helper that stores the header information of a Phylip file.
100  {
104  size_t num_sequences = 0; // Number of sequences in the Phylip file.
105 
109  size_t len_sequences = 0; // Length of the sequences in the Phylip file.
110 
122  std::string options; // Options that might be at the end of the header line.
123  };
124 
129  enum class Mode // Distinguishes between the different file variants of Phylip; selected via mode( Mode value ).
130  {
134  kSequential, // Read the data in Phylip sequential mode.
135 
139  kInterleaved, // Read the data in Phylip interleaved mode.
140 
144  kAutomatic // Infer the Phylip mode via trial and error.
145  };
146 
147  // ---------------------------------------------------------------------
148  // Constructor and Rule of Five
149  // ---------------------------------------------------------------------
150 
156  PhylipReader(); // Create a default PhylipReader. Per default, chars are turned upper case, but not validated.
157 
158  ~PhylipReader() = default; // Compiler-generated destructor.
159 
160  PhylipReader( PhylipReader const& ) = default; // Compiler-generated copy constructor.
161  PhylipReader( PhylipReader&& ) = default; // Compiler-generated move constructor.
162 
163  PhylipReader& operator= ( PhylipReader const& ) = default; // Compiler-generated copy assignment.
164  PhylipReader& operator= ( PhylipReader&& ) = default; // Compiler-generated move assignment.
165 
166  // ---------------------------------------------------------------------
167  // Reading
168  // ---------------------------------------------------------------------
169 
181  void from_stream( std::istream& input_stream, SequenceSet& sequence_set ) const; // Read all Sequences from a std::istream in Phylip format into a SequenceSet.
182 
191  SequenceSet from_stream( std::istream& input_stream ) const; // Overload without an output parameter: returns a SequenceSet by value.
192 
200  void from_file( std::string const& file_name, SequenceSet& sequence_set ) const; // Read all Sequences from a file in Phylip format into a SequenceSet.
201 
206  SequenceSet from_file( std::string const& file_name ) const; // Overload without an output parameter: returns a SequenceSet by value.
207 
215  void from_string( std::string const& input_string, SequenceSet& sequence_set ) const; // Read all Sequences from a std::string in Phylip format into a SequenceSet.
216 
221  SequenceSet from_string( std::string const& input_string ) const; // Overload without an output parameter: returns a SequenceSet by value.
222 
223  // ---------------------------------------------------------------------
224  // Parsing
225  // ---------------------------------------------------------------------
226 
237  Header parse_phylip_header( // Parse a Phylip header and return the contained sequence count and length.
238  utils::InputStream& it
239  ) const;
240 
248  std::string parse_phylip_label( // Parse and return a Phylip label.
249  utils::InputStream& it
250  ) const;
251 
259  std::string parse_phylip_sequence_line( // Parse one sequence line.
260  utils::InputStream& it
261  ) const;
262 
266  void parse_phylip_sequential( // Parse a whole Phylip file using the sequential variant (Mode::kSequential).
267  utils::InputStream& it,
268  SequenceSet& sset
269  ) const;
270 
274  void parse_phylip_interleaved( // Parse a whole Phylip file using the interleaved variant (Mode::kInterleaved).
275  utils::InputStream& it,
276  SequenceSet& sset
277  ) const;
278 
279  // ---------------------------------------------------------------------
280  // Properties
281  // ---------------------------------------------------------------------
282 
304  PhylipReader& mode( Mode value ); // Setter for the Phylip file variant. Returns a PhylipReader reference - presumably *this for call chaining; the definition is not shown here, confirm.
305 
311  Mode mode() const; // Getter counterpart of mode( Mode value ); presumably returns the stored mode_ - confirm against the definition.
312 
332  PhylipReader& label_length( size_t value ); // Setter for the label length. Returns a PhylipReader reference - presumably *this; confirm.
333 
339  size_t label_length() const; // Return the currently set label length.
340 
349  PhylipReader& to_upper( bool value ); // Setter for the upper-case conversion flag. Returns a PhylipReader reference - presumably *this; confirm.
350 
354  bool to_upper() const; // Return whether Sequence sites are automatically turned into upper case.
355 
373  PhylipReader& valid_chars( std::string const& chars ); // Setter for the chars used for validating Sequence sites. Returns a PhylipReader reference - presumably *this; confirm.
374 
380  std::string valid_chars() const; // Return the currently set chars used for validating Sequence sites.
381 
389 
390  // ---------------------------------------------------------------------
391  // Members
392  // ---------------------------------------------------------------------
393 
394 private:
395 
396  Mode mode_ = Mode::kSequential; // Phylip file variant; defaults to the sequential variant.
397  size_t label_length_ = 0; // Label length setting; defaults to 0. NOTE(review): what a value of 0 means is not visible in this header - confirm.
398 
399  bool to_upper_ = true; // Defaults to true, matching the constructor doc: chars are turned upper case per default.
400  bool use_validation_ = false; // Defaults to false, matching the constructor doc: chars are not validated per default.
401  utils::CharLookup<bool> lookup_; // NOTE(review): presumably the lookup used for validating Sequence sites. utils::CharLookup is neither declared nor included in the visible listing - confirm the include is present.
402 
403 };
404 
405 } // namespace sequence
406 } // namespace genesis
407 
408 #endif // include guard
Read Phylip sequence data.
void from_stream(std::istream &input_stream, SequenceSet &sequence_set) const
Read all Sequences from a std::istream in Phylip format into a SequenceSet.
Read the data in Phylip sequential mode.
std::string options
Store the options that might be at the end of the header line.
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
PhylipReader & operator=(PhylipReader const &)=default
std::string parse_phylip_label(utils::InputStream &it) const
Parse and return a Phylip label.
Helper that stores the header information of a Phylip file.
bool to_upper() const
Return whether Sequence sites are automatically turned into upper case.
size_t label_length() const
Return the currently set label length.
Mode
Enum to distinguish between the different file variants of Phylip. See mode( Mode value ) for more details.
size_t len_sequences
Length of the sequences in the Phylip file.
size_t num_sequences
Number of sequences in the Phylip file.
Infer the Phylip mode via trial and error.
Header parse_phylip_header(utils::InputStream &it) const
Parse a Phylip header and return the contained sequence count and length.
Store a set of Sequences.
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
std::string parse_phylip_sequence_line(utils::InputStream &it) const
Parse one sequence line.
void parse_phylip_interleaved(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the interleaved variant (Mode::kInterleaved).
Read the data in Phylip interleaved mode.
void from_string(std::string const &input_string, SequenceSet &sequence_set) const
Read all Sequences from a std::string in Phylip format into a SequenceSet.
void from_file(std::string const &file_name, SequenceSet &sequence_set) const
Read all Sequences from a file in Phylip format into a SequenceSet.
void parse_phylip_sequential(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the sequential variant (Mode::kSequential).
Stream interface for reading data from an InputSource, that keeps track of line and column counters...
PhylipReader()
Create a default PhylipReader. Per default, chars are turned upper case, but not validated.