A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
phylip_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <iosfwd>
37 #include <string>
38 
39 namespace genesis {
40 
41 // =================================================================================================
42 // Forward Declarations
43 // =================================================================================================
44 
45 namespace utils {
46 
47 class InputStream;
48 
49 } // namespace utils
50 
51 namespace sequence {
52 
53 class SequenceSet;
54 class Sequence;
55 
56 // =================================================================================================
57 // Phylip Reader
58 // =================================================================================================
59 
// Read Phylip sequence data.
88 class PhylipReader
89 {
90 public:
91 
92  // ---------------------------------------------------------------------
93  // Types and Enums
94  // ---------------------------------------------------------------------
95 
99  struct Header // Helper that stores the header information of a Phylip file.
100  {
104  size_t num_sequences = 0; // Number of sequences in the Phylip file.
105 
109  size_t len_sequences = 0; // Length of the sequences in the Phylip file.
110 
122  std::string options; // Options that might be at the end of the header line.
123  };
124 
129  enum class Mode // Distinguishes between the different file variants of Phylip.
130  {
134  kSequential, // Read the data in Phylip sequential mode.
135 
139  kInterleaved, // Read the data in Phylip interleaved mode.
140 
144  kAutomatic // Infer the Phylip mode via trial and error.
145  };
146 
150  enum class SiteCasing // Casing methods to apply to each site of a Sequence.
151  {
155  kUnchanged, // Do not change the case of the sites.
156 
160  kToUpper, // Turn the sites into upper case.
161 
165  kToLower // Turn the sites into lower case.
166  };
167 
168  // ---------------------------------------------------------------------
169  // Constructor and Rule of Five
170  // ---------------------------------------------------------------------
171 
177  PhylipReader(); // Create a default PhylipReader. Per default, chars are turned upper case, but not validated.
178 
179  ~PhylipReader() = default; // Compiler-generated destructor.
180 
181  PhylipReader( PhylipReader const& ) = default; // Compiler-generated copy construction.
182  PhylipReader( PhylipReader&& ) = default; // Compiler-generated move construction.
183 
184  PhylipReader& operator= ( PhylipReader const& ) = default; // Compiler-generated copy assignment.
185  PhylipReader& operator= ( PhylipReader&& ) = default; // Compiler-generated move assignment.
186 
187  // ---------------------------------------------------------------------
188  // Reading
189  // ---------------------------------------------------------------------
190 
// Read all Sequences from a std::istream in Phylip format into a SequenceSet.
202  void from_stream( std::istream& input_stream, SequenceSet& sequence_set ) const;
203 
// Overload that takes only the stream and returns a SequenceSet by value.
212  SequenceSet from_stream( std::istream& input_stream ) const;
213 
// Read all Sequences from a file in Phylip format into a SequenceSet.
221  void from_file( std::string const& file_name, SequenceSet& sequence_set ) const;
222 
// Overload that takes only the file name and returns a SequenceSet by value.
227  SequenceSet from_file( std::string const& file_name ) const;
228 
// Read all Sequences from a std::string in Phylip format into a SequenceSet.
236  void from_string( std::string const& input_string, SequenceSet& sequence_set ) const;
237 
// Overload that takes only the input string and returns a SequenceSet by value.
242  SequenceSet from_string( std::string const& input_string ) const;
243 
244  // ---------------------------------------------------------------------
245  // Parsing
246  // ---------------------------------------------------------------------
247 
// Parse a Phylip header and return the contained sequence count and length.
// The parameter line (listing line 259) was missing from this listing and is
// restored from the member index entry for this function.
258  Header parse_phylip_header(
259  utils::InputStream& it
260  ) const;
261 
// Parse and return a Phylip label.
// The parameter line (listing line 270) was missing from this listing and is
// restored from the member index entry for this function.
269  std::string parse_phylip_label(
270  utils::InputStream& it
271  ) const;
272 
// Parse one sequence line.
// The parameter line (listing line 281) was missing from this listing and is
// restored from the member index entry for this function.
280  std::string parse_phylip_sequence_line(
281  utils::InputStream& it
282  ) const;
283 
// Parse a whole Phylip file using the sequential variant (Mode::kSequential).
// The line carrying the return type and name (listing line 287) was missing from
// this listing and is restored from the member index entry for this function.
287  void parse_phylip_sequential(
288  utils::InputStream& it,
289  SequenceSet& sset
290  ) const;
291 
// Parse a whole Phylip file using the interleaved variant (Mode::kInterleaved).
// The line carrying the return type and name (listing line 295) was missing from
// this listing and is restored from the member index entry for this function.
295  void parse_phylip_interleaved(
296  utils::InputStream& it,
297  SequenceSet& sset
298  ) const;
299 
300  // ---------------------------------------------------------------------
301  // Properties
302  // ---------------------------------------------------------------------
303 
325  PhylipReader& mode( Mode value ); // Set the Mode; returns a PhylipReader reference (presumably *this for chaining - confirm in the implementation).
326 
332  Mode mode() const; // Return the Mode setting of this reader.
333 
353  PhylipReader& label_length( size_t value ); // Set the label length; returns a PhylipReader reference (presumably *this for chaining - confirm in the implementation).
354 
360  size_t label_length() const; // Return the currently set label length.
361 
370 
// NOTE(review): listing line 370 is empty here; presumably the matching
// site_casing( SiteCasing ) setter declaration was lost - confirm against the header.
// Return whether Sequence sites are automatically turned into upper or lower case.
374  SiteCasing site_casing() const;
375 
393  PhylipReader& valid_chars( std::string const& chars ); // Set the chars used for validating Sequence sites; returns a PhylipReader reference.
394 
400  std::string valid_chars() const; // Return the currently set chars used for validating Sequence sites.
401 
// Return the internal CharLookup that is used for validating the Sequence sites.
// This declaration (listing line 409) was missing from the listing and is
// restored from the member index entry for this function.
409  utils::CharLookup<bool>& valid_char_lookup();
410  // ---------------------------------------------------------------------
411  // Members
412  // ---------------------------------------------------------------------
413 
414 private:
415 
416  Mode mode_ = Mode::kSequential; // Phylip file variant to read; sequential by default.
417  size_t label_length_ = 0; // Label length setting; 0 by default.
418 
419  SiteCasing site_casing_ = SiteCasing::kToUpper; // Sites are turned upper case by default.
420  bool use_validation_ = false; // Sites are not validated by default.
421  utils::CharLookup<bool> lookup_; // Lookup used for validating the Sequence sites.
422 
423 };
424 
425 } // namespace sequence
426 } // namespace genesis
427 
428 #endif // include guard
Read Phylip sequence data.
void from_stream(std::istream &input_stream, SequenceSet &sequence_set) const
Read all Sequences from a std::istream in Phylip format into a SequenceSet.
Read the data in Phylip sequential mode.
std::string options
Store the options that might be at the end of the header line.
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
PhylipReader & operator=(PhylipReader const &)=default
std::string parse_phylip_label(utils::InputStream &it) const
Parse and return a Phylip label.
Do not change the case of the sites.
Helper that stores the header information of a Phylip file.
size_t label_length() const
Return the currently set label length.
Mode
Enum to distinguish between the different file variants of Phylip. See mode( Mode value ) for more details.
size_t len_sequences
Length of the sequences in the Phylip file.
size_t num_sequences
Number of sequences in the Phylip file.
SiteCasing site_casing() const
Return whether Sequence sites are automatically turned into upper or lower case.
Infer the Phylip mode via trial and error.
Header parse_phylip_header(utils::InputStream &it) const
Parse a Phylip header and return the contained sequence count and length.
Store a set of Sequences.
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
std::string parse_phylip_sequence_line(utils::InputStream &it) const
Parse one sequence line.
void parse_phylip_interleaved(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the interleaved variant (Mode::kInterleaved).
Read the data in Phylip interleaved mode.
void from_string(std::string const &input_string, SequenceSet &sequence_set) const
Read all Sequences from a std::string in Phylip format into a SequenceSet.
void from_file(std::string const &file_name, SequenceSet &sequence_set) const
Read all Sequences from a file in Phylip format into a SequenceSet.
void parse_phylip_sequential(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the sequential variant (Mode::kSequential).
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
PhylipReader()
Create a default PhylipReader. Per default, chars are turned upper case, but not validated.