A library for working with phylogenetic and population genetic data.
v0.32.0
phylip_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <iosfwd>
38 #include <string>
39 
40 namespace genesis {
41 
42 // =================================================================================================
43 // Forward Declarations
44 // =================================================================================================
45 
46 namespace utils {
47 
48 class InputStream;
49 
50 } // namespace utils
51 
52 namespace sequence {
53 
54 class SequenceSet;
55 class Sequence;
56 
57 // =================================================================================================
58 // Phylip Reader
59 // =================================================================================================
60 
87 {
88 public:
89 
90  // ---------------------------------------------------------------------
91  // Types and Enums
92  // ---------------------------------------------------------------------
93 
/// Helper that stores the header information of a Phylip file.
97  struct Header
98  {
/// Number of sequences in the Phylip file.
102  size_t num_sequences = 0;
103 
/// Length of the sequences in the Phylip file.
107  size_t len_sequences = 0;
108 
/// Stores the options that might be at the end of the header line.
120  std::string options;
121  };
122 
/// Enum to distinguish between the different file variants of Phylip.
/// NOTE(review): only kSequential is visible in this listing, but the symbol index of this page
/// also documents Mode::kInterleaved ("Read the data in Phylip interleaved mode."). That
/// enumerator was presumably lost when the listing was extracted - confirm against the
/// original header.
127  enum class Mode
128  {
/// Read the data in Phylip sequential mode.
132  kSequential,
133 
138  };
139 
/// Enumeration of casing methods to apply to each site of a Sequence.
143  enum class SiteCasing
144  {
/// Do not change the case of the sites.
148  kUnchanged,
149 
/// Make all sites upper case.
153  kToUpper,
154 
/// Make all sites lower case.
158  kToLower
159  };
160 
161  // ---------------------------------------------------------------------
162  // Constructor and Rule of Five
163  // ---------------------------------------------------------------------
164 
/// Create a default PhylipReader. Per default, chars are turned upper case, but not validated.
171  PhylipReader();
172 
/// Destructor is compiler-generated.
173  ~PhylipReader() = default;
174 
/// Copy and move construction are compiler-generated.
175  PhylipReader( PhylipReader const& ) = default;
176  PhylipReader( PhylipReader&& ) = default;
177 
/// Copy and move assignment are compiler-generated.
178  PhylipReader& operator= ( PhylipReader const& ) = default;
179  PhylipReader& operator= ( PhylipReader&& ) = default;
180 
181  // ---------------------------------------------------------------------
182  // Reading
183  // ---------------------------------------------------------------------
184 
/// Read all Sequences from an input source in Phylip format and return them as a SequenceSet.
192  SequenceSet read( std::shared_ptr<utils::BaseInputSource> source ) const;
193 
/// Overload of read() that takes the SequenceSet as a reference parameter instead of returning it.
/// NOTE(review): presumably the Sequences read from `source` are stored in `target`; whether
/// existing content of `target` is kept or replaced is not visible here - confirm in
/// phylip_reader.cpp.
205  void read( std::shared_ptr<utils::BaseInputSource> source, SequenceSet& target ) const;
206 
207  // ---------------------------------------------------------------------
208  // Parsing
209  // ---------------------------------------------------------------------
210 
/// Parse a Phylip header and return the contained sequence count and length.
/// NOTE(review): the parameter line is missing from this listing; the symbol index of this page
/// gives the full signature as `Header parse_phylip_header(utils::InputStream &it) const`.
221  Header parse_phylip_header(
223  ) const;
224 
/// Parse and return a Phylip label.
/// NOTE(review): the parameter line is missing from this listing; the symbol index of this page
/// gives the full signature as `std::string parse_phylip_label(utils::InputStream &it) const`.
232  std::string parse_phylip_label(
234  ) const;
235 
/// Parse one sequence line.
/// NOTE(review): the parameter line is missing from this listing; the symbol index of this page
/// gives the full signature as
/// `std::string parse_phylip_sequence_line(utils::InputStream &it) const`.
243  std::string parse_phylip_sequence_line(
245  ) const;
246 
251  utils::InputStream& it,
252  SequenceSet& sset
253  ) const;
254 
259  utils::InputStream& it,
260  SequenceSet& sset
261  ) const;
262 
263  // ---------------------------------------------------------------------
264  // Properties
265  // ---------------------------------------------------------------------
266 
/// Setter for the Phylip file variant (Mode) of this reader.
/// NOTE(review): returns a PhylipReader reference, presumably `*this` to allow chaining of
/// setters - confirm in phylip_reader.cpp.
282  PhylipReader& mode( Mode value );
283 
/// Getter counterpart of mode( Mode value ); returns a Mode by value.
289  Mode mode() const;
290 
/// Setter for the label length.
/// NOTE(review): the meaning of particular values (e.g. 0) is not visible in this listing, and
/// the returned reference is presumably `*this` for chaining - confirm in phylip_reader.cpp.
310  PhylipReader& label_length( size_t value );
311 
/// Return the currently set label length.
317  size_t label_length() const;
318 
327 
/// Return whether Sequence sites are automatically turned into upper or lower case.
331  SiteCasing site_casing() const;
332 
/// Setter for whether digits are removed from the Sequence.
/// NOTE(review): the returned reference is presumably `*this` for chaining - confirm in
/// phylip_reader.cpp.
350  PhylipReader& remove_digits( bool value );
351 
/// Return whether digits are removed from the Sequence.
355  bool remove_digits() const;
356 
/// Setter for the chars used for validating Sequence sites.
/// NOTE(review): how an empty `chars` string is treated is not visible in this listing, and the
/// returned reference is presumably `*this` for chaining - confirm in phylip_reader.cpp.
374  PhylipReader& valid_chars( std::string const& chars );
375 
/// Return the currently set chars used for validating Sequence sites.
381  std::string valid_chars() const;
382 
390 
391  // ---------------------------------------------------------------------
392  // Members
393  // ---------------------------------------------------------------------
394 
395 private:
396 
/// Phylip file variant to read; defaults to sequential.
397  Mode mode_ = Mode::kSequential;
/// Label length setting; defaults to 0.
398  size_t label_length_ = 0;
399 
/// Casing applied to sites; by default, chars are turned upper case.
400  SiteCasing site_casing_ = SiteCasing::kToUpper;
/// Whether digits are removed from the Sequence; off by default.
401  bool remove_digits_ = false;
/// Whether sites are validated; off by default.
402  bool use_validation_ = false;
/// Char lookup table; presumably the internal CharLookup used for validating the Sequence
/// sites (see valid_char_lookup() in the symbol index of this page).
403  utils::CharLookup<bool> lookup_;
404 
405 };
406 
407 } // namespace sequence
408 } // namespace genesis
409 
410 #endif // include guard
genesis::sequence::PhylipReader::SiteCasing::kToLower
@ kToLower
Make all sites lower case.
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:88
genesis::sequence::PhylipReader::SiteCasing::kToUpper
@ kToUpper
Make all sites upper case.
genesis::sequence::PhylipReader::parse_phylip_interleaved
void parse_phylip_interleaved(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the interleaved variant (Mode::kInterleaved).
Definition: phylip_reader.cpp:272
genesis::sequence::PhylipReader::SiteCasing
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
Definition: phylip_reader.hpp:143
genesis::sequence::PhylipReader::valid_char_lookup
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
Definition: phylip_reader.cpp:415
genesis::sequence::PhylipReader::operator=
PhylipReader & operator=(PhylipReader const &)=default
genesis::sequence::PhylipReader::parse_phylip_label
std::string parse_phylip_label(utils::InputStream &it) const
Parse and return a Phylip label.
Definition: phylip_reader.cpp:142
genesis::sequence::PhylipReader::site_casing
SiteCasing site_casing() const
Return whether Sequence sites are automatically turned into upper or lower case.
Definition: phylip_reader.cpp:374
input_source.hpp
genesis::sequence::PhylipReader::Header
Helper that stores the header information of a Phylip file.
Definition: phylip_reader.hpp:97
genesis::sequence::PhylipReader::~PhylipReader
~PhylipReader()=default
genesis::sequence::PhylipReader::read
SequenceSet read(std::shared_ptr< utils::BaseInputSource > source) const
Read all Sequences from an input source in Phylip format and return them as a SequenceSet.
Definition: phylip_reader.cpp:65
genesis::sequence::PhylipReader::parse_phylip_header
Header parse_phylip_header(utils::InputStream &it) const
Parse a Phylip header and return the contained sequence count and length.
Definition: phylip_reader.cpp:96
genesis::sequence::PhylipReader
Read Phylip sequence data.
Definition: phylip_reader.hpp:86
char_lookup.hpp
genesis::sequence::PhylipReader::Mode
Mode
Enum to distinguish between the different file variants of Phylip. See mode( Mode value ) for more details.
Definition: phylip_reader.hpp:127
genesis::sequence::PhylipReader::SiteCasing::kUnchanged
@ kUnchanged
Do not change the case of the sites.
genesis::sequence::PhylipReader::Header::len_sequences
size_t len_sequences
Length of the sequences in the Phylip file.
Definition: phylip_reader.hpp:107
genesis::utils::CharLookup< bool >
genesis::sequence::PhylipReader::parse_phylip_sequence_line
std::string parse_phylip_sequence_line(utils::InputStream &it) const
Parse one sequence line.
Definition: phylip_reader.cpp:184
genesis::sequence::PhylipReader::parse_phylip_sequential
void parse_phylip_sequential(utils::InputStream &it, SequenceSet &sset) const
Parse a whole Phylip file using the sequential variant (Mode::kSequential).
Definition: phylip_reader.cpp:222
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::PhylipReader::Mode::kInterleaved
@ kInterleaved
Read the data in Phylip interleaved mode.
genesis::sequence::SequenceSet
Store a set of Sequences.
Definition: sequence_set.hpp:53
genesis::sequence::PhylipReader::label_length
size_t label_length() const
Return the currently set label length.
Definition: phylip_reader.cpp:363
genesis::sequence::PhylipReader::Header::num_sequences
size_t num_sequences
Number of sequences in the Phylip file.
Definition: phylip_reader.hpp:102
genesis::sequence::PhylipReader::Header::options
std::string options
Store the options that might be at the end of the header line.
Definition: phylip_reader.hpp:120
genesis::sequence::PhylipReader::mode
Mode mode() const
Definition: phylip_reader.cpp:352
genesis::sequence::PhylipReader::PhylipReader
PhylipReader()
Create a default PhylipReader. Per default, chars are turned upper case, but not validated.
Definition: phylip_reader.cpp:56
genesis::sequence::PhylipReader::remove_digits
bool remove_digits() const
Return whether digits are removed from the Sequence.
Definition: phylip_reader.cpp:385
genesis::sequence::PhylipReader::valid_chars
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
Definition: phylip_reader.cpp:404
genesis::sequence::PhylipReader::Mode::kSequential
@ kSequential
Read the data in Phylip sequential mode.