A toolkit for working with phylogenetic data.
v0.24.0
phylip_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_PHYLIP_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <iosfwd>
38 #include <string>
39 
40 namespace genesis {
41 
42 // =================================================================================================
43 // Forward Declarations
44 // =================================================================================================
45 
46 namespace utils {
47 
48 class InputStream;
49 
50 } // namespace utils
51 
52 namespace sequence {
53 
54 class SequenceSet;
55 class Sequence;
56 
57 // =================================================================================================
58 // Phylip Reader
59 // =================================================================================================
60 
87 {
88 public:
89 
90  // ---------------------------------------------------------------------
91  // Types and Enums
92  // ---------------------------------------------------------------------
93 
/// Helper that stores the header information of a Phylip file.
97  struct Header
98  {
/// Sequence count; starts at 0.
/// NOTE(review): presumably the number of sequences announced by the header line - confirm in the parser.
102  size_t num_sequences = 0;
103 
/// Sequence length; starts at 0.
/// NOTE(review): presumably the number of sites per sequence announced by the header line - confirm in the parser.
107  size_t len_sequences = 0;
108 
/// Stores the options that might be at the end of the header line.
120  std::string options;
121  };
122 
/// Enum to distinguish between the different file variants of Phylip.
/// A value of this type is passed to mode( Mode value ) and returned by mode().
127  enum class Mode
128  {
/// Sequential file variant. This is the initial value of the mode_ member.
132  kSequential,
133 
/// Interleaved file variant.
137  kInterleaved
138  };
139 
/// Enumeration of casing methods to apply to each site of a Sequence.
/// A value of this type is passed to site_casing( SiteCasing value ) and returned by site_casing().
143  enum class SiteCasing
144  {
/// NOTE(review): presumably leaves the case of each site as found in the input - confirm in the implementation.
148  kUnchanged,
149 
/// NOTE(review): presumably converts each site to upper case. This is the initial value of the site_casing_ member.
153  kToUpper,
154 
/// NOTE(review): presumably converts each site to lower case - confirm in the implementation.
158  kToLower
159  };
160 
161  // ---------------------------------------------------------------------
162  // Constructor and Rule of Five
163  // ---------------------------------------------------------------------
164 
/// Default constructor. Only declared here; its definition is not part of this listing.
171  PhylipReader();
172 
/// The destructor is the compiler-generated default.
173  ~PhylipReader() = default;
174 
/// Copy and move construction are the compiler-generated defaults.
175  PhylipReader( PhylipReader const& ) = default;
176  PhylipReader( PhylipReader&& ) = default;
177 
/// Copy and move assignment are the compiler-generated defaults.
178  PhylipReader& operator= ( PhylipReader const& ) = default;
179  PhylipReader& operator= ( PhylipReader&& ) = default;
180 
181  // ---------------------------------------------------------------------
182  // Reading
183  // ---------------------------------------------------------------------
184 
/// Read Phylip sequence data from the given input source and return it as a new SequenceSet
/// (a container that stores a set of Sequences). Does not modify the reader (const).
192  SequenceSet read( std::shared_ptr<utils::BaseInputSource> source ) const;
193 
/// Overload that receives the SequenceSet to work on by reference instead of returning a new one.
/// NOTE(review): whether existing content of target is kept or replaced is not visible here - confirm in the implementation.
205  void read( std::shared_ptr<utils::BaseInputSource> source, SequenceSet& target ) const;
206 
207  // ---------------------------------------------------------------------
208  // Parsing
209  // ---------------------------------------------------------------------
210 
/// Returns a Header (the helper struct that stores the header information of a Phylip file).
/// Does not modify the reader (const).
/// NOTE(review): the parameter line is missing from this listing (numbering jumps from 221 to 223);
/// consult the original header file for the argument list.
221  Header parse_phylip_header(
223  ) const;
224 
/// Returns a std::string by value; judging by the name, this is the label of one sequence.
/// Does not modify the reader (const).
/// NOTE(review): the parameter line is missing from this listing (numbering jumps from 232 to 234);
/// consult the original header file for the argument list.
232  std::string parse_phylip_label(
234  ) const;
235 
/// Returns a std::string by value; judging by the name, this is one line of sequence sites.
/// Does not modify the reader (const).
/// NOTE(review): the parameter line is missing from this listing (numbering jumps from 243 to 245);
/// consult the original header file for the argument list.
243  std::string parse_phylip_sequence_line(
245  ) const;
246 
/// Parsing routine named after the sequential Phylip variant (see Mode::kSequential).
/// @param it   Input stream to read from; InputStream is a stream interface for reading data
///             from an InputSource that keeps track of line and column counters.
/// @param sset SequenceSet passed by non-const reference.
///             NOTE(review): presumably receives the parsed sequences - confirm in the implementation.
/// Does not modify the reader (const).
250  void parse_phylip_sequential(
251  utils::InputStream& it,
252  SequenceSet& sset
253  ) const;
254 
/// Parsing routine named after the interleaved Phylip variant (see Mode::kInterleaved).
/// @param it   Input stream to read from; InputStream is a stream interface for reading data
///             from an InputSource that keeps track of line and column counters.
/// @param sset SequenceSet passed by non-const reference.
///             NOTE(review): presumably receives the parsed sequences - confirm in the implementation.
/// Does not modify the reader (const).
258  void parse_phylip_interleaved(
259  utils::InputStream& it,
260  SequenceSet& sset
261  ) const;
262 
263  // ---------------------------------------------------------------------
264  // Properties
265  // ---------------------------------------------------------------------
266 
/// Setter for the Phylip file variant (Mode) used by this reader.
/// Returns a PhylipReader reference (NOTE(review): presumably *this, to allow chained calls - confirm).
282  PhylipReader& mode( Mode value );
283 
/// Getter for the Phylip file variant. The backing member mode_ starts as Mode::kSequential.
289  Mode mode() const;
290 
/// Setter for the label length.
/// NOTE(review): the meaning of specific values (e.g. 0) is not visible in this listing - confirm in the implementation.
/// Returns a PhylipReader reference (NOTE(review): presumably *this, to allow chained calls - confirm).
310  PhylipReader& label_length( size_t value );
311 
/// Getter for the label length. The backing member label_length_ starts as 0.
317  size_t label_length() const;
318 
/// Setter for the casing method to apply to each site of a Sequence (see SiteCasing).
/// Returns a PhylipReader reference (NOTE(review): presumably *this, to allow chained calls - confirm).
326  PhylipReader& site_casing( SiteCasing value );
327 
/// Getter for the casing method. The backing member site_casing_ starts as SiteCasing::kToUpper.
331  SiteCasing site_casing() const;
332 
/// Setter for the remove-digits flag.
/// NOTE(review): presumably controls whether digits are stripped from the sequence data while reading - confirm.
/// Returns a PhylipReader reference (NOTE(review): presumably *this, to allow chained calls - confirm).
350  PhylipReader& remove_digits( bool value );
351 
/// Getter for the remove-digits flag. The backing member remove_digits_ starts as false.
355  bool remove_digits() const;
356 
/// Setter taking the set of valid characters as a string.
/// NOTE(review): presumably fills lookup_ and toggles use_validation_ - confirm in the implementation.
/// Returns a PhylipReader reference (NOTE(review): presumably *this, to allow chained calls - confirm).
374  PhylipReader& valid_chars( std::string const& chars );
375 
/// Getter returning the valid characters as a std::string by value.
381  std::string valid_chars() const;
382 
/// Returns a non-const reference to a utils::CharLookup<bool>; this member function is itself non-const.
/// NOTE(review): presumably exposes the lookup_ member so callers can modify it directly - confirm.
389  utils::CharLookup<bool>& valid_char_lookup();
390 
391  // ---------------------------------------------------------------------
392  // Members
393  // ---------------------------------------------------------------------
394 
395 private:
396 
/// Phylip file variant; sequential unless changed.
397  Mode mode_ = Mode::kSequential;
/// Label length; starts at 0 (NOTE(review): the meaning of 0 is not visible in this listing).
398  size_t label_length_ = 0;
399 
/// Casing applied to each site; upper case unless changed.
400  SiteCasing site_casing_ = SiteCasing::kToUpper;
/// Remove-digits flag; off unless changed.
401  bool remove_digits_ = false;
/// Validation flag; off unless changed (NOTE(review): presumably enabled when valid chars are set - confirm).
402  bool use_validation_ = false;
/// Per-character boolean lookup table (NOTE(review): presumably marks the valid characters - confirm).
403  utils::CharLookup<bool> lookup_;
404 
405 };
406 
407 } // namespace sequence
408 } // namespace genesis
409 
410 #endif // include guard
Read Phylip sequence data.
std::string options
Store the options that might be at the end of the header line.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Helper that stores the header information of a Phylip file.
Mode
Enum to distinguish between the different file variants of Phylip. See mode( Mode value ) for more details.
Store a set of Sequences.
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
Stream interface for reading data from an InputSource, that keeps track of line and column counters...