A toolkit for working with phylogenetic data.
v0.24.0
fasta_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <iosfwd>
38 #include <memory>
39 #include <string>
40 
41 namespace genesis {
42 
43 // =================================================================================================
44 // Forward Declarations
45 // =================================================================================================
46 
/**
 * Forward declaration of the input stream type, so that this header only needs
 * the name. It is used exclusively by reference in the parse_*() declarations
 * of the reader class further down.
 * Per the generated documentation, InputStream is a stream interface for reading
 * data from an input source that keeps track of line and column counters.
 */
47 namespace utils {
48  class InputStream;
49 }
50 
/**
 * Forward declarations of the sequence types used in the reader's signatures.
 * SequenceSet stores a set of Sequences (per the generated documentation);
 * Sequence is presumably a single entry of such a set -- confirm in its header.
 */
51 namespace sequence {
52  class SequenceSet;
53  class Sequence;
54 }
55 
56 // =================================================================================================
57 // Fasta Reader
58 // =================================================================================================
59 
60 namespace sequence {
61 
93 {
94 public:
95 
96  // ---------------------------------------------------------------------
97  // Typedefs and Enums
98  // ---------------------------------------------------------------------
99 
/**
 * Enumeration of the available methods for parsing Fasta sequences.
 * The reader stores one of these values in parsing_method_.
 */
103  enum class ParsingMethod
104  {
/** Value that parsing_method_ is initialized with. */
117  kDefault,
118 
/**
 * Alternative parsing method.
 * NOTE(review): presumably selects parse_sequence_pedantic() instead of
 * parse_sequence() -- confirm in the implementation file.
 */
133  kPedantic
134  };
135 
/**
 * Enumeration of casing methods to apply to each site of a Sequence.
 * The reader stores one of these values in site_casing_.
 */
139  enum class SiteCasing
140  {
/** Judging by the name, sites are kept as they appear in the input -- confirm in the implementation. */
144  kUnchanged,
145 
/** Value that site_casing_ is initialized with; judging by the name, sites are converted to upper case. */
149  kToUpper,
150 
/** Judging by the name, sites are converted to lower case -- confirm in the implementation. */
154  kToLower
155  };
156 
157  // ---------------------------------------------------------------------
158  // Constructor and Rule of Five
159  // ---------------------------------------------------------------------
160 
/** Default constructor. Only declared here; its definition is not part of this listing. */
166  FastaReader();
/** Compiler-generated destructor. */
167  ~FastaReader() = default;
168 
/** Compiler-generated copy and move construction. */
169  FastaReader( FastaReader const& ) = default;
170  FastaReader( FastaReader&& ) = default;
171 
/** Compiler-generated copy and move assignment. */
172  FastaReader& operator= ( FastaReader const& ) = default;
173  FastaReader& operator= ( FastaReader&& ) = default;
174 
175  // ---------------------------------------------------------------------
176  // Reading
177  // ---------------------------------------------------------------------
178 
/**
 * Read from the given input source and return the result as a new SequenceSet.
 * Does not modify the reader (const member function).
 * NOTE(review): utils::BaseInputSource is not among the forward declarations
 * visible in this listing; it has to be declared by an include that is not shown.
 */
186  SequenceSet read( std::shared_ptr< utils::BaseInputSource > source ) const;
187 
/**
 * Overload that takes the target SequenceSet as an in/out parameter instead of
 * returning a new one.
 * NOTE(review): whether existing content of sequence_set is kept or replaced
 * cannot be told from the declaration -- confirm in the implementation.
 */
198  void read( std::shared_ptr< utils::BaseInputSource > source, SequenceSet& sequence_set ) const;
199 
200  // ---------------------------------------------------------------------
201  // Parsing
202  // ---------------------------------------------------------------------
203 
/**
 * Parse from an InputStream into the given SequenceSet.
 * Both arguments are taken by non-const reference, so the stream position and
 * the set may be modified; the reader itself is not (const member function).
 * NOTE(review): presumably parses the whole stream using the method selected
 * via parsing_method() -- confirm in the implementation.
 */
210  void parse_document(
211  utils::InputStream& input_stream,
212  SequenceSet& sequence_set
213  ) const;
214 
/**
 * Parse a single Sequence from an InputStream into the given Sequence object.
 * Both arguments are taken by non-const reference; the reader itself is not
 * modified (const member function).
 * NOTE(review): the meaning of the bool return value is not visible here;
 * presumably it signals whether a sequence was read -- confirm in the implementation.
 */
226  bool parse_sequence(
227  utils::InputStream& input_stream,
228  Sequence& sequence
229  ) const;
230 
/**
 * Variant of parse_sequence() with the same signature: parses from an
 * InputStream into the given Sequence object and returns a bool.
 * NOTE(review): presumably the stricter parser used for ParsingMethod::kPedantic;
 * the exact differences and the meaning of the return value are not visible in
 * this listing -- confirm in the implementation.
 */
246  bool parse_sequence_pedantic(
247  utils::InputStream& input_stream,
248  Sequence& sequence
249  ) const;
250 
251  // ---------------------------------------------------------------------
252  // Properties
253  // ---------------------------------------------------------------------
254 
/**
 * Set the ParsingMethod of this reader.
 * Returns a FastaReader reference (presumably *this, to allow chaining of
 * setters -- confirm in the implementation).
 */
261  FastaReader& parsing_method( ParsingMethod value );
262 
/** Get the ParsingMethod of this reader. The in-class default of the backing member is ParsingMethod::kDefault. */
268  ParsingMethod parsing_method() const;
269 
/**
 * Set the SiteCasing of this reader.
 * Returns a FastaReader reference (presumably *this, to allow chaining of
 * setters -- confirm in the implementation).
 */
277  FastaReader& site_casing( SiteCasing value );
278 
/** Get the SiteCasing of this reader. The in-class default of the backing member is SiteCasing::kToUpper. */
282  SiteCasing site_casing() const;
283 
/**
 * Set the guess_abundances flag of this reader.
 * Returns a FastaReader reference (presumably *this, to allow chaining of
 * setters -- confirm in the implementation).
 * NOTE(review): what exactly is guessed, and from which part of the input, is
 * not visible in this listing -- confirm in the implementation.
 */
291  FastaReader& guess_abundances( bool value );
292 
/** Get the guess_abundances flag. The in-class default of the backing member is false. */
296  bool guess_abundances() const;
297 
/**
 * Set the characters that are considered valid, given as a string.
 * Returns a FastaReader reference (presumably *this, to allow chaining of
 * setters -- confirm in the implementation).
 * NOTE(review): presumably this fills lookup_ and toggles use_validation_;
 * how an empty string is treated is not visible here -- confirm in the implementation.
 */
315  FastaReader& valid_chars( std::string const& chars );
316 
/** Get the currently set valid characters as a string, returned by value. */
322  std::string valid_chars() const;
323 
/**
 * Get a non-const reference to a utils::CharLookup<bool>, so callers can modify
 * it directly. This accessor is itself non-const.
 * NOTE(review): presumably returns the lookup_ member used for validating
 * characters -- confirm in the implementation.
 */
330  utils::CharLookup<bool>& valid_char_lookup();
331 
332  // ---------------------------------------------------------------------
333  // Members
334  // ---------------------------------------------------------------------
335 
336 private:
337 
/** Selected parsing method; initialized to ParsingMethod::kDefault. */
338  ParsingMethod parsing_method_ = ParsingMethod::kDefault;
339 
/** Selected casing for sites; initialized to SiteCasing::kToUpper. */
340  SiteCasing site_casing_ = SiteCasing::kToUpper;
/** Backing flag for guess_abundances(); initialized to false. */
341  bool guess_abundances_ = false;
/**
 * Initialized to false.
 * NOTE(review): presumably records whether lookup_ is to be consulted while
 * parsing; there is no public accessor for it in this listing -- confirm usage.
 */
342  bool use_validation_ = false;
/** Per-character boolean lookup table; presumably backs valid_chars() and valid_char_lookup(). */
343  utils::CharLookup<bool> lookup_;
344 
345 };
346 
347 } // namespace sequence
348 } // namespace genesis
349 
350 #endif // include guard
Read Fasta sequence data.
ParsingMethod
Enumeration of the available methods for parsing Fasta sequences.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Store a set of Sequences.
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
Default option, simply calculate the site entropy using the characters used in the SiteCounts object...
Stream interface for reading data from an InputSource, that keeps track of line and column counters...