A library for working with phylogenetic and population genetic data.
v0.27.0
fasta_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <iosfwd>
38 #include <memory>
39 #include <string>
40 
41 namespace genesis {
42 
43 // =================================================================================================
44 // Forward Declarations
45 // =================================================================================================
46 
47 namespace utils {
48  class InputStream; // Forward declaration; stream interface for reading data from an InputSource that keeps track of line and column counters.
49 }
50 
51 namespace sequence {
52  class SequenceSet; // Forward declaration; stores a set of Sequences.
53  class Sequence; // Forward declaration; full definition lives in sequence/sequence.hpp.
54 }
55 
56 // =================================================================================================
57 // Fasta Reader
58 // =================================================================================================
59 
60 namespace sequence {
61 
93 {
94 public:
95 
96  // ---------------------------------------------------------------------
97  // Typedefs and Enums
98  // ---------------------------------------------------------------------
99 
103  enum class ParsingMethod // Enumeration of the available methods for parsing Fasta sequences.
104  {
117  kDefault, // Fast method, used by default.
118 
133  kPedantic // Pedantic method.
134  };
135 
139  enum class SiteCasing // Enumeration of casing methods to apply to each site of a Sequence.
140  {
144  kUnchanged, // Do not change the case of the sites.
145 
149  kToUpper, // Make all sites upper case.
150 
154  kToLower // Make all sites lower case.
155  };
156 
157  // ---------------------------------------------------------------------
158  // Constructor and Rule of Five
159  // ---------------------------------------------------------------------
160 
166  FastaReader(); // Create a default FastaReader: by default, sites are turned to upper case, but not validated.
167  ~FastaReader() = default;
168 
169  FastaReader( FastaReader const& ) = default; // Copy and move construction use the compiler-generated defaults.
170  FastaReader( FastaReader&& ) = default;
171 
172  FastaReader& operator= ( FastaReader const& ) = default; // Copy and move assignment use the compiler-generated defaults.
173  FastaReader& operator= ( FastaReader&& ) = default;
174 
175  // ---------------------------------------------------------------------
176  // Reading
177  // ---------------------------------------------------------------------
178 
186  SequenceSet read( std::shared_ptr< utils::BaseInputSource > source ) const; // Read all Sequences from an input source in Fasta format and return them as a SequenceSet.
187 
198  void read( std::shared_ptr< utils::BaseInputSource > source, SequenceSet& sequence_set ) const; // Overload that takes the target SequenceSet by reference instead of returning one. NOTE(review): presumably adds the read Sequences to sequence_set — confirm in fasta_reader.cpp.
199 
200  // ---------------------------------------------------------------------
201  // Parsing
202  // ---------------------------------------------------------------------
203 
210  void parse_document( // Parse a whole fasta document into a SequenceSet.
211  utils::InputStream& input_stream, // Stream to read the document from.
212  SequenceSet& sequence_set // Output: set that receives the parsed Sequences.
213  ) const;
214 
226  bool parse_sequence( // Parse a Sequence in Fasta format. NOTE(review): meaning of the bool result is not visible in this header — confirm in fasta_reader.cpp.
227  utils::InputStream& input_stream, // Stream to read the Sequence from.
228  Sequence& sequence // Output: Sequence that receives the parsed data.
229  ) const;
230 
247  utils::InputStream& input_stream,
248  Sequence& sequence
249  ) const;
250 
251  // ---------------------------------------------------------------------
252  // Properties
253  // ---------------------------------------------------------------------
254 
262 
269 
278 
282  SiteCasing site_casing() const; // Return whether Sequence sites are automatically turned into upper or lower case.
283 
291  FastaReader& guess_abundances( bool value ); // Setter counterpart of guess_abundances() below. NOTE(review): presumably returns *this for call chaining — confirm.
292 
296  bool guess_abundances() const; // Return whether the label is used to guess/extract Sequence abundances.
297 
315  FastaReader& valid_chars( std::string const& chars ); // Setter counterpart of valid_chars() below. NOTE(review): presumably sets the chars used for site validation and returns *this — confirm.
316 
322  std::string valid_chars() const; // Return the currently set chars used for validating Sequence sites.
323 
331 
332  // ---------------------------------------------------------------------
333  // Members
334  // ---------------------------------------------------------------------
335 
336 private:
337 
338  ParsingMethod parsing_method_ = ParsingMethod::kDefault; // Selected parsing method; defaults to the fast method.
339 
340  SiteCasing site_casing_ = SiteCasing::kToUpper; // Casing applied to sites; defaults to upper case.
341  bool guess_abundances_ = false; // Abundance guessing is off by default.
342  bool use_validation_ = false; // Site validation is off by default.
343  utils::CharLookup<bool> lookup_; // Internal CharLookup that is used for validating the Sequence sites.
344 
345 };
346 
347 } // namespace sequence
348 } // namespace genesis
349 
350 #endif // include guard
genesis::sequence::FastaReader::SiteCasing::kToLower
@ kToLower
Make all sites lower case.
genesis::utils::InputStream
Stream interface for reading data from an InputSource, that keeps track of line and column counters.
Definition: input_stream.hpp:81
genesis::sequence::FastaReader::ParsingMethod::kPedantic
@ kPedantic
Pedantic method.
genesis::sequence::FastaReader::SiteCasing
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
Definition: fasta_reader.hpp:139
genesis::sequence::Sequence
Definition: sequence/sequence.hpp:40
genesis::sequence::FastaReader
Read Fasta sequence data.
Definition: fasta_reader.hpp:92
genesis::sequence::FastaReader::parse_sequence_pedantic
bool parse_sequence_pedantic(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fasta format.
Definition: fasta_reader.cpp:236
input_source.hpp
genesis::sequence::FastaReader::~FastaReader
~FastaReader()=default
char_lookup.hpp
genesis::sequence::FastaReader::guess_abundances
bool guess_abundances() const
Return whether the label is used to guess/extract Sequence abundances.
Definition: fasta_reader.cpp:401
genesis::sequence::FastaReader::ParsingMethod::kDefault
@ kDefault
Fast method, used by default.
genesis::sequence::FastaReader::FastaReader
FastaReader()
Create a default FastaReader. By default, chars are turned to upper case, but not validated.
Definition: fasta_reader.cpp:56
genesis::utils::CharLookup< bool >
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::SequenceSet
Store a set of Sequences.
Definition: sequence_set.hpp:59
genesis::sequence::FastaReader::site_casing
SiteCasing site_casing() const
Return whether Sequence sites are automatically turned into upper or lower case.
Definition: fasta_reader.cpp:390
genesis::sequence::FastaReader::valid_chars
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
Definition: fasta_reader.cpp:420
genesis::sequence::FastaReader::operator=
FastaReader & operator=(FastaReader const &)=default
genesis::sequence::FastaReader::valid_char_lookup
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
Definition: fasta_reader.cpp:431
genesis::sequence::FastaReader::read
SequenceSet read(std::shared_ptr< utils::BaseInputSource > source) const
Read all Sequences from an input source in Fasta format and return them as a SequenceSet.
Definition: fasta_reader.cpp:65
genesis::sequence::FastaReader::parsing_method
ParsingMethod parsing_method() const
Return the currently set parsing method.
Definition: fasta_reader.cpp:379
genesis::sequence::FastaReader::parse_sequence
bool parse_sequence(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fasta format.
Definition: fasta_reader.cpp:107
genesis::sequence::FastaReader::SiteCasing::kToUpper
@ kToUpper
Make all sites upper case.
genesis::sequence::FastaReader::SiteCasing::kUnchanged
@ kUnchanged
Do not change the case of the sites.
genesis::sequence::FastaReader::ParsingMethod
ParsingMethod
Enumeration of the available methods for parsing Fasta sequences.
Definition: fasta_reader.hpp:103
genesis::sequence::FastaReader::parse_document
void parse_document(utils::InputStream &input_stream, SequenceSet &sequence_set) const
Parse a whole fasta document into a SequenceSet.
Definition: fasta_reader.cpp:85