A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fasta_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <iosfwd>
37 #include <string>
38 
39 namespace genesis {
40 
41 // =================================================================================================
42 // Forward Declarations
43 // =================================================================================================
44 
45 namespace utils {
46  class InputStream;
47 }
48 
49 namespace sequence {
50  class SequenceSet;
51  class Sequence;
52 }
53 
54 // =================================================================================================
55 // Fasta Reader
56 // =================================================================================================
57 
58 namespace sequence {
59 
95 {
96 public:
97 
98  // ---------------------------------------------------------------------
99  // Typedefs and Enums
100  // ---------------------------------------------------------------------
101 
// Enumeration of the available methods for parsing Fasta sequences.
105  enum class ParsingMethod
106  {
// Default method; parsing_method_ is initialised to this value.
123  kDefault,
124 
// Pedantic method. NOTE(review): presumably selects parse_sequence_pedantic() - confirm.
139  kPedantic
140  };
141 
// Enumeration of casing methods to apply to each site of a Sequence.
145  enum class SiteCasing
146  {
// Do not change the case of the sites.
150  kUnchanged,
151 
// Turn the sites into upper case; site_casing_ is initialised to this value.
155  kToUpper,
156 
// Turn the sites into lower case.
160  kToLower
161  };
162 
163  // ---------------------------------------------------------------------
164  // Constructor and Rule of Five
165  // ---------------------------------------------------------------------
166 
// Create a default FastaReader. By default, sites are turned into upper case, but not validated.
172  FastaReader();
173  ~FastaReader() = default;
174 
// Copy and move construction use the compiler-generated defaults.
175  FastaReader( FastaReader const& ) = default;
176  FastaReader( FastaReader&& ) = default;
177 
// Copy and move assignment use the compiler-generated defaults.
178  FastaReader& operator= ( FastaReader const& ) = default;
179  FastaReader& operator= ( FastaReader&& ) = default;
180 
181  // ---------------------------------------------------------------------
182  // Reading
183  // ---------------------------------------------------------------------
184 
// Read all Sequences from a std::istream in Fasta format into a SequenceSet.
192  void from_stream( std::istream& input_stream, SequenceSet& sequence_set ) const;
193 
// Overload that returns the SequenceSet by value instead of taking one by reference.
// NOTE(review): assumed to read the stream the same way as the overload above - confirm.
198  SequenceSet from_stream( std::istream& input_stream ) const;
199 
// Read all Sequences from a file in Fasta format into a SequenceSet.
207  void from_file( std::string const& file_name, SequenceSet& sequence_set ) const;
208 
// Overload that returns the SequenceSet by value instead of taking one by reference.
// NOTE(review): assumed to read the file the same way as the overload above - confirm.
213  SequenceSet from_file( std::string const& file_name ) const;
214 
// Read all Sequences from a std::string in Fasta format into a SequenceSet.
222  void from_string( std::string const& input_string, SequenceSet& sequence_set ) const;
223 
// Overload that returns the SequenceSet by value instead of taking one by reference.
// NOTE(review): assumed to read the string the same way as the overload above - confirm.
228  SequenceSet from_string( std::string const& input_string ) const;
229 
230  // ---------------------------------------------------------------------
231  // Parsing
232  // ---------------------------------------------------------------------
233 
// Parse a whole fasta document into a SequenceSet.
// Reads from the given utils::InputStream; the result goes into sequence_set.
240  void parse_document(
241  utils::InputStream& input_stream,
242  SequenceSet& sequence_set
243  ) const;
244 
// Parse a Sequence in Fasta format.
// Reads from the given utils::InputStream into sequence.
// NOTE(review): the meaning of the bool result is not visible in this listing -
// presumably whether a Sequence was read; confirm against the implementation.
261  bool parse_sequence(
262  utils::InputStream& input_stream,
263  Sequence& sequence
264  ) const;
265 
// Parse a Sequence in Fasta format (pedantic variant).
// Reads from the given utils::InputStream into sequence.
// NOTE(review): the meaning of the bool result is not visible in this listing -
// presumably whether a Sequence was read; confirm against the implementation.
281  bool parse_sequence_pedantic(
282  utils::InputStream& input_stream,
283  Sequence& sequence
284  ) const;
285 
286  // ---------------------------------------------------------------------
287  // Properties
288  // ---------------------------------------------------------------------
289 
297 
304 
313 
// Return whether Sequence sites are automatically turned into upper or lower case.
317  SiteCasing site_casing() const;
318 
// Setter counterpart of valid_chars() below, taking the chars as a std::string.
// NOTE(review): presumably sets the chars used for validating Sequence sites and
// returns *this for chaining - confirm against the implementation.
336  FastaReader& valid_chars( std::string const& chars );
337 
// Return the currently set chars used for validating Sequence sites.
343  std::string valid_chars() const;
344 
352 
353  // ---------------------------------------------------------------------
354  // Members
355  // ---------------------------------------------------------------------
356 
357 private:
358 
// Parsing method in use; initialised to ParsingMethod::kDefault.
359  ParsingMethod parsing_method_ = ParsingMethod::kDefault;
360 
// Casing applied to Sequence sites; initialised to SiteCasing::kToUpper.
361  SiteCasing site_casing_ = SiteCasing::kToUpper;
// Initialised to false. NOTE(review): presumably toggles validation of sites - confirm.
362  bool use_validation_ = false;
// CharLookup used for validating the Sequence sites.
// NOTE(review): utils::CharLookup is held by value, but no include providing it is
// visible in this listing - confirm the header is included in the real file.
363  utils::CharLookup<bool> lookup_;
364 
365 };
366 
367 } // namespace sequence
368 } // namespace genesis
369 
370 #endif // include guard
SiteCasing site_casing() const
Return whether Sequence sites are automatically turned into upper or lower case.
Read Fasta sequence data.
bool parse_sequence(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fasta format.
void from_string(std::string const &input_string, SequenceSet &sequence_set) const
Read all Sequences from a std::string in Fasta format into a SequenceSet.
ParsingMethod
Enumeration of the available methods for parsing Fasta sequences.
bool parse_sequence_pedantic(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fasta format.
Do not change the case of the sites.
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
void parse_document(utils::InputStream &input_stream, SequenceSet &sequence_set) const
Parse a whole fasta document into a SequenceSet.
FastaReader & operator=(FastaReader const &)=default
void from_stream(std::istream &input_stream, SequenceSet &sequence_set) const
Read all Sequences from a std::istream in Fasta format into a SequenceSet.
Store a set of Sequences.
ParsingMethod parsing_method() const
Return the currently set parsing method.
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
FastaReader()
Create a default FastaReader. By default, chars are turned into upper case, but not validated.
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
void from_file(std::string const &file_name, SequenceSet &sequence_set) const
Read all Sequences from a file in Fasta format into a SequenceSet.
Stream interface for reading data from an InputSource, that keeps track of line and column counters...