A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
fasta_reader.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
2 #define GENESIS_SEQUENCE_FORMATS_FASTA_READER_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <iosfwd>
37 #include <string>
38 
39 namespace genesis {
40 
41 // =================================================================================================
42 // Forward Declarations
43 // =================================================================================================
44 
// Forward declaration only: within this header, InputStream is used solely as a
// reference parameter type, so its full definition is not needed here.
namespace utils {
    class InputStream;
}
48 
// Forward declarations only: within this header, these types are named solely in
// function declarations (parameters and return types), which does not require
// their full definitions.
namespace sequence {
    class SequenceSet;
    class Sequence;
}
53 
54 // =================================================================================================
55 // Fasta Reader
56 // =================================================================================================
57 
58 namespace sequence {
59 
96 {
97 public:
98 
99  // ---------------------------------------------------------------------
100  // Typedefs and Enums
101  // ---------------------------------------------------------------------
102 
106  enum class ParsingMethod
107  {
124  kDefault,
125 
140  kPedantic
141  };
142 
143  // ---------------------------------------------------------------------
144  // Constructor and Rule of Five
145  // ---------------------------------------------------------------------
146 
152  FastaReader();
153  ~FastaReader() = default;
154 
155  FastaReader( FastaReader const& ) = default;
156  FastaReader( FastaReader&& ) = default;
157 
158  FastaReader& operator= ( FastaReader const& ) = default;
159  FastaReader& operator= ( FastaReader&& ) = default;
160 
161  // ---------------------------------------------------------------------
162  // Reading
163  // ---------------------------------------------------------------------
164 
172  void from_stream( std::istream& input_stream, SequenceSet& sequence_set ) const;
173 
178  SequenceSet from_stream( std::istream& input_stream ) const;
179 
187  void from_file( std::string const& file_name, SequenceSet& sequence_set ) const;
188 
193  SequenceSet from_file( std::string const& file_name ) const;
194 
202  void from_string( std::string const& input_string, SequenceSet& sequence_set ) const;
203 
208  SequenceSet from_string( std::string const& input_string ) const;
209 
210  // ---------------------------------------------------------------------
211  // Parsing
212  // ---------------------------------------------------------------------
213 
220  void parse_document(
221  utils::InputStream& input_stream,
222  SequenceSet& sequence_set
223  ) const;
224 
241  bool parse_sequence(
242  utils::InputStream& input_stream,
243  Sequence& sequence
244  ) const;
245 
262  utils::InputStream& input_stream,
263  Sequence& sequence
264  ) const;
265 
266  // ---------------------------------------------------------------------
267  // Properties
268  // ---------------------------------------------------------------------
269 
277 
284 
292  FastaReader& to_upper( bool value );
293 
297  bool to_upper() const;
298 
316  FastaReader& valid_chars( std::string const& chars );
317 
323  std::string valid_chars() const;
324 
332 
333  // ---------------------------------------------------------------------
334  // Members
335  // ---------------------------------------------------------------------
336 
337 private:
338 
339  ParsingMethod parsing_method_ = ParsingMethod::kDefault;
340 
341  bool to_upper_ = true;
342  bool use_validation_ = false;
343  utils::CharLookup<bool> lookup_;
344 
345 };
346 
347 } // namespace sequence
348 } // namespace genesis
349 
350 #endif // include guard
Read Fasta sequence data.
bool parse_sequence(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fasta format.
void from_string(std::string const &input_string, SequenceSet &sequence_set) const
Read all Sequences from a std::string in Fasta format into a SequenceSet.
ParsingMethod
Enumeration of the available methods for parsing Fasta sequences.
bool parse_sequence_pedantic(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fasta format.
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
void parse_document(utils::InputStream &input_stream, SequenceSet &sequence_set) const
Parse a whole Fasta document into a SequenceSet.
FastaReader & operator=(FastaReader const &)=default
void from_stream(std::istream &input_stream, SequenceSet &sequence_set) const
Read all Sequences from a std::istream in Fasta format into a SequenceSet.
Store a set of Sequences.
ParsingMethod parsing_method() const
Return the currently set parsing method.
bool to_upper() const
Return whether Sequence sites are automatically turned into upper case.
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
FastaReader()
Create a default FastaReader. By default, characters are converted to upper case but are not validated.
void from_file(std::string const &file_name, SequenceSet &sequence_set) const
Read all Sequences from a file in Fasta format into a SequenceSet.
Stream interface for reading data from an InputSource, that keeps track of line and column counters...