|
A library for working with phylogenetic and population genetic data.
v0.27.0
|
|
Go to the documentation of this file.
74 std::shared_ptr< utils::BaseInputSource > source,
91 sequence_set.
add( tmp_seq );
115 if( !input_stream ) {
137 auto& it = input_stream;
140 if( !it || *it !=
'@' ) {
141 throw std::runtime_error(
142 "Malformed Fastq " + it.source_name()
143 +
": Expecting '@' at beginning of sequence at line "
147 assert( it && *it ==
'@' );
153 throw std::runtime_error(
154 "Malformed Fastq " + it.source_name()
155 +
": Expecting label after '@' in sequence at line "
164 if( !it || *it !=
'\n' ) {
165 throw std::runtime_error(
166 "Malformed Fastq " + it.source_name()
167 +
": Unexpected characters at the end of the label line in sequence at line "
171 assert( it && *it ==
'\n' );
175 sequence.
label( label1 );
181 auto& it = input_stream;
186 throw std::runtime_error(
187 "Malformed Fastq " + it.source_name()
188 +
": Expecting a sequence sites line after the first label line at line "
197 while( it && *it !=
'+' ) {
201 assert( it.column() == 1 );
204 it.get_line( buffer_ );
206 assert( !it || *it ==
'+' );
208 if( buffer_.length() == 0 ) {
209 throw std::runtime_error(
210 "Malformed Fastq " + it.source_name() +
": Empty sequence at line "
223 if( use_validation_ ) {
224 for(
auto const& c : buffer_ ) {
226 throw std::runtime_error(
227 "Malformed Fastq " + it.source_name() +
": Invalid sequence symbol "
229 +
" in sequence near line " +
std::to_string( it.line() - 1 ) +
"."
236 sequence.
sites( buffer_ );
241 auto& it = input_stream;
245 if( !it || *it !=
'+' ) {
246 throw std::runtime_error(
247 "Malformed Fastq " + it.source_name()
248 +
": Expecting '+' at beginning of sequence at line "
252 assert( it && *it ==
'+' );
258 it.get_line( buffer_ );
260 if( ! buffer_.empty() && buffer_ != sequence.
label() ) {
261 throw std::runtime_error(
262 "Malformed Fastq " + it.source_name() +
": Expecting the second label line to either " +
263 "be empty or equal to the first label line at line " +
std::to_string( it.line() ) +
"."
270 auto& it = input_stream;
275 throw std::runtime_error(
276 "Malformed Fastq " + it.source_name()
277 +
": Expecting quality scores after the second label line at line "
285 while( it && buffer_.size() < sequence.
sites().size() ) {
289 assert( it.column() == 1 );
292 it.get_line( buffer_ );
294 assert( !it || buffer_.size() >= sequence.
sites().size() );
296 if( buffer_.size() != sequence.
sites().size() ) {
297 throw std::runtime_error(
298 "Malformed Fastq " + it.source_name()
299 +
": Expecting the quality scores to be of the same length as the sequence at line "
305 if( quality_string_plugin_ ) {
306 quality_string_plugin_( buffer_, sequence );
316 site_casing_ = value;
327 if( chars.size() == 0 ) {
329 use_validation_ =
false;
333 use_validation_ =
true;
343 if( ! use_validation_ || lookup_.
all_equal_to(
true ) ) {
357 quality_encoding_ = encoding;
363 return quality_encoding_;
368 quality_string_plugin_ = plugin;
@ kToLower
Make all sites lower case.
std::string read_while(InputStream &source, char criterion)
Lexing function that reads from the stream while its current char equals the provided one....
Provides functions for accessing the file system.
void to_upper_ascii_inplace(std::string &str)
Turn the given string to all-uppercase, ASCII-only, in place.
void parse_label1_(utils::InputStream &input_stream, Sequence &sequence) const
Parse the first label line (starting with an @).
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
bool all_equal_to(T comp_value) const
Return whether all chars compare equal to a given value.
@ kToUpper
Make all sites upper case.
void to_lower_ascii_inplace(std::string &str)
Turn the given string to all-lowercase, ASCII-only, in place.
SequenceSet read(std::shared_ptr< utils::BaseInputSource > source) const
Read all Sequences from an input source in Fastq format and return them as a SequenceSet.
Read Fastq sequence data.
Provides some valuable additions to STD.
void parse_label2_(utils::InputStream &input_stream, Sequence &sequence) const
Parse the second label line (starting with a +, and either empty or equal to the first).
std::string to_string(GenomeLocus const &locus)
Provides some commonly used string utility functions.
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
QualityEncoding
List of quality encodings for which we support decoding.
std::string get_chars_equal_to(T comp_value) const
Return a std::string containing all chars which have lookup status equal to a given value.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
void parse_sites_(utils::InputStream &input_stream, Sequence &sequence) const
Parse the sequence line(s).
Store a set of Sequences.
void parse_document(utils::InputStream &input_stream, SequenceSet &sequence_set) const
Parse a whole fastq document into a SequenceSet.
void parse_quality_(utils::InputStream &input_stream, Sequence &sequence) const
Parse the quality score line(s), which also runs the plugin, if available.
reference add(Sequence const &s)
Add a Sequence to the SequenceSet by copying it, and return a reference to it.
FastqReader()
Create a default FastqReader.
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
SiteCasing site_casing() const
Return whether Sequence sites are automatically turned into upper or lower case.
FastqReader & quality_string_plugin(quality_string_function const &plugin)
Functional that can be set to process the quality string found in fastq files.
bool parse_sequence_(utils::InputStream &input_stream, Sequence &sequence) const
Parse a fastq sequence into the given sequence object.
void set_all(T value)
Set the lookup status for all chars at once.
std::function< void(std::string const &quality_string, Sequence &sequence) > quality_string_function
Function type that allows to work with the quality line(s) in fastq files.
bool parse_sequence(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fastq format.
QualityEncoding quality_encoding()
Return the currently set QualityEncoding that is used for decoding the quality score line of the Fast...
void set_selection(std::string const &chars, T value)
Set the lookup status for all chars that are contained in a given std::string.