74 std::shared_ptr< utils::BaseInputSource > source,
92 sequence_set.
add( tmp_seq );
100 static std::string buffer;
117 if( !input_stream ) {
142 auto& it = input_stream;
145 if( !it || *it !=
'@' ) {
146 throw std::runtime_error(
147 "Malformed Fastq " + it.source_name()
148 +
": Expecting '@' at beginning of sequence at line " 152 assert( it && *it ==
'@' );
158 throw std::runtime_error(
159 "Malformed Fastq " + it.source_name()
160 +
": Expecting label after '@' in sequence at line " 169 if( !it || *it !=
'\n' ) {
170 throw std::runtime_error(
171 "Malformed Fastq " + it.source_name()
172 +
": Unexpected characters at the end of the label line in sequence at line " 176 assert( it && *it ==
'\n' );
180 sequence.
label( label1 );
186 auto& it = input_stream;
191 throw std::runtime_error(
192 "Malformed Fastq " + it.source_name()
193 +
": Expecting a sequence sites line after the first label line at line " 202 while( it && *it !=
'+' ) {
206 assert( it.column() == 1 );
209 it.get_line( buffer );
211 assert( !it || *it ==
'+' );
213 if( buffer.length() == 0 ) {
214 throw std::runtime_error(
215 "Malformed Fastq " + it.source_name() +
": Empty sequence at line " 228 if( use_validation_ ) {
229 for(
auto const& c : buffer ) {
231 throw std::runtime_error(
232 "Malformed Fastq " + it.source_name() +
": Invalid sequence symbol " 234 +
" in sequence near line " +
std::to_string( it.line() - 1 ) +
"." 241 sequence.
sites( buffer );
246 auto& it = input_stream;
250 if( !it || *it !=
'+' ) {
251 throw std::runtime_error(
252 "Malformed Fastq " + it.source_name()
253 +
": Expecting '+' at beginning of sequence at line " 257 assert( it && *it ==
'+' );
263 it.get_line( buffer );
265 if( ! buffer.empty() && buffer != sequence.
label() ) {
266 throw std::runtime_error(
267 "Malformed Fastq " + it.source_name() +
": Expecting the second label line to either " +
268 "be empty or equal to the first label line at line " +
std::to_string( it.line() ) +
"." 275 auto& it = input_stream;
280 throw std::runtime_error(
281 "Malformed Fastq " + it.source_name()
282 +
": Expecting quality scores after the second label line at line " 290 while( it && buffer.size() < sequence.
sites().size() ) {
294 assert( it.column() == 1 );
297 it.get_line( buffer );
299 assert( !it || buffer.size() >= sequence.
sites().size() );
301 if( buffer.size() != sequence.
sites().size() ) {
302 throw std::runtime_error(
303 "Malformed Fastq " + it.source_name()
304 +
": Expecting the quality scores to be of the same length as the sequence at line " 310 if( quality_string_plugin_ ) {
311 quality_string_plugin_( buffer, sequence );
321 site_casing_ = value;
332 if( chars.size() == 0 ) {
334 use_validation_ =
false;
338 use_validation_ =
true;
348 if( ! use_validation_ || lookup_.
all_equal_to(
true ) ) {
362 quality_encoding_ = encoding;
368 return quality_encoding_;
373 quality_string_plugin_ = plugin;
SiteCasing site_casing() const
Return whether Sequence sites are automatically turned into upper or lower case.
void set_all(T value)
Set the lookup status for all chars at once.
Read Fastq sequence data.
void to_upper_ascii_inplace(std::string &str)
Turn the given string to all-uppercase, ASCII-only, in place.
bool parse_sequence_(utils::InputStream &input_stream, std::string &buffer, Sequence &sequence) const
Parse a fastq sequence into the given sequence object.
bool parse_sequence(utils::InputStream &input_stream, Sequence &sequence) const
Parse a Sequence in Fastq format.
std::string get_chars_equal_to(T comp_value) const
Return a std::string containing all chars which have lookup status equal to a given value...
Make all sites upper case.
void set_selection(std::string const &chars, T value)
Set the lookup status for all chars that are contained in a given std::string.
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Make all sites lower case.
SequenceSet read(std::shared_ptr< utils::BaseInputSource > source) const
Read all Sequences from an input source in Fastq format and return them as a SequenceSet.
Provides some valuable additions to STD.
void parse_label2_(utils::InputStream &input_stream, std::string &buffer, Sequence &sequence) const
Parse the second label line (starting with a +, and either empty or equal to the first).
std::string read_while(InputStream &source, char criterion)
Lexing function that reads from the stream while its current char equals the provided one...
SiteCasing
Enumeration of casing methods to apply to each site of a Sequence.
void to_lower_ascii_inplace(std::string &str)
Turn the given string to all-lowercase, ASCII-only.
utils::CharLookup< bool > & valid_char_lookup()
Return the internal CharLookup that is used for validating the Sequence sites.
bool all_equal_to(T comp_value) const
Return whether all chars compare equal to a given value.
void parse_quality_(utils::InputStream &input_stream, std::string &buffer, Sequence &sequence) const
Parse the quality score line(s), which also runs the plugin, if available.
Provides some commonly used string utility functions.
void parse_sites_(utils::InputStream &input_stream, std::string &buffer, Sequence &sequence) const
Parse the sequence line(s).
FastqReader()
Create a default FastqReader.
Provides functions for accessing the file system.
Store a set of Sequences.
reference add(Sequence const &s)
Add a Sequence to the SequenceSet by copying it, and return a reference to it.
QualityEncoding
List of quality encodings for which we support decoding.
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
QualityEncoding quality_encoding()
Return the currently set QualityEncoding that is used for decoding the quality score line of the Fast...
void parse_label1_(utils::InputStream &input_stream, std::string &buffer, Sequence &sequence) const
Parse the first label line (starting with an @).
std::string valid_chars() const
Return the currently set chars used for validating Sequence sites.
FastqReader & quality_string_plugin(quality_string_function const &plugin)
Functional that can be set to process the quality string found in fastq files.
std::function< void(std::string const &quality_string, Sequence &sequence) > quality_string_function
Function type that allows to work with the quality line(s) in fastq files.
void parse_document(utils::InputStream &input_stream, SequenceSet &sequence_set) const
Parse a whole fastq document into a SequenceSet.