1 #ifndef GENESIS_SEQUENCE_FORMATS_FASTX_INPUT_VIEW_STREAM_H_
2 #define GENESIS_SEQUENCE_FORMATS_FASTX_INPUT_VIEW_STREAM_H_
34 #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
49 #include <string_view>
// Forward declaration, so that the two aliases below can name the class before it is defined.
59 class FastxInputViewStream;
// Both aliases name the very same class. The format is not chosen by the alias: the iterator
// constructor looks at the first character of the input ('>' or '@') to decide.
60 using FastaInputViewStream = FastxInputViewStream;
61 using FastqInputViewStream = FastxInputViewStream;
// Stream over fasta/fastq input. Its nested Iterator exposes each record as a fixed array of
// std::string_view entries (see the Iterator's own value_type).
101 class FastxInputViewStream
// Member type aliases of the stream class itself.
109 using self_type = FastxInputViewStream;
// NOTE(review): value_type is Sequence here, while the nested Iterator declares its value_type
// as std::array<std::string_view, 4> - confirm which of the two callers are meant to rely on.
110 using value_type = Sequence;
111 using pointer = value_type*;
112 using reference = value_type&;
113 using difference_type = std::ptrdiff_t;
// Declared with the input iterator category.
114 using iterator_category = std::input_iterator_tag;
// Member type aliases of the nested Iterator.
136 using self_type = FastxInputViewStream::Iterator;
// One record is an array of four views. The accessors of the iterator index it as
// 0 = label (label1), 1 = sites, 2 = label2, 3 = quality.
137 using value_type = std::array<std::string_view, 4>;
// Pointer and reference aliases are to const: the record is exposed read-only.
138 using pointer = value_type
const*;
139 using reference = value_type
const&;
140 using difference_type = std::ptrdiff_t;
// Declared with the input iterator category.
141 using iterator_category = std::input_iterator_tag;
// Default constructor is compiler-generated; parent_ then keeps its nullptr default member
// initializer.
149 Iterator() =
default;
// Construct from the owning stream: open an input stream on the parent's input source and
// detect the file format from the first character.
151 Iterator( FastxInputViewStream
const* parent )
// Create a utils::InputStream that reads from the parent's input_source_.
160 input_stream_ = std::make_shared<utils::InputStream>( parent_->input_source_ );
// The stream pointer is null or the stream itself evaluates to false (presumably: nothing left
// to read - confirm against utils::InputStream). Drop the stream and reset all four views.
164 if( ! input_stream_ || ! *input_stream_ ) {
166 input_stream_ =
nullptr;
167 sequence_view_ = std::array<std::string_view, 4>();
// Format detection on the dereferenced stream (presumably its current character):
// '>' selects fasta, '@' selects fastq.
172 if( **input_stream_ ==
'>' ) {
173 format_is_fasta_ =
true;
174 }
else if( **input_stream_ ==
'@' ) {
175 format_is_fasta_ =
false;
// Any other leading character is rejected with a std::runtime_error naming the source.
177 throw std::runtime_error(
178 "Malformed fasta/fastq " + input_stream_->source_name() +
179 ", starting with neither '>' nor '@', but instead " +
// Destructor, copy/move construction and copy/move assignment are all compiler-generated.
// Copies share the same input_stream_ object, as that member is a std::shared_ptr.
190 ~Iterator() =
default;
192 Iterator( self_type
const& ) =
default;
193 Iterator( self_type&& ) =
default;
195 Iterator& operator= ( self_type
const& ) =
default;
196 Iterator& operator= ( self_type&& ) =
default;
// The enclosing stream class is declared a friend of the iterator.
198 friend FastxInputViewStream;
// operator-> and operator* are declared to return the iterator itself (self_type), each in a
// const and a non-const variant; the record fields are read through the named accessors
// (label(), sites(), ...) of the iterator.
204 self_type
const* operator->()
const
209 self_type* operator->()
214 self_type
const& operator*()
const
219 self_type& operator*()
// Pre-increment; returns a reference to the iterator.
228 self_type& operator ++ ()
// Equality compares the parent_ pointers only; the stream position is not part of the comparison.
245 return parent_ == it.parent_;
// Inequality is defined as the negation of the equality above.
250 return !(*
this == it);
// Accessors for the four views of the current record. Each returns a const reference to one
// element of sequence_view_. The views are non-owning (std::string_view).
// NOTE(review): presumably they are only valid until the iterator is advanced - confirm
// against utils::InputStream::get_line_views().

// Label of the record: element 0.
262 std::string_view
const& label()
const
264 return sequence_view_[0];
// Same element as label(): element 0.
273 std::string_view
const& label1()
const
275 return sequence_view_[0];
// Sequence sites: element 1.
284 std::string_view
const& sites()
const
286 return sequence_view_[1];
// Second label line: element 2. Only the fastq branch writes this element; the fasta branch
// assigns elements 0 and 1 only.
295 std::string_view
const& label2()
const
297 return sequence_view_[2];
// Quality line: element 3. Only the fastq branch writes this element.
307 std::string_view
const& quality()
const
309 return sequence_view_[3];
// Branch on the format flag that the constructor sets from the first character of the input.
324 if( format_is_fasta_ ) {
// Read the next fasta record into sequence_view_[0] (label) and sequence_view_[1] (sites).
// Throws std::runtime_error if the lines cannot be obtained, if the label line does not start
// with '>', or if the sites line is empty.
331 void increment_fasta_()
// The stream pointer is null or the stream evaluates to false (presumably: end of input).
// Drop the stream and reset all four views to empty.
337 if( ! input_stream_ || ! *input_stream_ ) {
339 input_stream_ =
nullptr;
340 sequence_view_ = std::array<std::string_view, 4>();
// Request two line views from the stream and store them as elements 0 and 1; elements 2 and 3
// are not assigned here.
349 auto seqs = input_stream_->get_line_views<2>();
350 sequence_view_[0] = seqs[0];
351 sequence_view_[1] = seqs[1];
352 }
catch( std::exception
const& ex ) {
// Any std::exception from the read above is rethrown as a std::runtime_error that names the
// source and appends the original message.
353 throw std::runtime_error(
354 "Cannot stream through fasta " + input_stream_->source_name() +
355 " with fast string view parser, either because the file is corrupt, "
356 "or has lines that are too long. Error: " + ex.what()
// The label line has to be non-empty and start with '>'.
// NOTE(review): the message below mentions "quality data", although this fasta branch only
// reads a label and a sites line - looks carried over from the fastq messages; confirm.
361 if( sequence_view_[0].size() < 1 || sequence_view_[0][0] !=
'>' ) {
362 throw std::runtime_error(
363 "Malformed fasta " + input_stream_->source_name() +
": Expecting '>' at "
364 "beginning of label near line " +
std::to_string( input_stream_->line() ) +
365 ". Note that we here can only process fasta with single lines for the " +
366 "sequence and quality data."
// Strip the leading '>' so that the label view starts at the actual label text.
369 sequence_view_[0].remove_prefix( 1 );
// An empty sites line is rejected.
372 if( sequence_view_[1].empty() ) {
373 throw std::runtime_error(
374 "Malformed fasta " + input_stream_->source_name() +
": Expecting a " +
375 "sequence sites line after the first label line near line "
377 ". Note that we here can only process fasta with single lines for the " +
378 "sequence and quality data."
// Read the next fastq record into all four elements of sequence_view_
// (0 = label, 1 = sites, 2 = second label, 3 = quality).
// Throws std::runtime_error if the lines cannot be obtained, if the first line does not start
// with '@', if the third line does not start with '+', if the sites line is empty, or if the
// sites and quality lines differ in length.
383 void increment_fastq_()
// The stream pointer is null or the stream evaluates to false (presumably: end of input).
// Drop the stream and reset all four views to empty.
389 if( ! input_stream_ || ! *input_stream_ ) {
391 input_stream_ =
nullptr;
392 sequence_view_ = std::array<std::string_view, 4>();
// Request four line views from the stream; they become the record directly.
398 sequence_view_ = input_stream_->get_line_views<4>();
399 }
catch( std::exception
const& ex ) {
// Any std::exception from the read above is rethrown as a std::runtime_error that names the
// source and appends the original message.
400 throw std::runtime_error(
401 "Cannot stream through fastq " + input_stream_->source_name() +
402 " with fast string view parser, either because the file is corrupt, "
403 "or has lines that are too long. Error: " + ex.what()
// The first line has to be non-empty and start with '@'.
408 if( sequence_view_[0].size() < 1 || sequence_view_[0][0] !=
'@' ) {
409 throw std::runtime_error(
410 "Malformed fastq " + input_stream_->source_name() +
": Expecting '@' at "
411 "beginning of label near line " +
std::to_string( input_stream_->line() ) +
412 ". Note that we here can only process fastq with single lines for the " +
413 "sequence and quality data."
// Strip the leading '@' from the first label.
416 sequence_view_[0].remove_prefix( 1 );
// The third line has to be non-empty and start with '+'.
419 if( sequence_view_[2].size() < 1 || sequence_view_[2][0] !=
'+' ) {
420 throw std::runtime_error(
421 "Malformed fastq " + input_stream_->source_name() +
": Expecting '+' at "
422 "beginning of label near line " +
std::to_string( input_stream_->line() ) +
423 ". Note that we here can only process fastq with single lines for the " +
424 "sequence and quality data."
// Strip the leading '+' from the second label.
427 sequence_view_[2].remove_prefix( 1 );
// An empty sites line is rejected.
430 if( sequence_view_[1].empty() ) {
431 throw std::runtime_error(
432 "Malformed fastq " + input_stream_->source_name() +
": Expecting a " +
433 "sequence sites line after the first label line near line "
435 ". Note that we here can only process fastq with single lines for the " +
436 "sequence and quality data."
// Sites and quality lines have to have the same number of characters.
439 if( sequence_view_[1].size() != sequence_view_[3].size() ) {
440 throw std::runtime_error(
441 "Malformed fastq " + input_stream_->source_name() +
": Expecting the " +
442 "quality scores to be of the same length as the sequence near line " +
444 ". Note that we here can only process fastq with single lines for the " +
445 "sequence and quality data."
457 FastxInputViewStream
const* parent_ =
nullptr;
460 std::shared_ptr<utils::InputStream> input_stream_;
463 bool format_is_fasta_;
466 std::array<std::string_view, 4> sequence_view_;
// Default constructor: the stream has no input source (input_source_ is a null pointer).
480 FastxInputViewStream()
481 : input_source_( nullptr )
// Construct from an input source. Explicit, so that a source pointer does not convert to a
// stream implicitly. The shared pointer is taken by value and copied into input_source_.
487 explicit FastxInputViewStream(
488 std::shared_ptr<utils::BaseInputSource> source
490 : input_source_( source )
// Destructor, copy/move construction and copy/move assignment are all compiler-generated.
// Copies of the stream share the same input source object via the shared pointer.
493 ~FastxInputViewStream() =
default;
495 FastxInputViewStream( self_type
const& ) =
default;
496 FastxInputViewStream( self_type&& ) =
default;
498 self_type& operator= ( self_type
const& ) =
default;
499 self_type& operator= ( self_type&& ) =
default;
// Return an iterator constructed from this stream. The iterator constructor opens a new
// utils::InputStream on input_source_ each time it is called with a parent.
505 Iterator begin()
const
507 return Iterator(
this );
// Return the input source of this stream, as a copy of the shared pointer.
519 std::shared_ptr<utils::BaseInputSource> input_source()
const
521 return input_source_;
// Input source that the iterators read from; null for a default-constructed stream.
530 std::shared_ptr<utils::BaseInputSource> input_source_;
536 #endif // ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L)
537 #endif // include guard