40 namespace population {
// Constructs an iterator over the inputs of the given generator: opens one
// underlying iterator per input source, records how many samples each source
// contributes, and prepares the per-source variant storage.
// Throws std::runtime_error if a source has a non-empty sample name list whose
// length differs from the number of samples of its current variant.
46 VariantParallelInputIterator::Iterator::Iterator(
47 VariantParallelInputIterator* generator
49 : generator_(generator)
// One slot per input source is needed in both containers.
61 iterators_.reserve( generator_->inputs_.size() );
62 variant_sizes_.reserve( generator_->inputs_.size() );
63 for(
size_t i = 0; i < generator_->inputs_.size(); ++i ) {
// Start iterating the i-th input source.
64 iterators_.emplace_back( generator_->inputs_[i].begin() );
// Number of sample names that the source itself reports.
71 auto const sample_name_count = generator_->inputs_[i].data().sample_names.size();
// Record the number of samples found in the current variant of this source.
73 variant_sizes_.push_back( iterators_[i]->samples.size() );
// A non-empty sample name list has to agree with the actual number of samples.
76 if( sample_name_count > 0 && iterators_[i]->samples.size() != sample_name_count ) {
77 throw std::runtime_error(
78 "Input source for VariantParallelInputIterator contains " +
79 std::to_string( iterators_[i]->samples.size() ) +
" samples, but its sample " +
80 "name list contains " +
std::to_string( sample_name_count ) +
" names."
// Rejects an empty chromosome name or a position of 0 (throws std::runtime_error).
87 assert_correct_chr_and_pos_( iterators_[i] );
// NOTE(review): presumably the alternative branch for a source without data,
// where the sample name list size is used as the sample count - confirm.
95 variant_sizes_.push_back( sample_name_count );
// Total number of samples across all sources, i.e., the sum of variant_sizes_.
100 variant_size_sum_ = std::accumulate(
101 variant_sizes_.begin(),
102 variant_sizes_.end(),
103 decltype( variant_sizes_ )::value_type( 0 )
// One variant slot per input source.
107 variants_.resize( generator_->inputs_.size() );
// All per-source containers must have one entry per input.
110 assert( iterators_.size() == generator_->inputs_.size() );
111 assert( iterators_.size() == variants_.size() );
112 assert( iterators_.size() == variant_sizes_.size() );
// Begin at the first of the generator's carrying loci.
115 carrying_locus_it_ = generator_->carrying_loci_.cbegin();
// Builds a single Variant for the current locus by appending the samples of
// all input sources to one result.
// The two flags relate to the handling of differing reference / alternative
// bases between sources; the visible code can throw std::runtime_error that
// reports such a mismatch together with the locus and the offending base.
125 Variant VariantParallelInputIterator::Iterator::joined_variant(
126 bool allow_ref_base_mismatches,
127 bool allow_alt_base_mismatches,
// All per-source containers must have one entry per input.
130 assert( iterators_.size() == variants_.size() );
131 assert( iterators_.size() == variant_sizes_.size() );
132 assert( iterators_.size() == generator_->inputs_.size() );
// The result is located at the current locus.
137 res.
position = current_locus_.position;
// Make room for the samples of all sources at once.
138 res.
samples.reserve( variant_size_sum_ );
// Special case: there are no input sources at all.
141 if( variants_.empty() ) {
144 assert( variants_.size() > 0 );
145 assert( variant_sizes_.size() > 0 );
// NOTE(review): presumably tracks whether the bases of the result have already
// been taken from some source - confirm.
150 bool bases_init =
false;
// Process the sources in order, so that samples are appended in input order.
154 for(
size_t i = 0; i < variants_.size(); ++i ) {
// The variant of this source has to be at the locus of the result,
// and has to have the number of samples recorded for this source.
161 assert( variants_[i]->chromosome == res.
chromosome );
162 assert( variants_[i]->position == res.
position );
163 assert( variants_[i]->samples.size() == variant_sizes_[i] );
178 }
else if( allow_ref_base_mismatches ) {
// Report a reference base mismatch at the current locus.
181 throw std::runtime_error(
182 "Mismatching reference bases while iterating input sources in parallel at " +
183 to_string( current_locus_ ) +
". Some sources have base '" +
185 std::string( 1, variants_[i]->reference_base ) +
"'."
192 }
else if( allow_alt_base_mismatches ) {
// Report an alternative base mismatch at the current locus.
195 throw std::runtime_error(
196 "Mismatching alternative bases while iterating input sources in parallel at " +
197 to_string( current_locus_ ) +
". Some sources have base '" +
199 std::string( 1, variants_[i]->alternative_base ) +
"'."
// Append the samples of this source to the result.
// NOTE(review): the clear() afterwards suggests that this variant moves the
// samples out of the source variant - confirm.
209 std::begin( variants_[i]->samples ),
210 std::end( variants_[i]->samples ),
211 std::back_inserter( res.
samples )
213 variants_[i]->samples.clear();
// Second way of appending the samples of this source; here, the source
// variant is not cleared afterwards.
217 std::begin( variants_[i]->samples ),
218 std::end( variants_[i]->samples ),
219 std::back_inserter( res.
samples )
// One iteration per sample of this source.
// NOTE(review): presumably adds placeholder samples for a source that has
// no data at this locus - confirm.
228 for(
size_t k = 0; k < variant_sizes_[i]; ++k ) {
// The carrying locus iterator may only be inspected while it is not at its end.
240 carrying_locus_it_ != generator_->carrying_loci_.cend() &&
// The result has to contain exactly the samples of all sources.
247 assert( res.
samples.size() == variant_size_sum_ );
// Moves to the next locus, which is determined by the carrying inputs and the
// generator's carrying loci: the candidate locus is taken from these, all
// iterators are then advanced until they are no longer before the candidate,
// and the candidate becomes the current locus. If no candidate is found,
// generator_ is set to nullptr.
261 void VariantParallelInputIterator::Iterator::advance_using_carrying_()
268 assert( iterators_.size() == generator_->selections_.size() );
269 for(
size_t i = 0; i < iterators_.size(); ++i ) {
270 auto& iterator = iterators_[i];
// Special handling for iterators that are exhausted or not carrying
// (presumably they are skipped in this loop - confirm).
271 if( ! iterator || generator_->selections_[i] != ContributionType::kCarrying ) {
282 iterator->chromosome, iterator->position, current_locus_
// An iterator that is at the current locus is moved on to its next position.
288 if(
locus_equal( iterator->chromosome, iterator->position, current_locus_ )) {
289 increment_iterator_( iterator );
// An iterator that is before the current candidate provides the new candidate.
302 locus_less( iterator->chromosome, iterator->position, cand_loc )
305 cand_loc = GenomeLocus{ iterator->
chromosome, iterator->position };
// Also take the generator's carrying loci into account.
310 assert( generator_ );
311 if( carrying_locus_it_ != generator_->carrying_loci_.cend() ) {
313 assert( ! carrying_locus_it_->empty() );
// A carrying locus equal to the current locus has been processed; move past it.
318 if(
locus_equal( *carrying_locus_it_, current_locus_ ) ) {
319 ++carrying_locus_it_;
// The next carrying locus can become the candidate.
327 carrying_locus_it_ != generator_->carrying_loci_.cend() &&
333 cand_loc = *carrying_locus_it_;
// No candidate locus was found.
339 if( cand_loc.
empty() ) {
340 assert( generator_ );
341 assert( generator_->has_carrying_input_ );
// Look at all carrying inputs that still have data.
345 for(
size_t i = 0; i < iterators_.size(); ++i ) {
346 if( generator_->selections_[i] == ContributionType::kCarrying && iterators_[i] ) {
355 assert( carrying_locus_it_ == generator_->carrying_loci_.cend() );
// NOTE(review): presumably marks this iterator as having reached the end - confirm.
358 generator_ =
nullptr;
// The candidate has to be strictly after the current locus.
364 assert( cand_loc > current_locus_ );
368 for(
size_t i = 0; i < iterators_.size(); ++i ) {
369 auto& iterator = iterators_[i];
378 iterator->chromosome, iterator->position, current_locus_
// Advance the iterator until it is no longer before the candidate locus,
// or until it is exhausted.
386 while( iterator &&
locus_less( iterator->chromosome, iterator->position, cand_loc )) {
387 increment_iterator_( iterator );
// Carrying inputs are expected to have cnt <= 1 here
// (presumably cnt counts the increments of the loop above - confirm).
391 assert( generator_->selections_[i] != ContributionType::kCarrying || cnt <= 1 );
// The candidate is now the current locus.
396 current_locus_ = cand_loc;
// Moves to the next locus for the case that there are no carrying inputs and
// no carrying loci (see the assertions): all inputs are of type kFollowing.
// A candidate locus is searched for until either a locus is found or one of
// the iterators is exhausted; in the latter case, generator_ is set to nullptr.
404 void VariantParallelInputIterator::Iterator::advance_using_only_following_()
// This function is only meant for the case without any carrying loci.
408 assert( carrying_locus_it_ == generator_->carrying_loci_.cend() );
409 assert( generator_->carrying_loci_.empty() );
// Set once an input has no more data.
413 bool one_at_end =
false;
417 assert( iterators_.size() == generator_->selections_.size() );
// If there already is a current locus, all iterators are expected to be at
// that locus (see the assertion), and are moved on from there.
418 if( ! current_locus_.empty() ) {
419 for(
size_t i = 0; i < iterators_.size(); ++i ) {
420 auto& iterator = iterators_[i];
423 assert( generator_->selections_[i] == ContributionType::kFollowing );
433 assert(
locus_equal( iterator->chromosome, iterator->position, current_locus_ ));
434 increment_iterator_( iterator );
// Candidate for the next locus; initially empty.
446 GenomeLocus cand_loc;
// Repeat until a locus is found or an input is exhausted.
451 bool found_locus =
false;
452 while( ! found_locus && ! one_at_end ) {
458 for(
size_t i = 0; i < iterators_.size(); ++i ) {
459 auto& iterator = iterators_[i];
462 assert( generator_->selections_[i] == ContributionType::kFollowing );
470 assert( current_locus_.empty() );
// Without a candidate yet, use the locus of this iterator.
476 if( cand_loc.empty() ) {
478 cand_loc = GenomeLocus{ iterator->chromosome, iterator->position };
// Advance the iterator until it is no longer before the candidate locus,
// or until it is exhausted.
483 while( iterator &&
locus_less( iterator->chromosome, iterator->position, cand_loc )) {
484 increment_iterator_( iterator );
// An iterator that is beyond the candidate provides the new candidate.
500 if(
locus_greater( iterator->chromosome, iterator->position, cand_loc )) {
501 cand_loc = GenomeLocus{ iterator->chromosome, iterator->position };
508 assert(
locus_equal( iterator->chromosome, iterator->position, cand_loc ));
// With at least one input, exactly one of the two stop conditions holds.
513 assert( iterators_.size() == 0 || ( found_locus ^ one_at_end ));
518 assert( ! generator_->has_carrying_input_ );
// NOTE(review): presumably marks this iterator as having reached the end - confirm.
519 generator_ =
nullptr;
// Count the iterators that are exhausted.
// NOTE(review): the return of a value inside this void function implies that
// this code is part of a nested callable (e.g., a lambda) - confirm.
525 size_t at_end_cnt = 0;
526 for(
size_t i = 0; i < iterators_.size(); ++i ) {
527 if( ! iterators_[i] ) {
531 return at_end_cnt == 1;
// With at least one input, the candidate is strictly after the current locus.
540 assert( iterators_.size() == 0 || cand_loc > current_locus_ );
543 assert( iterators_.size() == 0 || found_locus );
// Check all iterators for being exhausted or not being at the candidate locus.
545 for(
size_t i = 0; i < iterators_.size(); ++i ) {
546 auto const& iterator = iterators_[i];
547 if( ! iterator || !
locus_equal( iterator->chromosome, iterator->position, cand_loc )) {
// The candidate is now the current locus.
556 current_locus_ = cand_loc;
// Helper used by the advance functions to move a single input iterator onwards.
// The locus before the move is stored, the chromosome and position are
// validated, and a std::runtime_error is thrown to report an input source that
// is not sorted by chromosome and position.
564 void VariantParallelInputIterator::Iterator::increment_iterator_(
565 VariantInputIterator::Iterator& iterator
// Remember the locus that the iterator is at.
// NOTE(review): presumably compared against the locus after the increment to
// detect unsorted input - confirm.
580 auto const prev_loc = GenomeLocus{ iterator->chromosome, iterator->position };
// Rejects an empty chromosome name or a position of 0 (throws std::runtime_error).
590 assert_correct_chr_and_pos_( iterator );
// Report the unsorted input source by its source name.
592 throw std::runtime_error(
593 "Cannot iterate multiple input sources in parallel, as (at least) "
594 "one of them is not sorted by chromosome and position. "
595 "Offending input source: " + iterator.data().source_name +
" at " +
// Validates the locus of the variant that the given iterator points to.
// Throws std::runtime_error, naming the offending input source, if the
// chromosome name is empty or the position is 0.
605 void VariantParallelInputIterator::Iterator::assert_correct_chr_and_pos_(
606 VariantInputIterator::Iterator
const& iterator
// Both an empty chromosome name and a position of 0 are invalid.
612 if( iterator->chromosome.empty() || iterator->position == 0 ) {
613 throw std::runtime_error(
614 "Cannot iterate multiple input sources in parallel, as (at least) "
615 "one of them has an invalid chromosome (empty name) or position (0). "
616 "Offending input source: " + iterator.data().source_name +
" at " +
// Refreshes the per-source variants for the current locus: for an iterator
// that is at the current locus, its variant is stored in variants_[i].
// Throws std::runtime_error if the number of samples of that variant differs
// from the number recorded for the source in variant_sizes_[i].
626 void VariantParallelInputIterator::Iterator::update_variants_()
628 assert( iterators_.size() == variants_.size() );
629 for(
size_t i = 0; i < iterators_.size(); ++i ) {
630 auto& iterator = iterators_[i];
// Only an iterator that is at the current locus provides data here.
640 if(
locus_equal( iterator->chromosome, iterator->position, current_locus_ )) {
// Take the samples out of the iterator's variant first, so that the copy of
// the variant below does not duplicate the sample data.
653 auto tmp_samples = std::move( iterator->samples );
654 iterator->samples.clear();
// Copy the variant (now without samples), then hand the samples over to the copy.
658 variants_[i] = *iterator;
659 variants_[i]->samples = std::move( tmp_samples );
// The number of samples of a source has to stay the same throughout.
664 if( variants_[i]->samples.size() != variant_sizes_[i] ) {
665 throw std::runtime_error(
666 "Cannot iterate multiple input sources in parallel, as (at least) "
667 "one of them has an inconsistent number of samples. "
668 "Offending input source: " + iterator.data().source_name +
" at " +
669 iterator->chromosome +
":" +
std::to_string( iterator->position ) +
". " +
671 " samples (based on the first used line of input of that source), " +
673 " at the indicated locus."
683 iterator->chromosome, iterator->position, current_locus_