A library for working with phylogenetic and population genetic data.
v0.27.0
variant_parallel_input_iterator.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
33 #include <algorithm>
34 #include <cassert>
35 #include <iterator>
36 #include <numeric>
37 #include <stdexcept>
38 
39 namespace genesis {
40 namespace population {
41 
42 // =================================================================================================
43 // Iterator Constructors and Rule of Five
44 // =================================================================================================
45 
46 VariantParallelInputIterator::Iterator::Iterator(
47  VariantParallelInputIterator* generator
48 )
49  : generator_(generator)
50 {
51  // We use the generator as a check if this Iterator is intended
52  // to be a begin() or end() iterator.
53  // We could also just use the default constructor to create end() iterators,
54  // this would have the same effect.
55  if( ! generator_ ) {
56  return;
57  }
58 
59  // If the generator is valid, initialize our input sources and start iterating them.
60  // Init the iterators and variant storage.
61  iterators_.reserve( generator_->inputs_.size() );
62  variant_sizes_.reserve( generator_->inputs_.size() );
63  for( size_t i = 0; i < generator_->inputs_.size(); ++i ) {
64  iterators_.emplace_back( generator_->inputs_[i].begin() );
65 
66  // We now have stored the iterator and called its begin() function,
67  // which in the LambdaIterator already obtains the first element.
68  // We use this to get the number of BaseCounts objects in the Variant.
69  // We will later need this to default-construct that many BaseCounts
70  // for positions where this iterator does not have data.
71  auto const sample_name_count = generator_->inputs_[i].data().sample_names.size();
72  if( iterators_[i] ) {
73  variant_sizes_.push_back( iterators_[i]->samples.size() );
74 
75  // We assume that the sample_names are of the correct size, if given.
76  if( sample_name_count > 0 && iterators_[i]->samples.size() != sample_name_count ) {
77  throw std::runtime_error(
78  "Input source for VariantParallelInputIterator contains " +
79  std::to_string( iterators_[i]->samples.size() ) + " samples, but its sample " +
80  "name list contains " + std::to_string( sample_name_count ) + " names."
81  );
82  }
83 
84  // Let's make sure that the first position is a valid chromosome and
85  // position. Later, when we advance the iterator, we repeat the check
86  // for every locus we go to as well, just to be sure.
87  assert_correct_chr_and_pos_( iterators_[i] );
88  } else {
89  // If the iterator does not have any data at all, that is, it is already at its end,
90  // we use the length of its sample_names list to indicate how many samples it would
91  // have contained if it had data. This is useful for example in situations where the
92  // input is filtered, so that a file that in fact does have data appears to be without
93  // data here. Using the correct number of samples is then important for some downstream
94  // processors that expect the correct number of samples.
95  variant_sizes_.push_back( sample_name_count );
96  }
97  }
98 
99  // We use the sum of all to allocate memory for effciency. Let's compute that sum once.
100  variant_size_sum_ = std::accumulate(
101  variant_sizes_.begin(),
102  variant_sizes_.end(),
103  decltype( variant_sizes_ )::value_type( 0 )
104  );
105 
106  // Init with default constructed Variants.
107  variants_.resize( generator_->inputs_.size() );
108 
109  // Make sure all have the same size.
110  assert( iterators_.size() == generator_->inputs_.size() );
111  assert( iterators_.size() == variants_.size() );
112  assert( iterators_.size() == variant_sizes_.size() );
113 
114  // Lastly, start with the additional carrying loci.
115  carrying_locus_it_ = generator_->carrying_loci_.cbegin();
116 
117  // Now go to the first locus we want.
118  advance_();
119 }
120 
121 // =================================================================================================
122 // Iterator Accessors
123 // =================================================================================================
124 
125 Variant VariantParallelInputIterator::Iterator::joined_variant(
126  bool allow_ref_base_mismatches,
127  bool allow_alt_base_mismatches,
128  bool move_samples
129 ) {
130  assert( iterators_.size() == variants_.size() );
131  assert( iterators_.size() == variant_sizes_.size() );
132  assert( iterators_.size() == generator_->inputs_.size() );
133 
134  // Prepare the result.
135  Variant res;
136  res.chromosome = current_locus_.chromosome;
137  res.position = current_locus_.position;
138  res.samples.reserve( variant_size_sum_ );
139 
140  // Special edge case: No inputs at all.
141  if( variants_.empty() ) {
142  return res;
143  }
144  assert( variants_.size() > 0 );
145  assert( variant_sizes_.size() > 0 );
146 
147  // Not all variants might have data; some might be the empty optional.
148  // We hence need to keep track of whether we already initialized our result or not.
149  // This only concernes the ref and alt base fields.
150  bool bases_init = false;
151 
152  // Go through all variants, and for those that have data, check the data correctness,
153  // and add them to the result.
154  for( size_t i = 0; i < variants_.size(); ++i ) {
155 
156  // If the variant has data, use it.
157  if( variants_[i] ) {
158 
159  // We already check all of the below when adding the data to variants_.
160  // Still, assert that this is all good.
161  assert( variants_[i]->chromosome == res.chromosome );
162  assert( variants_[i]->position == res.position );
163  assert( variants_[i]->samples.size() == variant_sizes_[i] );
164 
165  // Set and check the ref and alt bases.
166  // This is the first input that has data here. Use it to initialize the bases.
167  if( ! bases_init ) {
168  res.reference_base = variants_[i]->reference_base;
169  res.alternative_base = variants_[i]->alternative_base;
170  bases_init = true;
171  }
172 
173  // Now check that all inputs have the same bases. We however overwrite any input
174  // that has an N with an input that does not have N, to get the best data.
175  if( res.reference_base != variants_[i]->reference_base ) {
176  if( res.reference_base == 'N' ) {
177  res.reference_base = variants_[i]->reference_base;
178  } else if( allow_ref_base_mismatches ) {
179  res.reference_base = 'N';
180  } else {
181  throw std::runtime_error(
182  "Mismatching reference bases while iterating input sources in parallel at " +
183  to_string( current_locus_ ) + ". Some sources have base '" +
184  std::string( 1, res.reference_base ) + "' while others have '" +
185  std::string( 1, variants_[i]->reference_base ) + "'."
186  );
187  }
188  }
189  if( res.alternative_base != variants_[i]->alternative_base ) {
190  if( res.alternative_base == 'N' ) {
191  res.alternative_base = variants_[i]->alternative_base;
192  } else if( allow_alt_base_mismatches ) {
193  res.alternative_base = 'N';
194  } else {
195  throw std::runtime_error(
196  "Mismatching alternative bases while iterating input sources in parallel at " +
197  to_string( current_locus_ ) + ". Some sources have base '" +
198  std::string( 1, res.alternative_base ) + "' while others have '" +
199  std::string( 1, variants_[i]->alternative_base ) + "'."
200  );
201  }
202  }
203 
204  // Now move or copy the samples. The reserve calls in the following are not necessary,
205  // as we already allocate enough capacity above. We keep them here for future reference.
206  if( move_samples ) {
207  // res.samples.reserve( res.samples.size() + variants_[i]->samples.size() );
208  std::move(
209  std::begin( variants_[i]->samples ),
210  std::end( variants_[i]->samples ),
211  std::back_inserter( res.samples )
212  );
213  variants_[i]->samples.clear();
214  } else {
215  // res.samples.reserve( res.samples.size() + variants_[i]->samples.size() );
216  std::copy(
217  std::begin( variants_[i]->samples ),
218  std::end( variants_[i]->samples ),
219  std::back_inserter( res.samples )
220  );
221  }
222 
223  } else {
224 
225  // If the variant has no data, put as many dummy samples with empty BaseCounts
226  // into the result as the input source has samples in its data positions.
227  // res.samples.reserve( res.samples.size() + variant_sizes_[i].size() );
228  for( size_t k = 0; k < variant_sizes_[i]; ++k ) {
229  res.samples.emplace_back();
230  }
231  }
232  }
233 
234  // If none of the input sources had data, that means that we are currently at an
235  // additional carrying locus. Check this. We do not need to do anything else,
236  // as the resulting Variant already contains all the information that we have at hand.
237  assert(
238  bases_init ||
239  (
240  carrying_locus_it_ != generator_->carrying_loci_.cend() &&
241  locus_equal( *carrying_locus_it_, current_locus_ )
242  )
243  );
244 
245  // Make sure that the number of samples is the same as the sum of all sample sizes
246  // in the variant_sizes_ vector combined.
247  assert( res.samples.size() == variant_size_sum_ );
248 
249  // Done. Return the merged result.
250  return res;
251 }
252 
253 // =================================================================================================
254 // Iterator Internal Members
255 // =================================================================================================
256 
257 // -------------------------------------------------------------------------
258 // advance_using_carrying_()
259 // -------------------------------------------------------------------------
260 
261 void VariantParallelInputIterator::Iterator::advance_using_carrying_()
262 {
263  // Candidate locus. We look for the earliest position of the carrying iterators,
264  // as this is the next one we want to go to.
265  GenomeLocus cand_loc;
266 
267  // Go through all carrying iterators and find the earliest next position of any of them.
268  assert( iterators_.size() == generator_->selections_.size() );
269  for( size_t i = 0; i < iterators_.size(); ++i ) {
270  auto& iterator = iterators_[i];
271  if( ! iterator || generator_->selections_[i] != ContributionType::kCarrying ) {
272  continue;
273  }
274 
275  // In all iterators, we already have moved on to at least the current position.
276  // This assumes that all of the inputs are properly sorted. We check this in
277  // increment_iterator_()
278  // This also works for the very first time this function is called (in the iterator
279  // constructor), as the iterator will then also compare greater than the
280  // current_locus_, which is empty at this point.
281  assert( locus_greater_or_equal(
282  iterator->chromosome, iterator->position, current_locus_
283  ));
284 
285  // If this iterator is currently one of the ones that contain the current
286  // position, it's time to move on now. If not, we already asserted that it is
287  // greater, which means, it it still waiting for its turn, so nothing to do now.
288  if( locus_equal( iterator->chromosome, iterator->position, current_locus_ )) {
289  increment_iterator_( iterator );
290 
291  // We might now be done with this input source.
292  if( ! iterator ) {
293  continue;
294  }
295  }
296 
297  // Now comes the part where we find the earliest position that we want to stop at.
298  // Stop at the earliest postion of any iterator of type carrying (all of its positions
299  // need to be included), or if we are in the first iteration of the loop.
300  if(
301  cand_loc.empty() ||
302  locus_less( iterator->chromosome, iterator->position, cand_loc )
303  ) {
304 
305  cand_loc = GenomeLocus{ iterator->chromosome, iterator->position };
306  }
307  }
308 
309  // If there are additional carrying loci, use them to find the candidate as well.
310  assert( generator_ );
311  if( carrying_locus_it_ != generator_->carrying_loci_.cend() ) {
312  // All the assertions from above apply here as well.
313  assert( ! carrying_locus_it_->empty() );
314  assert( locus_greater_or_equal( *carrying_locus_it_, current_locus_ ) );
315 
316  // If the carrying locus is at the current locus, we need to move it forward,
317  // same as above.
318  if( locus_equal( *carrying_locus_it_, current_locus_ ) ) {
319  ++carrying_locus_it_;
320  }
321 
322  // Now, if it still is not at its end, we can use it as a candidate as well,
323  // if it is earlier than the current input source based candidate (of it the candidate
324  // is empty, which for example happens if all input sources are following, or if all
325  // inputs have already reached their end).
326  if(
327  carrying_locus_it_ != generator_->carrying_loci_.cend() &&
328  (
329  cand_loc.empty() ||
330  locus_less( *carrying_locus_it_, cand_loc )
331  )
332  ) {
333  cand_loc = *carrying_locus_it_;
334  }
335  }
336 
337  // If we have not set any candidate locus, that means that all carrying iterators
338  // are at their end. Time to wrap up then.
339  if( cand_loc.empty() ) {
340  assert( generator_ );
341  assert( generator_->has_carrying_input_ );
342 
343  // Assert that indeed all carrying iterators are at their end.
344  assert( [&](){
345  for( size_t i = 0; i < iterators_.size(); ++i ) {
346  if( generator_->selections_[i] == ContributionType::kCarrying && iterators_[i] ) {
347  return false;
348  }
349  }
350  return true;
351  }() );
352 
353  // Also, we must have reached the end of the additional carrying loci,
354  // otherwise we would have found a candidate from there.
355  assert( carrying_locus_it_ == generator_->carrying_loci_.cend() );
356 
357  // We are done here.
358  generator_ = nullptr;
359  return;
360  }
361 
362  // We have found a new locus. It needs to be further down from the current
363  // (again this also works in the first call of this function, when current is empty).
364  assert( cand_loc > current_locus_ );
365 
366  // Now that we found the next position to go to, move _all_ iterators to it
367  // (or the next one beyond, if it does not have that position).
368  for( size_t i = 0; i < iterators_.size(); ++i ) {
369  auto& iterator = iterators_[i];
370 
371  // Nothing to do if the iterator is already at its end.
372  if( ! iterator ) {
373  continue;
374  }
375 
376  // Same assertion as above, this time for all of them, just to be sure.
377  assert( locus_greater_or_equal(
378  iterator->chromosome, iterator->position, current_locus_
379  ));
380 
381  // Now move the iterator until we reach the candidate, or one beyond.
382  // For carrying iterators, this loop can only get called once at max (or not at all,
383  // if this source is already at or beyond the candidate from the loop above),
384  // as we never want to skip anything in a carrying iterator. Assert this.
385  size_t cnt = 0;
386  while( iterator && locus_less( iterator->chromosome, iterator->position, cand_loc )) {
387  increment_iterator_( iterator );
388  ++cnt;
389  }
390  (void) cnt;
391  assert( generator_->selections_[i] != ContributionType::kCarrying || cnt <= 1 );
392  }
393 
394  // Finally, update the current locus, and set the variants according to the iterators.
395  // The order of these is imporant, as the latter needs the former to be set.
396  current_locus_ = cand_loc;
397  update_variants_();
398 }
399 
400 // -------------------------------------------------------------------------
401 // advance_using_only_following_()
402 // -------------------------------------------------------------------------
403 
404 void VariantParallelInputIterator::Iterator::advance_using_only_following_()
405 {
406  // If this function is called, we only have following iterators,
407  // so there are no addtional carrying loci given.
408  assert( carrying_locus_it_ == generator_->carrying_loci_.cend() );
409  assert( generator_->carrying_loci_.empty() );
410 
411  // Once one of the iterators reaches its end, we are done, as then there cannot
412  // be any more intersections.
413  bool one_at_end = false;
414 
415  // If this is not the first call of this function (the one that is done in the constructor
416  // of the iterator), move all iterators at least once, to get away from the current locus.
417  assert( iterators_.size() == generator_->selections_.size() );
418  if( ! current_locus_.empty() ) {
419  for( size_t i = 0; i < iterators_.size(); ++i ) {
420  auto& iterator = iterators_[i];
421 
422  // This function is only ever called if all inputs are of type following.
423  assert( generator_->selections_[i] == ContributionType::kFollowing );
424 
425  // As we are doing the intersection of all iterators here, none of them can be at the
426  // end right now. If one were, we would already have reached the end of our
427  // parallel iteration before, and never entered this function.
428  assert( iterator );
429 
430  // In all iterators, we must be at the current locus, as this is only intersections.
431  // So now, it's time to move on once. Then, go to the next iterator.
432  // The rest of this loop is not needed in this first round.
433  assert( locus_equal( iterator->chromosome, iterator->position, current_locus_ ));
434  increment_iterator_( iterator );
435 
436  // Check if we are done with this iterator. If so, we are completely done,
437  // no need to do anything else here.
438  if( ! iterator ) {
439  one_at_end = true;
440  break;
441  }
442  }
443  }
444 
445  // Candidate locus. We look for the earliest locus that all inputs share.
446  GenomeLocus cand_loc;
447 
448  // Loop until we have found a locus that all iterators share,
449  // or until one of them is at the end (in which case, there won't be any more intersections
450  // and we are done with the parallel iteration).
451  bool found_locus = false;
452  while( ! found_locus && ! one_at_end ) {
453 
454  // Assume that we are done. Below, we will reset these if we are not in fact done.
455  found_locus = true;
456 
457  // Try to find the candidate in all inputs.
458  for( size_t i = 0; i < iterators_.size(); ++i ) {
459  auto& iterator = iterators_[i];
460 
461  // This function is only ever called if all inputs are of type following.
462  assert( generator_->selections_[i] == ContributionType::kFollowing );
463 
464  // If the iterator is already at its end, we are done here.
465  // This case can here only occur if we have an empty input source,
466  // in which case the call to adanvance_() made from the constructor lead us here.
467  // We assert that this is indeed the first call of this function by using the
468  // current_element_ as a check.
469  if( ! iterator ) {
470  assert( current_locus_.empty() );
471  one_at_end = true;
472  break;
473  }
474 
475  // Init the candidate. This happens in the first iteration of the for loop.
476  if( cand_loc.empty() ) {
477  assert( i == 0 );
478  cand_loc = GenomeLocus{ iterator->chromosome, iterator->position };
479  }
480 
481  // If the iterator is behind the candidate, move it forward until it either catches
482  // up, or overshoots the locus, or reaches its end.
483  while( iterator && locus_less( iterator->chromosome, iterator->position, cand_loc )) {
484  increment_iterator_( iterator );
485  }
486 
487  // If the iterator reached its end now, we are done here.
488  // No more intersections can occur, we can leave the whole thing.
489  if( ! iterator ) {
490  one_at_end = true;
491  break;
492  }
493 
494  // If we have an overshoot, the candidate is not good, as this means that not all
495  // inputs have that locus (as exemplified by that overshoot). In that case,
496  // we store the new candidate, and leave the loop. It does not really matter here
497  // if we go on to the next input, or just start the whole outer loop again.
498  // Let's keep the pace and continue with the next input.
499  assert( iterator );
500  if( locus_greater( iterator->chromosome, iterator->position, cand_loc )) {
501  cand_loc = GenomeLocus{ iterator->chromosome, iterator->position };
502  found_locus = false;
503  continue; // or break for the alternative loop order
504  }
505 
506  // If we are here, we have reached the candidate locus.
507  assert( iterator );
508  assert( locus_equal( iterator->chromosome, iterator->position, cand_loc ));
509  }
510  }
511 
512  // Only one of the exit conditions can be true (unless there is no input at all).
513  assert( iterators_.size() == 0 || ( found_locus ^ one_at_end ));
514 
515  // If we have not found any locus, that means that at least one of the iterators is at its end.
516  // No more intersections can occur. Time to wrap up then.
517  if( one_at_end ) {
518  assert( ! generator_->has_carrying_input_ );
519  generator_ = nullptr;
520 
521  // Assert that exactly one is at its end. Theoretically, in our search, other iterators
522  // might also have been about to reach their end, but as we immediately exit the above
523  // loop once we find an end, these are not incremented to their end yet.
524  assert( [&](){
525  size_t at_end_cnt = 0;
526  for( size_t i = 0; i < iterators_.size(); ++i ) {
527  if( ! iterators_[i] ) {
528  ++at_end_cnt;
529  }
530  }
531  return at_end_cnt == 1;
532  }() );
533 
534  // We have indicated our end by nulling generator_. Nothing more to do.
535  return;
536  }
537 
538  // If we are here, we have found a good new locus. It needs to be further down from the current
539  // (again this also works in the first call of this function, when current is empty).
540  assert( iterators_.size() == 0 || cand_loc > current_locus_ );
541 
542  // Assert that all are at the given locus, and not at their end.
543  assert( iterators_.size() == 0 || found_locus );
544  assert( [&](){
545  for( size_t i = 0; i < iterators_.size(); ++i ) {
546  auto const& iterator = iterators_[i];
547  if( ! iterator || ! locus_equal( iterator->chromosome, iterator->position, cand_loc )) {
548  return false;
549  }
550  }
551  return true;
552  }() );
553 
554  // Finally, update the current locus, and set the variants according to the iterators.
555  // The order of these is imporant, as the latter needs the former to be set.
556  current_locus_ = cand_loc;
557  update_variants_();
558 }
559 
560 // -------------------------------------------------------------------------
561 // increment_iterator_()
562 // -------------------------------------------------------------------------
563 
564 void VariantParallelInputIterator::Iterator::increment_iterator_(
565  VariantInputIterator::Iterator& iterator
566 ) {
567  // If we already reached the end, do nothing.
568  // if( ! iterator ) {
569  // return;
570  // }
571  //
572  // Nope, this function should never be called on a finished iterator.
573  assert( iterator );
574 
575  // We here check that the iterator is in chrom/pos order. This is also already done
576  // by default in most of our file format iterators, and we here need an expensive
577  // string copy just for this one check, but it feels like this is necessary to be
578  // on the safe side. After all, this parallel iterator here is a bit tricky anyway,
579  // so we have to live with that cost.
580  auto const prev_loc = GenomeLocus{ iterator->chromosome, iterator->position };
581 
582  // Now do the increment and check whether we are done with this source.
583  ++iterator;
584  if( ! iterator ) {
585  return;
586  }
587 
588  // Check that it is has a valid chromosome and position, and
589  // make sure that the input is sorted.
590  assert_correct_chr_and_pos_( iterator );
591  if( locus_less_or_equal( iterator->chromosome, iterator->position, prev_loc )) {
592  throw std::runtime_error(
593  "Cannot iterate multiple input sources in parallel, as (at least) "
594  "one of them is not sorted by chromosome and position. "
595  "Offending input source: " + iterator.data().source_name + " at " +
596  iterator->chromosome + ":" + std::to_string( iterator->position )
597  );
598  }
599 }
600 
601 // -------------------------------------------------------------------------
602 // assert_correct_chr_and_pos_()
603 // -------------------------------------------------------------------------
604 
605 void VariantParallelInputIterator::Iterator::assert_correct_chr_and_pos_(
606  VariantInputIterator::Iterator const& iterator
607 ) {
608  assert( iterator );
609 
610  // This is checked already in our file format iterators, but we heavily depend
611  // on this here, so let's check it.
612  if( iterator->chromosome.empty() || iterator->position == 0 ) {
613  throw std::runtime_error(
614  "Cannot iterate multiple input sources in parallel, as (at least) "
615  "one of them has an invalid chromosome (empty name) or position (0). "
616  "Offending input source: " + iterator.data().source_name + " at " +
617  iterator->chromosome + ":" + std::to_string( iterator->position )
618  );
619  }
620 }
621 
622 // -------------------------------------------------------------------------
623 // update_variants_()
624 // -------------------------------------------------------------------------
625 
626 void VariantParallelInputIterator::Iterator::update_variants_()
627 {
628  assert( iterators_.size() == variants_.size() );
629  for( size_t i = 0; i < iterators_.size(); ++i ) {
630  auto& iterator = iterators_[i];
631 
632  // If the iterator is already finished, we store an empty optional variant.
633  if( ! iterator ) {
634  variants_[i] = utils::nullopt;
635  continue;
636  }
637 
638  // If the iterator is at the current locus, we store its data here,
639  // so that users can access it. If not, we store an empty optional variant.
640  if( locus_equal( iterator->chromosome, iterator->position, current_locus_ )) {
641 
642  // We ideally want to move all data here, for efficiency.
643  // The user does not have access to the iterators, so this is okay.
644  // We however cannot move all the data, as we will later need access to the
645  // chromosome and position of the iterators; so instead, we only move the expensive
646  // BaseCounts samples. In order to avoid that when we add more elements to Variant later
647  // and then accidentally forget to also set them here, we do a three-step process where
648  // we move the BaseCounts over to a temp location first, and then copy the rest.
649  // This ensures that whatever other fields Variant gets in the future, we always
650  // copy them as well. So future proof!
651 
652  // Move the samples and leave them in a well-defined empty state in the iterator.
653  auto tmp_samples = std::move( iterator->samples );
654  iterator->samples.clear();
655 
656  // Now we can copy the rest, which will not copy any samples (as we just cleared them),
657  // and then finally move over the samples again.
658  variants_[i] = *iterator;
659  variants_[i]->samples = std::move( tmp_samples );
660 
661  // Check for consistency. This is also already checked in all our input
662  // file sources that we have implemented so far, but better safe than sorry.
663  // Also, we need to do this in case someone uses a different source that does not check.
664  if( variants_[i]->samples.size() != variant_sizes_[i] ) {
665  throw std::runtime_error(
666  "Cannot iterate multiple input sources in parallel, as (at least) "
667  "one of them has an inconsistent number of samples. "
668  "Offending input source: " + iterator.data().source_name + " at " +
669  iterator->chromosome + ":" + std::to_string( iterator->position ) + ". " +
670  "Expecting " + std::to_string( variant_sizes_[i] ) +
671  " samples (based on the first used line of input of that source), " +
672  "but found " + std::to_string( variants_[i]->samples.size() ) +
673  " at the indicated locus."
674  );
675  }
676 
677  // Naive version that just makes a full copy.
678  // variants_[i] = *iterator;
679  } else {
680  // The iterator is not at our current locus. We already checked that it is
681  // however still valid, so it must be beyond the current one. Assert this.
682  assert( locus_greater(
683  iterator->chromosome, iterator->position, current_locus_
684  ));
685 
686  variants_[i] = utils::nullopt;
687  }
688  }
689 }
690 
691 } // namespace population
692 } // namespace genesis
genesis::population::locus_equal
bool locus_equal(std::string const &l_chromosome, size_t l_position, std::string const &r_chromosome, size_t r_position)
Equality comparison (==) for two loci in a genome.
Definition: functions/genome_locus.hpp:169
genesis::population::GenomeLocus::empty
bool empty() const
Definition: genome_locus.hpp:74
genesis::population::Variant::position
size_t position
Definition: variant.hpp:65
genesis::population::locus_greater
bool locus_greater(std::string const &l_chromosome, size_t l_position, std::string const &r_chromosome, size_t r_position)
Greater than comparison (>) for two loci in a genome.
Definition: functions/genome_locus.hpp:331
genesis::utils::nullopt
const nullopt_t nullopt((nullopt_t::init()))
Optional to indicate an empty value.
genesis::population::GenomeLocus
A single locus, that is, a position (or coordinate) on a chromosome.
Definition: genome_locus.hpp:56
genesis::population::Variant::reference_base
char reference_base
Definition: variant.hpp:66
genesis::population::locus_greater_or_equal
bool locus_greater_or_equal(std::string const &l_chromosome, size_t l_position, std::string const &r_chromosome, size_t r_position)
Greater than or equal comparison (>=) for two loci in a genome.
Definition: functions/genome_locus.hpp:441
genesis::population::GenomeLocus::chromosome
std::string chromosome
Definition: genome_locus.hpp:58
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: functions/genome_locus.hpp:48
genesis::population::Variant::samples
std::vector< BaseCounts > samples
Definition: variant.hpp:69
genesis::population::Variant
A single variant at a position in a chromosome, along with BaseCounts for a set of samples.
Definition: variant.hpp:62
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
variant_parallel_input_iterator.hpp
genesis::population::Variant::alternative_base
char alternative_base
Definition: variant.hpp:67
genesis::population::locus_less
bool locus_less(std::string const &l_chromosome, size_t l_position, std::string const &r_chromosome, size_t r_position)
Less than comparison (<) for two loci in a genome.
Definition: functions/genome_locus.hpp:277
genesis::population::locus_less_or_equal
bool locus_less_or_equal(std::string const &l_chromosome, size_t l_position, std::string const &r_chromosome, size_t r_position)
Less than or equal comparison (<=) for two loci in a genome.
Definition: functions/genome_locus.hpp:385
genesis::population::Variant::chromosome
std::string chromosome
Definition: variant.hpp:64