1 #ifndef GENESIS_POPULATION_WINDOW_CHROMOSOME_WINDOW_STREAM_H_
2 #define GENESIS_POPULATION_WINDOW_CHROMOSOME_WINDOW_STREAM_H_
43 #include <type_traits>
44 #include <unordered_set>
48 namespace population {
89 template<
class InputStreamIterator,
class DataType =
typename InputStreamIterator::value_type>
91 InputStreamIterator, DataType, ::genesis::population::WindowView<DataType>
105 using InputType =
typename InputStreamIterator::value_type;
124 InputStreamIterator, DataType, WindowViewType
145 using InputType =
typename InputStreamIterator::value_type;
172 base_iterator_type::is_first_window_ =
true;
173 base_iterator_type::is_last_window_ =
true;
197 void increment_() override final
207 base_iterator_type::current_ != base_iterator_type::end_ &&
210 ++base_iterator_type::current_;
214 if( base_iterator_type::current_ == base_iterator_type::end_ ) {
221 assert( base_iterator_type::current_ != base_iterator_type::end_ );
// Local state for the per-chromosome setup. The two iterators are bound by
// reference to the members of this iterator, while the parent pointer and the
// sequence dictionary handle are copied into locals.
// NOTE(review): these names match the list `is_first, &cur, &end, par, chr,
// seq_dict, &window` that appears further down, presumably a lambda capture
// list - confirm against the full file.
231 bool is_first =
true;
232 auto& cur = self_type::current_;
233 auto&
end = self_type::end_;
234 auto const par = parent_;
236 auto const seq_dict = parent_->sequence_dict_;
// A chromosome name may only be started once. If `chr` is already contained
// in processed_chromosomes_, this throws std::runtime_error; otherwise the
// name is recorded in the set (see the insert below) for later checks.
239 if( processed_chromosomes_.count( chr ) > 0 ) {
240 throw std::runtime_error(
241 "Chromosome " + chr +
" occurs multiple times in the input."
244 processed_chromosomes_.insert( chr );
// Only checked when the parent has a sequence dictionary set: look up `chr`
// in it, and throw std::invalid_argument if find() returns end(), that is,
// if the dictionary has no entry for this chromosome.
249 if( parent_->sequence_dict_ ) {
250 auto const dict_entry = parent_->sequence_dict_->find( chr );
251 if( dict_entry == parent_->sequence_dict_->end() ) {
252 throw std::invalid_argument(
253 "In ChromosomeWindowStream: Cannot iterate chromosome \"" + chr +
254 "\", as the provided sequence dictionary or reference genome "
255 "does not contain the chromosome."
264 auto& window = window_;
268 is_first, &cur, &
end, par, chr, seq_dict, &window
274 assert( cur !=
end );
281 assert( cur !=
end );
// Remember the position of the entry that `cur` refers to; it is compared
// against the dictionary length and stored as the window's last position below.
282 auto const old_pos = par->position_function( *cur );
// Branch for "no more data for this chromosome": either the input is
// exhausted, or `cur` now refers to an entry on a different chromosome.
288 if( cur ==
end || par->chromosome_function( *cur ) != chr ) {
// With a dictionary present, a position beyond the length recorded for `chr`
// is reported as std::invalid_argument.
293 if( seq_dict && old_pos > seq_dict->get( chr ).length ) {
294 throw std::invalid_argument(
295 "In ChromosomeWindowStream: Chromosome \"" + chr +
"\" has length " +
297 " in the provided sequence dictionary or reference genome, "
298 "but the input data contains positions up to " +
// Store the remembered position as the last position of the window.
306 window.last_position( old_pos );
// From here on, `cur` is expected to be dereferenceable and still on
// chromosome `chr`; both expectations are checked by assertions only.
312 assert( cur !=
end );
313 assert( par->chromosome_function( *cur ) == chr );
// Positions within a chromosome have to be strictly increasing: a new
// position that is equal to or smaller than the previous one throws
// std::runtime_error.
316 auto const new_pos = par->position_function( *cur );
317 if( old_pos >= new_pos ) {
318 throw std::runtime_error(
319 "Invalid order on chromosome " + chr +
" with position " +
330 value_type& get_current_window_() const override final
335 base_type const* get_parent_() const override final
350 std::unordered_set<std::string> processed_chromosomes_;
363 InputStreamIterator
begin, InputStreamIterator
end
387 return sequence_dict_;
403 sequence_dict_ = value;
413 std::unique_ptr<typename base_type::BaseIterator>
422 std::unique_ptr<typename base_type::BaseIterator>
425 return std::unique_ptr<DerivedIterator>(
new DerivedIterator(
nullptr ));
437 std::shared_ptr<genesis::sequence::SequenceDict> sequence_dict_;
449 template<
class InputStreamIterator,
class DataType =
typename InputStreamIterator::value_type>
450 ChromosomeWindowStream<InputStreamIterator, DataType>
452 InputStreamIterator begin, InputStreamIterator end
468 template<
class InputStreamIterator>
469 ChromosomeWindowStream<InputStreamIterator>
471 InputStreamIterator begin, InputStreamIterator end
473 using DataType =
typename InputStreamIterator::value_type;
477 it.entry_input_function = []( DataType
const& variant ) {
// Accessor functors set on `it`: chromosome_function returns the `chromosome`
// member of an entry, position_function returns its `position` member.
480 it.chromosome_function = []( DataType
const& variant ) {
481 return variant.chromosome;
483 it.position_function = []( DataType
const& variant ) {
484 return variant.position;
494 #endif // include guard