1 #ifndef GENESIS_POPULATION_WINDOW_REGION_WINDOW_STREAM_H_
2 #define GENESIS_POPULATION_WINDOW_REGION_WINDOW_STREAM_H_
49 #include <type_traits>
50 #include <unordered_set>
55 namespace population {
88 template<
class InputStreamIterator,
class DataType =
typename InputStreamIterator::value_type>
102 using InputType =
typename InputStreamIterator::value_type;
135 using InputType =
typename InputStreamIterator::value_type;
164 base_iterator_type::current_ == base_iterator_type::end_ &&
165 parent_->skip_empty_regions_
172 find_next_chromosome_regions_();
173 fill_next_windows_();
177 base_iterator_type::is_first_window_ = ( windows_.size() > 0 );
178 base_iterator_type::is_last_window_ = (
179 windows_.size() == 1 ||
180 ( windows_.size() > 1 && windows_[0].chromosome() != windows_[1].chromosome() )
187 if( windows_.empty() ) {
188 assert( base_iterator_type::current_ == base_iterator_type::end_ );
214 void increment_() override final
220 assert( windows_.size() > 0 );
222 assert( parent_->region_list_ );
228 if( windows_.size() == 1 ) {
234 assert( base_iterator_type::current_ == base_iterator_type::end_ );
235 assert( base_iterator_type::is_last_window_ );
237 seen_chromosomes_.size() == parent_->region_list_->chromosome_count() ||
238 parent_->skip_empty_regions_
240 windows_.pop_front();
244 assert( windows_.size() > 1 );
249 base_iterator_type::is_first_window_ = (
250 windows_[0].chromosome() != windows_[1].chromosome()
256 windows_.pop_front();
257 fill_next_windows_();
262 assert( windows_.size() > 0 );
263 base_iterator_type::is_last_window_ = (
264 windows_.size() == 1 ||
265 ( windows_[0].chromosome() != windows_[1].chromosome() )
269 assert( ! windows_.front().empty() || ! parent_->skip_empty_regions_ );
282 void fill_next_windows_()
286 assert( parent_->region_list_ );
287 auto const& region_list = *parent_->region_list_;
288 auto& cur_it = base_iterator_type::current_;
289 auto& end_it = base_iterator_type::end_;
301 while( cur_it != end_it ) {
315 auto const cur_win_done = (
316 windows_.size() > 0 && (
317 cur_chr != windows_.front().chromosome() ||
318 cur_pos > windows_.front().last_position()
327 auto const next_win_determined = (
328 windows_.size() > 1 && (
329 ! windows_[1].empty() ||
330 ! parent_->skip_empty_regions_
338 if( cur_win_done && next_win_determined ) {
350 if( windows_.empty() || cur_chr != windows_.back().chromosome() ) {
356 find_next_chromosome_regions_();
361 if( cur_it == end_it ) {
369 assert( ! windows_.empty() );
373 assert( cur_index_ == 0 );
382 assert( ! windows_.empty() );
385 fill_windows_at_current_position_( cur_chr, cur_pos );
411 windows_.size() == 1 &&
412 cur_chr == windows_.front().chromosome() &&
413 cur_pos > windows_.front().last_position()
415 windows_.size() == 1 &&
416 cur_chr != windows_.front().chromosome()
424 assert( cur_it != end_it );
426 windows_.size() > 1 || (
427 windows_.size() == 1 &&
428 cur_chr == windows_.front().chromosome() &&
429 cur_pos <= windows_.front().last_position()
443 throw std::runtime_error(
444 "Input not sorted or containing duplicate positions,"
445 " on chromosome '" + cur_chr +
"',"
447 " found in the input after already having seen position " +
457 if( cur_it == end_it && parent_->skip_empty_regions_ ) {
461 while( ! windows_.empty() && windows_.back().empty() ) {
473 ! parent_->skip_empty_regions_ &&
474 seen_chromosomes_.size() != region_list.chromosome_count()
481 for(
auto const& chr : region_list.chromosome_names() ) {
482 assert( region_list.region_count( chr ) > 0 );
483 if( seen_chromosomes_.count( chr ) == 0 ) {
484 add_chromosome_windows_( chr );
497 void fill_windows_at_current_position_(
498 std::string
const& cur_chr,
501 auto& cur_it = base_iterator_type::current_;
502 auto& end_it = base_iterator_type::end_;
506 assert( cur_it != end_it );
507 assert( ! cur_chr.empty() );
508 assert( cur_pos > 0 );
509 assert( seen_chromosomes_.count( cur_chr ) > 0 );
510 assert( windows_.size() > 0 );
517 while( i < windows_.size() ) {
518 auto& window = windows_[i];
523 if( cur_chr == window.chromosome() && cur_pos < window.first_position() ) {
529 cur_chr == window.chromosome() &&
530 cur_pos >= window.first_position() &&
531 cur_pos <= window.last_position()
535 assert( cur_it != end_it );
539 window.entries().emplace_back(
552 parent_->skip_empty_regions_ &&
553 ( cur_chr != window.chromosome() || cur_pos > window.last_position() )
555 windows_.erase( windows_.begin() + i );
561 assert( ! window.empty() || ! parent_->skip_empty_regions_ );
576 void find_next_chromosome_regions_()
580 assert( parent_->region_list_ );
581 auto const& region_list = *parent_->region_list_;
582 auto& cur_it = base_iterator_type::current_;
583 auto& end_it = base_iterator_type::end_;
587 assert( cur_it != end_it || windows_.size() == 0 );
591 std::string cur_chr =
"";
592 while( cur_chr.empty() && cur_it != end_it ) {
596 if( seen_chromosomes_.count( cur_chr ) > 0 ) {
597 throw std::runtime_error(
598 "Input not sorted, chromosome '" + cur_chr +
"' has been in the input before."
605 ! region_list.has_chromosome( cur_chr ) ||
606 region_list.region_count( cur_chr ) == 0
623 if( cur_it == end_it ) {
624 assert( cur_chr.empty() );
631 assert( cur_it != end_it );
633 assert( region_list.has_chromosome( cur_chr ));
634 assert( region_list.region_count( cur_chr ) > 0 );
637 add_chromosome_windows_( cur_chr );
638 assert( windows_.size() > 0 );
648 void add_chromosome_windows_( std::string
const& chromosome )
652 assert( parent_->region_list_ );
653 auto const& region_list = *parent_->region_list_;
657 assert( region_list.has_chromosome( chromosome ));
658 assert( region_list.region_count( chromosome ) > 0 );
659 assert( seen_chromosomes_.count( chromosome ) == 0 );
667 for(
auto const& region : region_list.chromosome_regions( chromosome ) ) {
668 if( region.low() == 0 || region.high() == 0 ) {
669 throw std::runtime_error(
670 "Cannot process whole chromosomes with RegionWindowStream"
678 windows_.back().chromosome() != chromosome ||
679 region.low() >= windows_.back().first_position()
683 windows_.emplace_back();
684 auto& window = windows_.back();
685 window.chromosome( chromosome );
686 window.first_position( region.low() );
687 window.last_position( region.high() );
691 seen_chromosomes_.insert( chromosome );
694 value_type& get_current_window_() const override final
698 assert( ! windows_.empty() );
699 return const_cast<value_type&
>( windows_.front() );
702 BaseWindowStream<InputStreamIterator, DataType>
const* get_parent_() const override final
715 std::unordered_set<std::string> seen_chromosomes_;
720 std::deque<Window> windows_;
721 size_t cur_index_ = 0;
733 InputStreamIterator
begin, InputStreamIterator
end,
734 std::shared_ptr<GenomeRegionList> region_list
737 , region_list_( region_list )
739 if( ! region_list ) {
740 throw std::invalid_argument(
741 "No GenomeRegionList provided for creating RegionWindowStream"
762 skip_empty_regions_ = value;
768 return skip_empty_regions_;
777 std::unique_ptr<typename BaseWindowStream<InputStreamIterator, DataType>::BaseIterator>
786 std::unique_ptr<typename BaseWindowStream<InputStreamIterator, DataType>::BaseIterator>
800 std::shared_ptr<GenomeRegionList> region_list_;
803 bool skip_empty_regions_ =
true;
820 template<
class InputStreamIterator,
class DataType =
typename InputStreamIterator::value_type>
821 RegionWindowStream<InputStreamIterator, DataType>
823 InputStreamIterator begin, InputStreamIterator end,
824 std::shared_ptr<GenomeRegionList> region_list
840 template<
class InputStreamIterator>
841 RegionWindowStream<InputStreamIterator>
843 InputStreamIterator begin, InputStreamIterator end,
844 std::shared_ptr<GenomeRegionList> region_list
846 using DataType =
typename InputStreamIterator::value_type;
850 it.entry_input_function = []( DataType
const& variant ) {
853 it.chromosome_function = []( DataType
const& variant ) {
854 return variant.chromosome;
856 it.position_function = []( DataType
const& variant ) {
857 return variant.position;
875 template<
class InputStreamIterator>
876 WindowViewStream<InputStreamIterator>
878 InputStreamIterator begin, InputStreamIterator end,
879 std::shared_ptr<GenomeRegionList> region_list
889 #endif // include guard