1 #ifndef GENESIS_POPULATION_WINDOW_GENOME_WINDOW_STREAM_H_
2 #define GENESIS_POPULATION_WINDOW_GENOME_WINDOW_STREAM_H_
42 #include <type_traits>
43 #include <unordered_set>
47 namespace population {
// Template parameter list: an input iterator type and a data type that
// defaults to the iterator's value_type.
86 template<
class InputStreamIterator,
class DataType =
typename InputStreamIterator::value_type>
// Template arguments (iterator, data type, WindowView<DataType>) - presumably
// those of the base class; the class head itself is not visible in this excerpt.
88 InputStreamIterator, DataType, ::genesis::population::WindowView<DataType>
// Alias for the element type of the underlying input iterator.
102 using InputType =
typename InputStreamIterator::value_type;
// Template arguments (iterator, data type, window view type) - presumably those
// of the base iterator class; the class head is not visible in this excerpt.
121 InputStreamIterator, DataType, WindowViewType
// Alias for the element type of the underlying input iterator.
142 using InputType =
typename InputStreamIterator::value_type;
// Both the first-window and the last-window flag are set to true before the
// whole-genome window is initialized.
// NOTE(review): presumably because the whole input forms a single window -
// confirm against the full constructor, which is not visible here.
169 base_iterator_type::is_first_window_ =
true;
170 base_iterator_type::is_last_window_ =
true;
175 init_whole_genome_();
// Final override of the increment hook; its body is not part of this excerpt.
196 void increment_() override final
// Sets up the window for the input. The statements visible in this excerpt
// validate chromosomes against the sequence dictionary and check that
// positions are in order; several lines of the function are not shown.
207 void init_whole_genome_()
// Case of an exhausted input (current_ equals end_); the body of this branch
// is not visible in this excerpt.
212 if( base_iterator_type::current_ == base_iterator_type::end_ ) {
// Local handles used in the capture list below: cur, end, and window are
// references (to self_type::current_, self_type::end_, and window_), while
// par and seq_dict are copies of parent_ and parent_->sequence_dict_.
222 bool is_first =
true;
223 auto& cur = self_type::current_;
224 auto&
end = self_type::end_;
225 auto const par = parent_;
227 auto const seq_dict = parent_->sequence_dict_;
237 auto& window = window_;
// Capture list: is_first, par, chr, and seq_dict by value; cur, end, and
// window by reference.
240 is_first, &cur, &
end, par, chr, seq_dict, &window
242 assert( cur !=
end );
// Position of the entry at cur, kept for the length and order checks below.
253 auto const old_pos = par->position_function( *cur );
// Taken when cur has reached end, or when the entry at cur belongs to a
// chromosome other than chr.
258 if( cur ==
end || par->chromosome_function( *cur ) != chr ) {
// Look up chr in the sequence dictionary; a missing entry is reported as
// std::invalid_argument. NOTE(review): any guard around this lookup (e.g. a
// check that seq_dict is set) is not visible in this excerpt.
264 auto const dict_entry = seq_dict->find( chr );
265 if( dict_entry == seq_dict->end() ) {
266 throw std::invalid_argument(
267 "In GenomeWindowStream: Cannot iterate chromosome \"" + chr +
268 "\", as the provided sequence dictionary or reference genome "
269 "does not contain the chromosome."
272 chr_len = dict_entry->length;
// old_pos must not exceed the chromosome length taken from the dictionary.
274 if( old_pos > chr_len ) {
275 throw std::invalid_argument(
276 "In GenomeWindowStream: Chromosome \"" + chr +
"\" has length " +
278 " in the provided sequence dictionary or reference genome, "
279 "but the input data contains positions up to " +
// A chromosome may be recorded only once in window.chromosomes(); a repeat
// throws std::runtime_error, otherwise its length is stored under its name.
288 if( window.chromosomes().count( chr ) > 0 ) {
289 throw std::runtime_error(
290 "Chromosome " + chr +
" occurs multiple times in the input."
293 window.chromosomes()[ chr ] = chr_len;
// Switch chr to the chromosome of the entry at cur, which is asserted to
// differ from the previous value of chr.
302 assert( par->chromosome_function( *cur ) != chr );
303 chr = par->chromosome_function( *cur );
// At this point cur is asserted to be valid and to be on chromosome chr.
307 assert( cur !=
end );
308 assert( par->chromosome_function( *cur ) == chr );
// The new position must be strictly greater than old_pos; an equal or smaller
// position throws std::runtime_error.
311 auto const new_pos = par->position_function( *cur );
312 if( old_pos >= new_pos ) {
313 throw std::runtime_error(
314 "Invalid order on chromosome " + chr +
" with position " +
// Final override, declared const, returning a non-const reference to
// value_type (the current window, per its name); body not visible in this excerpt.
324 value_type& get_current_window_() const override final
// Final override, declared const, returning a pointer to const base_type
// (the parent, per its name); body not visible in this excerpt.
329 base_type const* get_parent_() const override final
// Parameter list fragment: a begin/end pair of input iterators. The enclosing
// signature is not visible in this excerpt.
353 InputStreamIterator
begin, InputStreamIterator
end
// Getter fragment: returns the stored sequence_dict_ member.
377 return sequence_dict_;
// Setter fragment: stores `value` in the sequence_dict_ member.
392 sequence_dict_ = value;
// Return type of an iterator factory: a unique_ptr to the base iterator type.
// The function name and body are not visible in this excerpt.
402 std::unique_ptr<typename base_type::BaseIterator>
// Return type of a second iterator factory. The visible body constructs a
// DerivedIterator from nullptr.
// NOTE(review): presumably this is the past-the-end iterator - confirm.
411 std::unique_ptr<typename base_type::BaseIterator>
414 return std::unique_ptr<DerivedIterator>(
new DerivedIterator(
nullptr ));
// Shared sequence dictionary; init_whole_genome_ reads it through the parent
// to look up chromosomes and their lengths.
426 std::shared_ptr<genesis::sequence::SequenceDict> sequence_dict_;
// Function template fragment: returns a GenomeWindowStream over the given
// iterator and data type, built from a begin/end iterator pair. DataType
// defaults to the iterator's value_type. The function name and body are not
// visible in this excerpt.
441 template<
class InputStreamIterator,
class DataType =
typename InputStreamIterator::value_type>
442 GenomeWindowStream<InputStreamIterator, DataType>
444 InputStreamIterator begin, InputStreamIterator end
// Function template fragment: returns a GenomeWindowStream with the default
// data type, built from a begin/end iterator pair. The function name and the
// declaration of `it` are not visible in this excerpt.
463 template<
class InputStreamIterator>
464 GenomeWindowStream<InputStreamIterator>
466 InputStreamIterator begin, InputStreamIterator end
// The data type is the iterator's value_type.
468 using DataType =
typename InputStreamIterator::value_type;
// Callback taking the entry by const reference; its body is not visible here.
472 it.entry_input_function = []( DataType
const& variant ) {
// Callback that reads the chromosome from the entry's `chromosome` member.
475 it.chromosome_function = []( DataType
const& variant ) {
476 return variant.chromosome;
// Callback that reads the position from the entry's `position` member.
478 it.position_function = []( DataType
const& variant ) {
479 return variant.position;
489 #endif // include guard