A library for working with phylogenetic and population genetic data.
v0.32.0
interval_window_stream.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_WINDOW_INTERVAL_WINDOW_STREAM_H_
2 #define GENESIS_POPULATION_WINDOW_INTERVAL_WINDOW_STREAM_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
38 
39 #include <cassert>
40 #include <functional>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits>
44 #include <utility>
45 
46 namespace genesis {
47 namespace population {
48 
49 // =================================================================================================
50 // Sliding Interval Window Stream
51 // =================================================================================================
52 
73 template<class InputStreamIterator, class DataType = typename InputStreamIterator::value_type>
74 class IntervalWindowStream final : public BaseWindowStream<InputStreamIterator, DataType>
75 {
76 public:
77 
78  // -------------------------------------------------------------------------
79  // Typedefs and Enums
80  // -------------------------------------------------------------------------
81 
84 
86  using Entry = typename Window::Entry;
87  using InputType = typename InputStreamIterator::value_type;
88 
89  using iterator_category = std::input_iterator_tag;
90  using value_type = Window;
91  using pointer = value_type*;
93  using const_reference = value_type const&;
94 
95  // ======================================================================================
96  // Internal Iterator
97  // ======================================================================================
98 
102  class DerivedIterator final : public BaseWindowStream<InputStreamIterator, DataType>::BaseIterator
103  {
104  public:
105 
106  // -------------------------------------------------------------------------
107  // Constructors and Rule of Five
108  // -------------------------------------------------------------------------
109 
110  using self_type = typename IntervalWindowStream<
111  InputStreamIterator, DataType
113 
114  using base_iterator_type = typename BaseWindowStream<
115  InputStreamIterator, DataType
117 
119  using Entry = typename Window::Entry;
120  using InputType = typename InputStreamIterator::value_type;
121 
122  using iterator_category = std::input_iterator_tag;
124  using pointer = value_type*;
126  using const_reference = value_type const&;
127 
128  private:
129 
130  DerivedIterator() = default;
131 
133  IntervalWindowStream const* parent
134  )
135  : base_iterator_type( parent )
136  , parent_( parent )
137  {
138  // Edge case check. See Base for details.
139  if( ! parent_ ) {
140  return;
141  }
142 
143  // Call the base class initializer.
144  // base_iterator_type::init_( parent_ );
145 
146  // Check our own the settings.
147  if( parent_->width_ == 0 ) {
148  throw std::runtime_error( "Cannot use IntervalWindowStream of width 0." );
149  }
150  if( parent_->stride_ == 0 ) {
151  parent_->stride_ = parent_->width_;
152  }
153  if( parent_->stride_ > parent_->width_ ) {
154  throw std::runtime_error(
155  "Cannot use IntervalWindowStream with stride > width."
156  );
157  }
158 
159  // Let's get going.
160  init_chromosome_();
161 
162  // If the input is empty (no data at all), we might already be done.
163  // If not, fill the window with data.
164  if( parent_ ) {
165  update_();
166  }
167  }
168 
169  public:
170 
171  virtual ~DerivedIterator() override = default;
172 
173  DerivedIterator( self_type const& ) = default;
174  DerivedIterator( self_type&& ) = default;
175 
176  DerivedIterator& operator= ( self_type const& ) = default;
177  DerivedIterator& operator= ( self_type&& ) = default;
178 
180 
181  // -------------------------------------------------------------------------
182  // Internal and Virtual Members
183  // -------------------------------------------------------------------------
184 
185  private:
186 
187  void init_chromosome_()
188  {
189  // Check that we are still good. If not, this function being called is likely a user
190  // error by trying to increment a past-the-end iterator.
191  assert( parent_ );
192 
193  // Saveguard. This might be called on an empty range, in which case we just do nothing.
194  if( base_iterator_type::current_ == base_iterator_type::end_ ) {
195  parent_ = nullptr;
196  return;
197  }
198 
199  // Clear the window and prepare for new chromosome.
200  window_.clear();
201  window_.chromosome( parent_->chromosome_function( *base_iterator_type::current_ ));
202  base_iterator_type::is_first_window_ = true;
203  base_iterator_type::is_last_window_ = false;
204  next_index_ = 0;
205 
206  if( parent_->emit_leading_empty_windows_ ) {
207  current_start_ = 1;
208  } else {
209  // Set the start to the window position that we would get after going through all
210  // the previous windows if they were emitted.
211  auto const pos = parent_->position_function( *base_iterator_type::current_ );
212  current_start_ = pos - (( pos - 1 ) % parent_->stride_ );
213  }
214  }
215 
216  void increment_() override final
217  {
218  // Basic check again.
219  assert( parent_ );
220 
221  // Special case: If we have no more underlying data, the iterator still needs to stop
222  // at the last window(s), so that they can be processed. After that, when this
223  // increment_() function is called again by the user, we then set parent_ = nullptr
224  // to indicate that now we are done for good.
225  if( base_iterator_type::current_ == base_iterator_type::end_ ) {
226  // If current_ == end_, we have definitely reached the end of the input, so we need
227  // to have set is_last_window_ previously. If not set, that means it was already
228  // reset, so that this is an iteration past the end.
229  if( ! base_iterator_type::is_last_window_ ) {
230  throw std::runtime_error(
231  "IntervalWindowStream: Incrementing past the end"
232  );
233  }
234 
235  // Indicate that we are done now.
236  parent_ = nullptr;
237  return;
238  }
239 
240  // Check if this call moves to the next chromosome.
241  if(
242  parent_->chromosome_function( *base_iterator_type::current_ ) != window_.chromosome()
243  ) {
244  init_chromosome_();
245  } else {
246  // Update positions.
247  current_start_ += parent_->stride_;
248  base_iterator_type::is_first_window_ = false;
249  }
250 
251  // Fill window with data.
252  update_();
253  }
254 
255  void update_()
256  {
257  // Basic check again.
258  assert( parent_ );
259 
260  // Dequeue everything that is not part of the current interval any more.
261  // We can speed up by clearing the whole window if its last entry is before the current
262  // start, as in that case, all its entries are, so we want to pop them all anyway.
263  // That is the default case when moving with stride = width, so that's nice.
264  if(
265  window_.entries().size() > 0 &&
266  window_.entries().back().position < current_start_
267  ) {
268  window_.entries().clear();
269  } else {
270  while(
271  window_.entries().size() > 0 &&
272  window_.entries().front().position < current_start_
273  ) {
274  window_.entries().pop_front();
275  }
276  }
277 
278  // Now enqueue new entries.
279  while( base_iterator_type::current_ != base_iterator_type::end_ ) {
280  auto const cur_pos = parent_->position_function( *base_iterator_type::current_ );
281 
282  // Get the chromosome and position of the current entry, and see if it belongs
283  // into the current window. If not, we are done here with this window.
284  if(
285  parent_->chromosome_function(
286  *base_iterator_type::current_
287  ) != window_.chromosome() ||
288  cur_pos >= current_start_ + parent_->width_
289  ) {
290  break;
291  }
292  assert(
293  parent_->chromosome_function( *base_iterator_type::current_ ) ==
294  window_.chromosome()
295  );
296  assert( cur_pos >= current_start_ );
297  assert( cur_pos < current_start_ + parent_->width_ );
298 
299  // Check that we are not going backwards in the chromosome,
300  // i.e., if we got unsorted data. That would lead to unwanted behaviour.
301  if(
302  window_.size() > 0 &&
303  window_.entries().back().position >= cur_pos
304  ) {
305  throw std::runtime_error(
306  "Invalid entry in sliding window that not in sequence with other entries. "
307  "Previous entry is " + window_.chromosome() + ":" +
308  std::to_string( window_.entries().back().position ) +
309  ", current (invalid) entry is " + window_.chromosome() + ":" +
310  std::to_string( cur_pos )
311  );
312  }
313 
314  // Now enqueue the entry, and move to the next.
315  window_.entries().emplace_back(
316  next_index_,
317  cur_pos,
318  parent_->entry_input_function( *base_iterator_type::current_ )
319  );
320  ++next_index_;
321  ++base_iterator_type::current_;
322  }
323 
324  // Cases in which we are at the last window: Either we reached the end of the input,
325  // or the end of the current chromosome.
326  if(
327  base_iterator_type::current_ == base_iterator_type::end_ ||
328  parent_->chromosome_function( *base_iterator_type::current_ ) != window_.chromosome()
329  ) {
330  base_iterator_type::is_last_window_ = true;
331  }
332 
333  // Update the window positions.
334  window_.first_position( current_start_ );
335  window_.last_position( current_start_ + parent_->width_ - 1 );
336  }
337 
338  value_type& get_current_window_() const override final
339  {
340  return const_cast<value_type&>( window_ );
341  }
342 
343  BaseWindowStream<InputStreamIterator, DataType> const* get_parent_() const override final
344  {
345  return parent_;
346  }
347 
348  private:
349 
350  // Parent. Needs to live here to have the correct derived type.
351  IntervalWindowStream const* parent_ = nullptr;
352 
353  // Current window and its position
354  Window window_;
355  size_t current_start_ = 1;
356  size_t next_index_ = 0;
357 
358  };
359 
360  // ======================================================================================
361  // Main Class
362  // ======================================================================================
363 
364  // -------------------------------------------------------------------------
365  // Constructors and Rule of Five
366  // -------------------------------------------------------------------------
367 
369  InputStreamIterator begin, InputStreamIterator end,
370  size_t width = 0, size_t stride = 0
371  )
372  : base_type( begin, end )
373  , width_( width )
374  , stride_( stride )
375  {}
376 
377  virtual ~IntervalWindowStream() override = default;
378 
379  IntervalWindowStream( IntervalWindowStream const& ) = default;
381 
384 
386 
387  // -------------------------------------------------------------------------
388  // Settings
389  // -------------------------------------------------------------------------
390 
397  self_type& width( size_t value )
398  {
399  width_ = value;
400  return *this;
401  }
402 
403  size_t width() const
404  {
405  return width_;
406  }
407 
415  self_type& stride( size_t value )
416  {
417  stride_ = value;
418  return *this;
419  }
420 
421  size_t stride() const
422  {
423  return stride_;
424  }
425 
437  {
438  emit_leading_empty_windows_ = value;
439  return *this;
440  }
441 
443  {
444  return emit_leading_empty_windows_;
445  }
446 
447  // -------------------------------------------------------------------------
448  // Virtual Members
449  // -------------------------------------------------------------------------
450 
451 protected:
452 
453  std::unique_ptr<typename BaseWindowStream<InputStreamIterator, DataType>::BaseIterator>
454  get_begin_iterator_() override final
455  {
456  // Cannot use make_unique here, as the Iterator constructor is private,
457  // and trying to make make_unique a friend does not seem to be working...
458  return std::unique_ptr<DerivedIterator>( new DerivedIterator( this ));
459  // return utils::make_unique<DerivedIterator>( this );
460  }
461 
462  std::unique_ptr<typename BaseWindowStream<InputStreamIterator, DataType>::BaseIterator>
463  get_end_iterator_() override final
464  {
465  return std::unique_ptr<DerivedIterator>( new DerivedIterator() );
466  // return utils::make_unique<DerivedIterator>( nullptr );
467  }
468 
469  // -------------------------------------------------------------------------
470  // Data Members
471  // -------------------------------------------------------------------------
472 
473 private:
474 
475  // Settings. We make stride_ mutable so that the iterator can set it to the width.
476  size_t width_ = 0;
477  mutable size_t stride_ = 0;
478 
479  bool emit_leading_empty_windows_ = false;
480 
481  // bool emit empty_windows = true;
482  // bool emit_unfinished_trailing_window = false;
483 
484 };
485 
486 // =================================================================================================
487 // Make Sliding Window Stream
488 // =================================================================================================
489 
499 template<class InputStreamIterator, class DataType = typename InputStreamIterator::value_type>
500 IntervalWindowStream<InputStreamIterator, DataType>
502  InputStreamIterator begin, InputStreamIterator end, size_t width = 0, size_t stride = 0
503 ) {
505  it.width( width );
506  it.stride( stride );
507  return it;
508 }
509 
520 template<class InputStreamIterator>
521 IntervalWindowStream<InputStreamIterator>
523  InputStreamIterator begin, InputStreamIterator end, size_t width = 0, size_t stride = 0
524 ) {
525  using DataType = typename InputStreamIterator::value_type;
526 
527  // Set functors.
528  auto it = IntervalWindowStream<InputStreamIterator>( begin, end );
529  it.entry_input_function = []( DataType const& variant ) {
530  return variant;
531  };
532  it.chromosome_function = []( DataType const& variant ) {
533  return variant.chromosome;
534  };
535  it.position_function = []( DataType const& variant ) {
536  return variant.position;
537  };
538 
539  // Set properties.
540  it.width( width );
541  it.stride( stride );
542  return it;
543 }
544 
556 template<class InputStreamIterator>
557 WindowViewStream<InputStreamIterator>
559  InputStreamIterator begin, InputStreamIterator end, size_t width = 0, size_t stride = 0
560 ) {
562  make_default_interval_window_stream( begin, end, width, stride )
563  );
564 }
565 
566 } // namespace population
567 } // namespace genesis
568 
569 #endif // include guard
genesis::population::IntervalWindowStream::width
size_t width() const
Definition: interval_window_stream.hpp:403
genesis::population::BaseWindowStream
Base class for streams of Windows over the chromosomes of a genome.
Definition: base_window_stream.hpp:119
genesis::population::BaseWindowStream::BaseIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: base_window_stream.hpp:491
base_window_stream.hpp
genesis::population::BaseWindowStream::chromosome_function
std::function< std::string(InputType const &)> chromosome_function
Functor that yields the current chromosome, given the input stream data.
Definition: base_window_stream.hpp:152
genesis::population::IntervalWindowStream::get_end_iterator_
std::unique_ptr< typename BaseWindowStream< InputStreamIterator, DataType >::BaseIterator > get_end_iterator_() override final
Get the end iterator.
Definition: interval_window_stream.hpp:463
genesis::population::BaseWindowStream< InputStreamIterator, typename InputStreamIterator::value_type >::end
Iterator end()
Definition: base_window_stream.hpp:759
genesis::population::Window< DataType >
genesis::population::IntervalWindowStream::stride
size_t stride() const
Definition: interval_window_stream.hpp:421
genesis::population::IntervalWindowStream
Stream for sliding Windows of fixed sized intervals over the chromosomes of a genome.
Definition: interval_window_stream.hpp:74
window_view.hpp
genesis::population::make_default_sliding_interval_window_view_stream
WindowViewStream< InputStreamIterator > make_default_sliding_interval_window_view_stream(InputStreamIterator begin, InputStreamIterator end, size_t width=0, size_t stride=0)
Helper class that creates a IntervalWindowStream and wraps it in a WindowViewStream.
Definition: interval_window_stream.hpp:558
genesis::population::BaseWindowStream::BaseIterator::value_type
WindowType value_type
Definition: base_window_stream.hpp:492
genesis::population::BaseWindowStream::BaseIterator::const_reference
value_type const & const_reference
Definition: base_window_stream.hpp:495
genesis::population::IntervalWindowStream::DerivedIterator::~DerivedIterator
virtual ~DerivedIterator() override=default
genesis::population::make_default_interval_window_stream
IntervalWindowStream< InputStreamIterator > make_default_interval_window_stream(InputStreamIterator begin, InputStreamIterator end, size_t width=0, size_t stride=0)
Helper function to instantiate a IntervalWindowStream for a default use case.
Definition: interval_window_stream.hpp:522
genesis::population::BaseWindowStream< InputStreamIterator, typename InputStreamIterator::value_type >::DataType
typename InputStreamIterator::value_type DataType
Definition: base_window_stream.hpp:128
genesis::population::IntervalWindowStream::const_reference
value_type const & const_reference
Definition: interval_window_stream.hpp:93
genesis::population::IntervalWindowStream::DerivedIterator
friend DerivedIterator
Definition: interval_window_stream.hpp:385
genesis::population::Window::size
size_t size() const
Get the number of D/Data Entries that are stored in the Window.
Definition: window.hpp:237
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis::population::IntervalWindowStream::InputType
typename InputStreamIterator::value_type InputType
Definition: interval_window_stream.hpp:87
genesis::population::IntervalWindowStream::DerivedIterator::value_type
Window value_type
Definition: interval_window_stream.hpp:123
genesis::population::IntervalWindowStream::iterator_category
std::input_iterator_tag iterator_category
Definition: interval_window_stream.hpp:89
genesis::population::IntervalWindowStream::DerivedIterator::Entry
typename Window::Entry Entry
Definition: interval_window_stream.hpp:119
genesis::population::IntervalWindowStream::get_begin_iterator_
std::unique_ptr< typename BaseWindowStream< InputStreamIterator, DataType >::BaseIterator > get_begin_iterator_() override final
Get the begin iterator.
Definition: interval_window_stream.hpp:454
genesis::population::IntervalWindowStream::~IntervalWindowStream
virtual ~IntervalWindowStream() override=default
genesis::population::IntervalWindowStream::width
self_type & width(size_t value)
Width of the Window, that is, the fixed length along the chromosome.
Definition: interval_window_stream.hpp:397
genesis::population::BaseWindow::chromosome
std::string const & chromosome() const
Get the chromosome name that this Window belongs to.
Definition: base_window.hpp:90
genesis::population::Window::entries
container const & entries() const
Immediate container access to the Data Entries.
Definition: window.hpp:362
genesis::population::BaseWindowStream::position_function
std::function< size_t(InputType const &)> position_function
Functor that yields the current position on the chromosome, given the input stream data.
Definition: base_window_stream.hpp:158
genesis::population::IntervalWindowStream::DerivedIterator::IntervalWindowStream
friend IntervalWindowStream
Definition: interval_window_stream.hpp:179
genesis::population::BaseWindowStream::BaseIterator::BaseIterator
BaseIterator()=default
genesis::population::BaseWindowStream::BaseIterator::self_type
typename BaseWindowStream< InputStreamType, DataType, WindowType >::BaseIterator self_type
Definition: base_window_stream.hpp:488
window_view_stream.hpp
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::Window::Entry
Data that is stored per entry that was enqueued in a window.
Definition: window.hpp:125
genesis::population::IntervalWindowStream::emit_leading_empty_windows
bool emit_leading_empty_windows() const
Definition: interval_window_stream.hpp:442
genesis::population::IntervalWindowStream::operator=
IntervalWindowStream & operator=(IntervalWindowStream const &)=default
genesis::population::BaseWindowStream< InputStreamIterator, typename InputStreamIterator::value_type >::begin
Iterator begin()
Definition: base_window_stream.hpp:747
genesis::population::IntervalWindowStream::DerivedIterator::operator=
DerivedIterator & operator=(self_type const &)=default
genesis::population::BaseWindow::clear
void clear()
Clear all data from the Window.
Definition: base_window.hpp:234
window.hpp
genesis::population::BaseWindowStream::entry_input_function
std::function< DataType(InputType const &)> entry_input_function
Functor to convert from the underlying input stream that provides the data to fill the windows to the...
Definition: base_window_stream.hpp:147
genesis::population::make_window_view_stream
WindowViewStream< typename T::InputStreamType, typename T::DataType > make_window_view_stream(T const &window_iterator)
Create a WindowViewStream that iterates some underlying BaseWindowStream.
Definition: window_view_stream.hpp:317
genesis::population::IntervalWindowStream::stride
self_type & stride(size_t value)
Stride of the Window, that is, how many positions to move forward with each iteration.
Definition: interval_window_stream.hpp:415
genesis::population::IntervalWindowStream::emit_leading_empty_windows
self_type & emit_leading_empty_windows(bool value)
Select whether the iterator produces empty windows in the beginning of each chromosome,...
Definition: interval_window_stream.hpp:436
genesis::population::BaseWindowStream::BaseIterator::pointer
value_type * pointer
Definition: base_window_stream.hpp:493
genesis::population::IntervalWindowStream::DerivedIterator::base_iterator_type
typename BaseWindowStream< InputStreamIterator, DataType >::BaseIterator base_iterator_type
Definition: interval_window_stream.hpp:116
genesis::population::IntervalWindowStream::DerivedIterator
Internal iterator that produces Windows.
Definition: interval_window_stream.hpp:102
genesis::population::IntervalWindowStream::IntervalWindowStream
IntervalWindowStream(InputStreamIterator begin, InputStreamIterator end, size_t width=0, size_t stride=0)
Definition: interval_window_stream.hpp:368
genesis::population::BaseWindow::first_position
size_t first_position() const
Get the first position in the chromosome of the Window, that is, where the Window starts.
Definition: base_window.hpp:114
genesis::population::BaseWindow::last_position
size_t last_position() const
Get the last position in the chromosome of the Window, that is, where the Window ends.
Definition: base_window.hpp:134
genesis::population::BaseWindowStream::BaseIterator::reference
value_type & reference
Definition: base_window_stream.hpp:494
genesis::population::IntervalWindowStream::DerivedIterator::Window
::genesis::population::Window< DataType > Window
Definition: interval_window_stream.hpp:118
genesis::population::IntervalWindowStream::Entry
typename Window::Entry Entry
Definition: interval_window_stream.hpp:86
genesis::population::IntervalWindowStream::Window
::genesis::population::Window< DataType > Window
Definition: interval_window_stream.hpp:85
genesis::population::BaseWindowStream::BaseIterator::InputType
typename InputStreamType::value_type InputType
Definition: base_window_stream.hpp:489
genesis::population::make_interval_window_stream
IntervalWindowStream< InputStreamIterator, DataType > make_interval_window_stream(InputStreamIterator begin, InputStreamIterator end, size_t width=0, size_t stride=0)
Helper function to instantiate a IntervalWindowStream without the need to specify the template parame...
Definition: interval_window_stream.hpp:501