A library for working with phylogenetic and population genetic data.
v0.27.0
sliding_interval_window_iterator.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_WINDOW_SLIDING_INTERVAL_WINDOW_ITERATOR_H_
2 #define GENESIS_POPULATION_WINDOW_SLIDING_INTERVAL_WINDOW_ITERATOR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
35 
36 #include <cassert>
37 #include <functional>
38 #include <stdexcept>
39 #include <string>
40 #include <type_traits>
41 #include <utility>
42 
43 namespace genesis {
44 namespace population {
45 
46 // =================================================================================================
47 // Sliding Interval Window Iterator
48 // =================================================================================================
49 
65 template<class ForwardIterator, class DataType = typename ForwardIterator::value_type>
66 class SlidingIntervalWindowIterator final : public BaseWindowIterator<ForwardIterator, DataType>
67 {
68 public:
69 
70  // -------------------------------------------------------------------------
71  // Typedefs and Enums
72  // -------------------------------------------------------------------------
73 
76 
78  using Entry = typename Window::Entry;
79  using InputType = typename ForwardIterator::value_type;
80 
81  using iterator_category = std::input_iterator_tag;
82  using value_type = Window;
83  using pointer = value_type*;
85  using const_reference = value_type const&;
86 
87  // ======================================================================================
88  // Internal Iterator
89  // ======================================================================================
90 
94  class DerivedIterator final : public BaseWindowIterator<ForwardIterator, DataType>::BaseIterator
95  {
96  public:
97 
98  // -------------------------------------------------------------------------
99  // Constructors and Rule of Five
100  // -------------------------------------------------------------------------
101 
102  using self_type = typename SlidingIntervalWindowIterator<
103  ForwardIterator, DataType
105 
106  using base_iterator_type = typename BaseWindowIterator<
107  ForwardIterator, DataType
109 
111  using Entry = typename Window::Entry;
112  using InputType = typename ForwardIterator::value_type;
113 
114  using iterator_category = std::input_iterator_tag;
116  using pointer = value_type*;
118  using const_reference = value_type const&;
119 
120  private:
121 
122  DerivedIterator() = default;
123 
125  SlidingIntervalWindowIterator const* parent
126  )
127  : base_iterator_type( parent )
128  , parent_( parent )
129  {
130  // Edge case check. See Base for details.
131  if( ! parent_ ) {
132  return;
133  }
134 
135  // Call the base class initializer.
136  // base_iterator_type::init_( parent_ );
137 
138  // Check our own the settings.
139  if( parent_->width_ == 0 ) {
140  throw std::runtime_error( "Cannot use SlidingIntervalWindowIterator of width 0." );
141  }
142  if( parent_->stride_ == 0 ) {
143  parent_->stride_ = parent_->width_;
144  }
145  if( parent_->stride_ > parent_->width_ ) {
146  throw std::runtime_error(
147  "Cannot use SlidingIntervalWindowIterator with stride > width."
148  );
149  }
150 
151  // Let's get going.
152  init_chromosome_();
153  update_();
154  }
155 
156  public:
157 
158  ~DerivedIterator() = default;
159 
160  DerivedIterator( self_type const& ) = default;
161  DerivedIterator( self_type&& ) = default;
162 
163  DerivedIterator& operator= ( self_type const& ) = default;
164  DerivedIterator& operator= ( self_type&& ) = default;
165 
167 
168  // -------------------------------------------------------------------------
169  // Internal and Virtual Members
170  // -------------------------------------------------------------------------
171 
172  private:
173 
174  void init_chromosome_()
175  {
176  // Saveguard. This might be called on an empty range, in which case we just do nothing.
177  if( base_iterator_type::current_ == base_iterator_type::end_ ) {
178  return;
179  }
180 
181  // Clear the window and prepare for new chromosome.
182  window_.clear();
183  window_.chromosome( parent_->chromosome_function( *base_iterator_type::current_ ));
184  base_iterator_type::is_first_window_ = true;
185  base_iterator_type::is_last_window_ = false;
186  next_index_ = 0;
187 
188  if( parent_->emit_leading_empty_windows_ ) {
189  current_start_ = 1;
190  } else {
191  // Set the start to the window position that we would get after going through all
192  // the previous windows if they were emitted.
193  auto const pos = parent_->position_function( *base_iterator_type::current_ );
194  current_start_ = pos - (( pos - 1 ) % parent_->stride_ );
195  }
196  }
197 
198  void increment_() override final
199  {
200  // Special case: If we have no more underlying data, the iterator still needs to stop
201  // at the last window(s), so that they can be processed. After that, when this
202  // increment_() function is called again by the user, we then set parent_ = nullptr
203  // to indicate that now we are done for good.
204  if( base_iterator_type::current_ == base_iterator_type::end_ ) {
205  // If current_ == end_, we have definitely reached the end of the input, so we need
206  // to have set is_last_window_ previously. If not set, that means it was already
207  // reset, so that this is an iteration past the end.
208  if( ! base_iterator_type::is_last_window_ ) {
209  throw std::runtime_error(
210  "SlidingIntervalWindowIterator: Incrementing past the end"
211  );
212  }
213 
214  // Indicate that we are done now.
215  parent_ = nullptr;
216  return;
217  }
218 
219  // Check if this call moves to the next chromosome.
220  if(
221  parent_->chromosome_function( *base_iterator_type::current_ ) != window_.chromosome()
222  ) {
223  init_chromosome_();
224  } else {
225  // Update positions.
226  current_start_ += parent_->stride_;
227  base_iterator_type::is_first_window_ = false;
228  }
229 
230  // Fill window with data.
231  update_();
232  }
233 
234  void update_()
235  {
236  // Dequeue everything that is not part of the current interval any more.
237  // We can speed up by clearing the whole window if its last entry is before the current
238  // start, as in that case, all its entries are, so we want to pop them all anyway.
239  // That is the default case when moving with stride = width, so that's nice.
240  if(
241  window_.entries().size() > 0 &&
242  window_.entries().back().position < current_start_
243  ) {
244  window_.entries().clear();
245  } else {
246  while(
247  window_.entries().size() > 0 &&
248  window_.entries().front().position < current_start_
249  ) {
250  window_.entries().pop_front();
251  }
252  }
253 
254  // Now enqueue new entries.
255  while( base_iterator_type::current_ != base_iterator_type::end_ ) {
256  auto const cur_pos = parent_->position_function( *base_iterator_type::current_ );
257 
258  // Get the chromosome and position of the current entry, and see if it belongs
259  // into the current window. If not, we are done here with this window.
260  if(
261  parent_->chromosome_function(
262  *base_iterator_type::current_
263  ) != window_.chromosome() ||
264  cur_pos >= current_start_ + parent_->width_
265  ) {
266  break;
267  }
268  assert(
269  parent_->chromosome_function( *base_iterator_type::current_ ) ==
270  window_.chromosome()
271  );
272  assert( cur_pos >= current_start_ );
273  assert( cur_pos < current_start_ + parent_->width_ );
274 
275  // Check that we are not going backwards in the chromosome,
276  // i.e., if we got unsorted data. That would lead to unwanted behaviour.
277  if(
278  window_.size() > 0 &&
279  window_.entries().back().position >= cur_pos
280  ) {
281  throw std::runtime_error(
282  "Invalid entry in sliding window that not in sequence with other entries. "
283  "Previous entry is " + window_.chromosome() + ":" +
284  std::to_string( window_.entries().back().position ) +
285  ", current (invalid) entry is " + window_.chromosome() + ":" +
286  std::to_string( cur_pos )
287  );
288  }
289 
290  // Now enqueue the entry, and move to the next.
291  window_.entries().emplace_back(
292  next_index_,
293  cur_pos,
294  parent_->entry_input_function( *base_iterator_type::current_ )
295  );
296  ++next_index_;
297  ++base_iterator_type::current_;
298  }
299 
300  // Cases in which we are at the last window: Either we reached the end of the input,
301  // or the end of the current chromosome.
302  if(
303  base_iterator_type::current_ == base_iterator_type::end_ ||
304  parent_->chromosome_function( *base_iterator_type::current_ ) != window_.chromosome()
305  ) {
306  base_iterator_type::is_last_window_ = true;
307  }
308 
309  // Update the window positions.
310  window_.first_position( current_start_ );
311  window_.last_position( current_start_ + parent_->width_ - 1 );
312  }
313 
314  value_type& get_current_window_() const override final
315  {
316  return const_cast<value_type&>( window_ );
317  }
318 
319  BaseWindowIterator<ForwardIterator, DataType> const* get_parent_() const override final
320  {
321  return parent_;
322  }
323 
324  private:
325 
326  // Parent. Needs to live here to have the correct derived type.
327  SlidingIntervalWindowIterator const* parent_ = nullptr;
328 
329  // Current window and its position
330  Window window_;
331  size_t current_start_ = 1;
332  size_t next_index_ = 0;
333 
334  };
335 
336  // ======================================================================================
337  // Main Class
338  // ======================================================================================
339 
340  // -------------------------------------------------------------------------
341  // Constructors and Rule of Five
342  // -------------------------------------------------------------------------
343 
345  ForwardIterator begin, ForwardIterator end
346  )
347  : base_type( begin, end )
348  {}
349 
350  ~SlidingIntervalWindowIterator() = default;
351 
354 
357 
359 
360  // -------------------------------------------------------------------------
361  // Settings
362  // -------------------------------------------------------------------------
363 
370  self_type& width( size_t value )
371  {
372  width_ = value;
373  return *this;
374  }
375 
376  size_t width() const
377  {
378  return width_;
379  }
380 
388  self_type& stride( size_t value )
389  {
390  stride_ = value;
391  return *this;
392  }
393 
394  size_t stride() const
395  {
396  return stride_;
397  }
398 
410  {
411  emit_leading_empty_windows_ = value;
412  return *this;
413  }
414 
416  {
417  return emit_leading_empty_windows_;
418  }
419 
420  // -------------------------------------------------------------------------
421  // Virtual Members
422  // -------------------------------------------------------------------------
423 
424 protected:
425 
426  std::unique_ptr<typename BaseWindowIterator<ForwardIterator, DataType>::BaseIterator>
427  get_begin_iterator_() override final
428  {
429  // Cannot use make_unique here, as the Iterator constructor is private,
430  // and trying to make make_unique a friend does not seem to be working...
431  return std::unique_ptr<DerivedIterator>( new DerivedIterator( this ));
432  // return utils::make_unique<DerivedIterator>( this );
433  }
434 
435  std::unique_ptr<typename BaseWindowIterator<ForwardIterator, DataType>::BaseIterator>
436  get_end_iterator_() override final
437  {
438  return std::unique_ptr<DerivedIterator>( new DerivedIterator( nullptr ));
439  // return utils::make_unique<DerivedIterator>( nullptr );
440  }
441 
442  // -------------------------------------------------------------------------
443  // Data Members
444  // -------------------------------------------------------------------------
445 
446 private:
447 
448  // Settings. We make stride_ mutable so that the iterator can set it to the width.
449  size_t width_ = 0;
450  mutable size_t stride_ = 0;
451 
452  bool emit_leading_empty_windows_ = true;
453 
454  // bool emit empty_windows = true;
455  // bool emit_unfinished_trailing_window = false;
456 
457 };
458 
459 // =================================================================================================
460 // Make Sliding Window Iterator
461 // =================================================================================================
462 
472 template<class ForwardIterator, class DataType = typename ForwardIterator::value_type>
473 SlidingIntervalWindowIterator<ForwardIterator, DataType>
475  ForwardIterator begin, ForwardIterator end, size_t width = 0, size_t stride = 0
476 ) {
478  it.width( width );
479  it.stride( stride );
480  return it;
481 }
482 
493 template<class ForwardIterator>
494 SlidingIntervalWindowIterator<ForwardIterator>
496  ForwardIterator begin, ForwardIterator end, size_t width = 0, size_t stride = 0
497 ) {
498  using DataType = typename ForwardIterator::value_type;
499 
500  // Set functors.
501  auto it = SlidingIntervalWindowIterator<ForwardIterator>( begin, end );
502  it.entry_input_function = []( DataType const& variant ) {
503  return variant;
504  };
505  it.chromosome_function = []( DataType const& variant ) {
506  return variant.chromosome;
507  };
508  it.position_function = []( DataType const& variant ) {
509  return variant.position;
510  };
511 
512  // Set properties.
513  it.width( width );
514  it.stride( stride );
515  return it;
516 }
517 
518 } // namespace population
519 } // namespace genesis
520 
521 #endif // include guard
genesis::population::SlidingIntervalWindowIterator::stride
size_t stride() const
Definition: sliding_interval_window_iterator.hpp:394
genesis::population::SlidingIntervalWindowIterator::DerivedIterator::Window
::genesis::population::Window< DataType > Window
Definition: sliding_interval_window_iterator.hpp:110
genesis::population::BaseWindowIterator
Base iterator class for Windows over the chromosomes of a genome.
Definition: base_window_iterator.hpp:106
genesis::population::SlidingIntervalWindowIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: sliding_interval_window_iterator.hpp:81
genesis::population::SlidingIntervalWindowIterator::get_end_iterator_
std::unique_ptr< typename BaseWindowIterator< ForwardIterator, DataType >::BaseIterator > get_end_iterator_() override final
Definition: sliding_interval_window_iterator.hpp:436
genesis::population::SlidingIntervalWindowIterator::emit_leading_empty_windows
self_type & emit_leading_empty_windows(bool value)
Select whether the iterator produces empty windows in the beginning of each chromosome,...
Definition: sliding_interval_window_iterator.hpp:409
genesis::population::BaseWindowIterator::BaseIterator::BaseIterator
BaseIterator(BaseWindowIterator const *parent)
Construct the base class, which does initialization checks on its member variables to ensure that the...
Definition: base_window_iterator.hpp:370
genesis::population::SlidingIntervalWindowIterator::DerivedIterator
friend DerivedIterator
Definition: sliding_interval_window_iterator.hpp:358
genesis::population::Window< DataType >
genesis::population::BaseWindowIterator::BaseIterator
Internal PIMPL-like implementation of the iterator that produces Windows.
Definition: base_window_iterator.hpp:342
genesis::population::SlidingIntervalWindowIterator::~SlidingIntervalWindowIterator
~SlidingIntervalWindowIterator()=default
genesis::population::SlidingIntervalWindowIterator::width
size_t width() const
Definition: sliding_interval_window_iterator.hpp:376
genesis::population::SlidingIntervalWindowIterator::emit_leading_empty_windows
bool emit_leading_empty_windows() const
Definition: sliding_interval_window_iterator.hpp:415
genesis::population::SlidingIntervalWindowIterator::Entry
typename Window::Entry Entry
Definition: sliding_interval_window_iterator.hpp:78
genesis::population::SlidingIntervalWindowIterator::InputType
typename ForwardIterator::value_type InputType
Definition: sliding_interval_window_iterator.hpp:79
genesis::population::Window::size
size_t size() const
Get the number of D/Data Entries that are stored in the Window.
Definition: window.hpp:277
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: functions/genome_locus.hpp:48
genesis::population::BaseWindowIterator::BaseIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: base_window_iterator.hpp:356
genesis::population::make_default_sliding_interval_window_iterator
SlidingIntervalWindowIterator< ForwardIterator > make_default_sliding_interval_window_iterator(ForwardIterator begin, ForwardIterator end, size_t width=0, size_t stride=0)
Helper function to instantiate a SlidingIntervalWindowIterator for a default use case.
Definition: sliding_interval_window_iterator.hpp:495
genesis::population::BaseWindowIterator::entry_input_function
std::function< DataType(InputType const &)> entry_input_function
Functor to convert from the underlying input iterator that provides the data to fill the windows to t...
Definition: base_window_iterator.hpp:134
base_window_iterator.hpp
genesis::population::BaseWindowIterator::chromosome_function
std::function< std::string(InputType const &)> chromosome_function
Functor that yields the current chromosome, given the input iterator data.
Definition: base_window_iterator.hpp:139
genesis::population::Window::last_position
size_t last_position() const
Get the last position in the chromosome of the Window, that is, where the Window ends.
Definition: window.hpp:358
genesis::population::SlidingIntervalWindowIterator::DerivedIterator
Internal iterator that produces Windows.
Definition: sliding_interval_window_iterator.hpp:94
genesis::population::SlidingIntervalWindowIterator
Iterator for sliding Windows of fixed sized intervals over the chromosomes of a genome.
Definition: sliding_interval_window_iterator.hpp:66
genesis::population::SlidingIntervalWindowIterator::DerivedIterator::base_iterator_type
typename BaseWindowIterator< ForwardIterator, DataType >::BaseIterator base_iterator_type
Definition: sliding_interval_window_iterator.hpp:108
genesis::population::Window::entries
container const & entries() const
Immediate container access to the Data Entries.
Definition: window.hpp:461
genesis::population::BaseWindowIterator< ForwardIterator, typename ForwardIterator::value_type >::begin
Iterator begin()
Definition: base_window_iterator.hpp:502
genesis::population::SlidingIntervalWindowIterator::DerivedIterator::operator=
DerivedIterator & operator=(self_type const &)=default
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::make_sliding_interval_window_iterator
SlidingIntervalWindowIterator< ForwardIterator, DataType > make_sliding_interval_window_iterator(ForwardIterator begin, ForwardIterator end, size_t width=0, size_t stride=0)
Helper function to instantiate a SlidingIntervalWindowIterator without the need to specify the templa...
Definition: sliding_interval_window_iterator.hpp:474
genesis::population::BaseWindowIterator::position_function
std::function< size_t(InputType const &)> position_function
Functor that yields the current position on the chromosome, given the input iterator data.
Definition: base_window_iterator.hpp:145
genesis::population::Window::Entry
Data that is stored per entry that was enqueued in a window.
Definition: window.hpp:124
genesis::population::SlidingIntervalWindowIterator::SlidingIntervalWindowIterator
SlidingIntervalWindowIterator(ForwardIterator begin, ForwardIterator end)
Definition: sliding_interval_window_iterator.hpp:344
genesis::population::BaseWindowIterator::BaseIterator::const_reference
value_type const & const_reference
Definition: base_window_iterator.hpp:360
genesis::population::SlidingIntervalWindowIterator::Window
::genesis::population::Window< DataType > Window
Definition: sliding_interval_window_iterator.hpp:77
genesis::population::SlidingIntervalWindowIterator::DerivedIterator::SlidingIntervalWindowIterator
friend SlidingIntervalWindowIterator
Definition: sliding_interval_window_iterator.hpp:166
genesis::population::SlidingIntervalWindowIterator::DerivedIterator::value_type
Window value_type
Definition: sliding_interval_window_iterator.hpp:115
genesis::population::SlidingIntervalWindowIterator::DerivedIterator::~DerivedIterator
~DerivedIterator()=default
genesis::population::SlidingIntervalWindowIterator::stride
self_type & stride(size_t value)
Stride of the Window, that is, how many positions to move forward with each iteration.
Definition: sliding_interval_window_iterator.hpp:388
genesis::population::SlidingIntervalWindowIterator::operator=
SlidingIntervalWindowIterator & operator=(SlidingIntervalWindowIterator const &)=default
genesis::population::Window::first_position
size_t first_position() const
Get the first position in the chromosome of the Window, that is, where the Window starts.
Definition: window.hpp:338
genesis::population::Window::clear
void clear()
Clear all data from the Window.
Definition: window.hpp:526
genesis::population::BaseWindowIterator::BaseIterator::Entry
typename Window::Entry Entry
Definition: base_window_iterator.hpp:353
genesis::population::SlidingIntervalWindowIterator::const_reference
value_type const & const_reference
Definition: sliding_interval_window_iterator.hpp:85
genesis::population::BaseWindowIterator< ForwardIterator, typename ForwardIterator::value_type >::end
Iterator end()
Definition: base_window_iterator.hpp:507
genesis::population::BaseWindowIterator::BaseIterator::InputType
typename ForwardIterator::value_type InputType
Definition: base_window_iterator.hpp:354
genesis::population::Window::chromosome
std::string const & chromosome() const
Get the chromosome name that this Window belongs to.
Definition: window.hpp:224
genesis::population::SlidingIntervalWindowIterator::width
self_type & width(size_t value)
Width of the Window, that is, the fixed length along the chromosome.
Definition: sliding_interval_window_iterator.hpp:370
genesis::population::SlidingIntervalWindowIterator::get_begin_iterator_
std::unique_ptr< typename BaseWindowIterator< ForwardIterator, DataType >::BaseIterator > get_begin_iterator_() override final
Definition: sliding_interval_window_iterator.hpp:427