A library for working with phylogenetic data.
v0.25.0
sliding_window_iterator.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_WINDOW_SLIDING_WINDOW_ITERATOR_H_
2 #define GENESIS_POPULATION_WINDOW_SLIDING_WINDOW_ITERATOR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2021 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
36 
37 #include <cassert>
38 #include <functional>
39 #include <stdexcept>
40 #include <string>
41 #include <type_traits>
42 #include <utility>
43 #include <vector>
44 
45 namespace genesis {
46 namespace population {
47 
48 // =================================================================================================
49 // Genomic Sliding Window Iterator
50 // =================================================================================================
51 
69 template<class InputType, class DataType = InputType>
71 {
77 
83 
92  size_t width = 0;
93 
107  size_t stride = 0;
108 
113 
114 
115  // bool emit_unfinished_trailing_window = false;
116  //
117  // bool start_from_one = false
118  //
119  // bool emit empty_windows = true;
120 
121 
126  std::function<DataType( InputType const& )> entry_input_function;
127 
131  std::function<std::string( InputType const& )> chromosome_function;
132 
137  std::function<size_t( InputType const& )> position_function;
138 };
139 
143 template<class ForwardIterator, class InputType, class DataType = InputType>
145 {
146 public:
147 
148  // -------------------------------------------------------------------------
149  // Typedefs and Enums
150  // -------------------------------------------------------------------------
151 
152  // using Data = typename ForwardIterator::value_type;
153  // using Data = typename std::result_of<decltype(EntryInputFunctor)()>::type;
154 
156  using Entry = typename Window::Entry;
157 
162  using const_reference = value_type const&;
163 
164  // using difference_type = typename Window::container::difference_type;
165  // using size_type = typename Window::container::size_type;
166  using iterator_category = std::input_iterator_tag;
167 
168  // -------------------------------------------------------------------------
169  // Constructors and Rule of Five
170  // -------------------------------------------------------------------------
171 
173  settings_type const& settings,
174  ForwardIterator begin, ForwardIterator end
175  )
176  : settings_(settings)
177  , begin_(begin)
178  , current_(begin)
179  , end_(end)
180  {
181  // Some boundary checks.
182  if( settings_.width == 0 ) {
183  throw std::runtime_error( "Cannot use SlidingWindowIterator of width 0." );
184  }
185  if( settings_.stride == 0 ) {
186  settings_.stride = settings_.width;
187  }
188  if( settings_.stride > settings_.width ) {
189  throw std::runtime_error( "Cannot use SlidingWindowIterator with stride > width." );
190  }
191  window_.anchor_type( settings_.anchor_type );
192 
193  // Error checking.
194  if( ! settings_.entry_input_function ) {
195  throw std::runtime_error(
196  "Need to set SlidingWindowIteratorSettings::entry_input_function before using it "
197  "to construct a SlidingWindowIterator"
198  );
199  }
200  if( ! settings_.chromosome_function ) {
201  throw std::runtime_error(
202  "Need to set SlidingWindowIteratorSettings::chromosome_function before using it "
203  "to construct a SlidingWindowIterator"
204  );
205  }
206  if( ! settings_.position_function ) {
207  throw std::runtime_error(
208  "Need to set SlidingWindowIteratorSettings::position_function before using it "
209  "to construct a SlidingWindowIterator"
210  );
211  }
212 
213  // Let's get going.
214  init_chromosome_();
215  update_();
216  }
217 
218  // SlidingWindowIterator(
219  // settings_type const& settings,
220  // ForwardIterator begin, ForwardIterator end,
221  // std::string const& chromosome,
222  // size_t start_position, size_t end_position
223  // )
224  // : settings_(settings)
225  // , begin_(begin)
226  // , current_(begin)
227  // , end_(end)
228  // {
229  // // Some boundary checks.
230  // if( settings_.width == 0 ) {
231  // throw std::runtime_error( "Cannot use SlidingWindowIterator of width 0." );
232  // }
233  // if( settings_.stride == 0 ) {
234  // settings_.stride = settings_.width;
235  // }
236  // if( settings_.stride > settings_.width ) {
237  // throw std::runtime_error( "Cannot use SlidingWindowIterator with stride > width." );
238  // }
239  // window_.anchor_type( settings_.anchor_type );
240  //
241  // // Let's get going.
242  // init_chromosome_();
243  // update_();
244  // }
245 
246  ~SlidingWindowIterator() = default;
247 
248  SlidingWindowIterator( SlidingWindowIterator const& ) = default;
250 
253 
254  // -------------------------------------------------------------------------
255  // Accessors & Modifiers
256  // -------------------------------------------------------------------------
257 
268  bool is_first_window() const
269  {
270  return is_first_window_;
271  }
272 
283  bool is_last_window() const
284  {
285  return is_last_window_;
286  }
287 
288  // /**
289  // * @brief Get the chromosome name that we are currently processing.
290  // *
291  // * Initially, this is empty. After enqueuing data, it contains the chromosome name of the last
292  // * Data entry that was enqueued.
293  // */
294  // std::string const& chromosome() const
295  // {
296  // // We could keep our own chromosome here, but Window already as a member for this,
297  // // so we just re-use.
298  // return window_.chromosome();
299  // }
300 
301  // -------------------------------------------------------------------------
302  // Basic Iterator Operators
303  // -------------------------------------------------------------------------
304 
305  value_type const& operator*() const
306  {
307  return window_;
308  }
309 
310  value_type const* operator->() const
311  {
312  return &window_;
313  }
314 
315  // -------------------------------------------------------------------------
316  // Iteration
317  // -------------------------------------------------------------------------
318 
320  {
321  increment_();
322  return *this;
323  }
324 
326  {
327  self_type it(*this);
328  ++(*this);
329  return it;
330  }
331 
332  // -------------------------------------------------------------------------
333  // Iterator Comparison
334  // -------------------------------------------------------------------------
335 
336  // bool operator==( self_type const& it ) const
337  // {
338  // return current_ == it.current_;
339  // }
340  //
341  // bool operator!=( self_type const& it ) const
342  // {
343  // return !(*this == it);
344  // }
345 
346  explicit operator bool() const
347  {
348  return current_ != end_ || is_last_window_;
349  }
350 
351  bool good() const
352  {
353  return current_ != end_ || is_last_window_;
354  }
355 
356  // -------------------------------------------------------------------------
357  // Internal Members
358  // -------------------------------------------------------------------------
359 
360 private:
361 
362  void increment_()
363  {
364  // Special case: If we have no more data, the iterator still needs to stop at the last
365  // window, so that it can be processed. Hence, the operator bool() checks for this condition
366  // by testing for is_last_window_. After that, when this increment_() function is called
367  // again by the user, we then set is_last_window_ to false, indicating that now we are
368  // done for good.
369  if( current_ == end_ ) {
370  // If current_ == end_, we have definitely reached the end of the input, so we need
371  // to have set is_last_window_ previously. If not set, that means it was already reset,
372  // so that this is an iteration past the end.
373  if( ! is_last_window_ ) {
374  throw std::runtime_error( "SlidingWindowIterator: Incrementing past the end" );
375  }
376 
377  // Now reset, so that operator bool() returns false, indicating that we are done.
378  is_last_window_ = false;
379  return;
380  }
381 
382  // Check if this call moves to the next chromosome.
383  if( settings_.chromosome_function( *current_ ) != window_.chromosome() ) {
384  init_chromosome_();
385  } else {
386  // Update positions.
387  current_start_ += settings_.stride;
388  is_first_window_ = false;
389  }
390 
391  update_();
392  }
393 
394  void init_chromosome_()
395  {
396  // Saveguard. This might be called on an empty range, in which case we just do nothing.
397  if( current_ == end_ ) {
398  return;
399  }
400 
401  // Clear the window and prepare for new chromosome.
402  window_.clear();
403  window_.chromosome( settings_.chromosome_function( *current_ ));
404  is_first_window_ = true;
405  is_last_window_ = false;
406  next_index_ = 0;
407 
408  if( settings_.emit_leading_empty_windows ) {
409  current_start_ = 0;
410  } else {
411  // Set the start to the window position that we would get after going through all
412  // the previous windows if they were emitted.
413  auto const pos = settings_.position_function( *current_ );
414  current_start_ = pos - pos % settings_.stride;
415  }
416  }
417 
418  void update_()
419  {
420  // Do the correct type of enqueuing.
421  if( settings_.window_type == WindowType::kInterval ) {
422  update_interval_();
423  } else if( settings_.window_type == WindowType::kVariants ) {
424  throw std::runtime_error( "Not yet implemented" );
425  } else {
426  throw std::runtime_error( "Invalid WindowType" );
427  }
428  }
429 
430  void update_interval_()
431  {
432  // Dequeue everything that is not part of the current interval any more.
433  while(
434  ! window_.entries().empty() && window_.entries().front().position < current_start_
435  ) {
436  window_.entries().pop_front();
437  }
438 
439  // Now enqueue new entries.
440  while( current_ != end_ ) {
441  // Get the chromosome and position of the current entry, and see if it belongs
442  // into the current window. If not, we are done here with this window.
443  if(
444  settings_.chromosome_function( *current_ ) != window_.chromosome() ||
445  settings_.position_function( *current_ ) >= current_start_ + settings_.width
446  ) {
447  break;
448  }
449 
450  // Now enqueue the entry, and move to the next.
451  window_.entries().emplace_back(
452  next_index_,
453  settings_.position_function( *current_ ),
454  settings_.entry_input_function( *current_ )
455  );
456  ++next_index_;
457  ++current_;
458  }
459 
460  // Cases in which we are at the last window: Either we reached the end of the input,
461  // or the end of the current chromosome.
462  if( current_ == end_ || settings_.chromosome_function( *current_ ) != window_.chromosome() ) {
463  is_last_window_ = true;
464  }
465 
466  // Update the window positions.
467  window_.first_position( current_start_ );
468  window_.last_position( current_start_ + settings_.width );
469  }
470 
471  // -------------------------------------------------------------------------
472  // Data Members
473  // -------------------------------------------------------------------------
474 
475 private:
476 
477  // All combined settings for the window
478  settings_type settings_;
479 
480  // Current window and its position
481  Window window_;
482  size_t current_start_ = 0;
483  size_t next_index_ = 0;
484 
485  // Need to manually keep track of those...
486  bool is_first_window_ = true;
487  bool is_last_window_ = false;
488 
489  // Underlying iterator
490  ForwardIterator begin_;
491  ForwardIterator current_;
492  ForwardIterator end_;
493 };
494 
495 // =================================================================================================
496 // Make Genomic Sliding Window Iterator
497 // =================================================================================================
498 
503 template<class ForwardIterator, class InputType, class DataType = InputType>
504 SlidingWindowIterator<ForwardIterator, InputType, DataType>
507  ForwardIterator begin, ForwardIterator end
508 ) {
510  settings, begin, end
511  );
512 }
513 
514 } // namespace population
515 } // namespace genesis
516 
517 #endif // include guard
genesis::population::SlidingWindowIteratorSettings
Settings for running a sliding window iteration.
Definition: sliding_window_iterator.hpp:70
genesis::population::SlidingWindowIterator::is_last_window
bool is_last_window() const
Return whether the current iteration is the last of the current chromosome.
Definition: sliding_window_iterator.hpp:283
genesis::population::SlidingWindowIterator::const_reference
value_type const & const_reference
Definition: sliding_window_iterator.hpp:162
genesis::population::SlidingWindowIterator
Iterator for sliding Windows over the chromosomes of a genome.
Definition: sliding_window_iterator.hpp:144
genesis::population::SlidingWindowIteratorSettings::position_function
std::function< size_t(InputType const &)> position_function
Functor that yields the current position on the chromosome, given the input iterator data.
Definition: sliding_window_iterator.hpp:137
genesis::population::SlidingWindowIterator::operator++
self_type & operator++()
Definition: sliding_window_iterator.hpp:319
genesis::population::SlidingWindowIterator::settings_type
SlidingWindowIteratorSettings< InputType, DataType > settings_type
Definition: sliding_window_iterator.hpp:158
genesis::population::SlidingWindowIteratorSettings::entry_input_function
std::function< DataType(InputType const &)> entry_input_function
Functor to convert from the underlying input iterator that provides the data for the sliding window t...
Definition: sliding_window_iterator.hpp:126
genesis::population::Window
Window over the chromosomes of a genome.
Definition: window.hpp:108
genesis::population::SlidingWindowIteratorSettings::anchor_type
WindowAnchorType anchor_type
The type of position that the Window outputs when using its Window::anchor_position() function....
Definition: sliding_window_iterator.hpp:82
genesis::population::WindowAnchorType::kIntervalBegin
@ kIntervalBegin
genesis::population::SlidingWindowIterator::SlidingWindowIterator
SlidingWindowIterator(settings_type const &settings, ForwardIterator begin, ForwardIterator end)
Definition: sliding_window_iterator.hpp:172
genesis::population::SlidingWindowIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: sliding_window_iterator.hpp:166
genesis::population::WindowType::kVariants
@ kVariants
genesis::population::SlidingWindowIteratorSettings::stride
size_t stride
Stride of the Window, that is, how many positions or entries (depending on WindowType) to move forwar...
Definition: sliding_window_iterator.hpp:107
genesis::population::Window::last_position
size_t last_position() const
Get the last (past-the-end) position in the chromosome of the Window, that is, where the Window ends.
Definition: window.hpp:346
genesis::population::SlidingWindowIterator::operator*
value_type const & operator*() const
Definition: sliding_window_iterator.hpp:305
range.hpp
genesis::population::SlidingWindowIteratorSettings::chromosome_function
std::function< std::string(InputType const &)> chromosome_function
Functor that yields the current chromosome, given the input iterator data.
Definition: sliding_window_iterator.hpp:131
genesis::population::SlidingWindowIterator::~SlidingWindowIterator
~SlidingWindowIterator()=default
genesis::population::WindowType::kInterval
@ kInterval
genesis::population::Window::entries
container const & entries() const
Immediate container access to the Data Entries.
Definition: window.hpp:537
genesis::population::SlidingWindowIterator::Entry
typename Window::Entry Entry
Definition: sliding_window_iterator.hpp:156
genesis::population::SlidingWindowIterator::good
bool good() const
Definition: sliding_window_iterator.hpp:351
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::Window::Entry
Data that is stored per entry that was enqueued in a window.
Definition: window.hpp:127
genesis::population::SlidingWindowIterator::operator->
value_type const * operator->() const
Definition: sliding_window_iterator.hpp:310
window.hpp
genesis::population::make_sliding_window_iterator
SlidingWindowIterator< ForwardIterator, InputType, DataType > make_sliding_window_iterator(SlidingWindowIteratorSettings< InputType, DataType > const &settings, ForwardIterator begin, ForwardIterator end)
Helper function to instanciate a SlidingWindowIterator without the need to specify all template param...
Definition: sliding_window_iterator.hpp:505
genesis::population::SlidingWindowIterator::is_first_window
bool is_first_window() const
Return whether the current iteration is the first of the current chromosome.
Definition: sliding_window_iterator.hpp:268
genesis::population::SlidingWindowIteratorSettings::emit_leading_empty_windows
bool emit_leading_empty_windows
Definition: sliding_window_iterator.hpp:112
genesis::population::SlidingWindowIterator::Window
::genesis::population::Window< DataType > Window
Definition: sliding_window_iterator.hpp:155
genesis::population::Window::anchor_type
WindowAnchorType anchor_type() const
Get the WindowAnchorType that is currently set for using anchor_position().
Definition: window.hpp:444
genesis::population::Window::first_position
size_t first_position() const
Get the first position in the chromosome of the Window, that is, where the Window starts.
Definition: window.hpp:325
genesis::population::Window::clear
void clear()
Clear all data from the Window.
Definition: window.hpp:574
genesis::population::WindowType
WindowType
WindowType of a Window, that is, whether we slide along a fixed size interval of the genome,...
Definition: window.hpp:51
genesis::population::WindowAnchorType
WindowAnchorType
Position in the genome that is used for reporting when emitting or using a window.
Definition: window.hpp:69
genesis::population::SlidingWindowIteratorSettings::width
size_t width
Width of the Window, either in fixed length along the chromosome, or in number of variants/entries pe...
Definition: sliding_window_iterator.hpp:92
genesis::population::Window::chromosome
std::string const & chromosome() const
Get the chromosome name that this Window belongs to.
Definition: window.hpp:222
genesis::population::SlidingWindowIteratorSettings::window_type
WindowType window_type
Type of the Window, that is, whether to iterate over intervals of fixed length, or over a certain num...
Definition: sliding_window_iterator.hpp:76
genesis::population::SlidingWindowIterator::operator=
SlidingWindowIterator & operator=(SlidingWindowIterator const &)=default