A library for working with phylogenetic and population genetic data.
v0.27.0
region_window_iterator.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_WINDOW_REGION_WINDOW_ITERATOR_H_
2 #define GENESIS_POPULATION_WINDOW_REGION_WINDOW_ITERATOR_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
39 
40 #include <cassert>
41 #include <deque>
42 #include <functional>
43 #include <stdexcept>
44 #include <string>
45 #include <type_traits>
46 #include <utility>
47 #include <vector>
48 
49 namespace genesis {
50 namespace population {
51 
52 // =================================================================================================
53 // Region Window Iterator
54 // =================================================================================================
55 
83 template<class ForwardIterator, class InputType, class DataType = InputType>
85 {
86 public:
87 
88  // -------------------------------------------------------------------------
89  // Typedefs and Enums
90  // -------------------------------------------------------------------------
91 
92  // using Data = typename ForwardIterator::value_type;
93  // using Data = typename std::result_of<decltype(EntryInputFunctor)()>::type;
94 
96  using Entry = typename Window::Entry;
97 
98  using settings_type = RegionWindowIteratorSettings<InputType, DataType>;
102  using const_reference = value_type const&;
103 
104  // using difference_type = typename Window::container::difference_type;
105  // using size_type = typename Window::container::size_type;
106  using iterator_category = std::input_iterator_tag;
107 
108  // ======================================================================================
109  // Internal Iterator
110  // ======================================================================================
111 
115  class Iterator
116  {
117  public:
118 
119  // -------------------------------------------------------------------------
120  // Constructors and Rule of Five
121  // -------------------------------------------------------------------------
122 
124  using value_type = T;
125  using pointer = value_type const*;
126  using reference = value_type const&;
127  using difference_type = std::ptrdiff_t;
128  using iterator_category = std::input_iterator_tag;
129 
130  using Data = D;
131 
132  private:
133 
134  Iterator() = default;
135 
136  Iterator(
137  RegionWindowIterator const* parent,
138  ForwardIterator begin, ForwardIterator end
139  )
140  : parent_(parent)
141  , cur_locus_(begin)
142  , end_locus_(end)
143  {
144  // Error checking.
145  if( ! parent_->entry_input_function ) {
146  throw std::runtime_error(
147  "Need to set RegionWindowIteratorSettings::entry_input_function before using it "
148  "to construct a RegionWindowIterator"
149  );
150  }
151  if( ! parent_->chromosome_function ) {
152  throw std::runtime_error(
153  "Need to set RegionWindowIteratorSettings::chromosome_function before using it "
154  "to construct a RegionWindowIterator"
155  );
156  }
157  if( ! parent_->position_function ) {
158  throw std::runtime_error(
159  "Need to set RegionWindowIteratorSettings::position_function before using it "
160  "to construct a RegionWindowIterator"
161  );
162  }
163 
164  // Let's get going.
165  init_();
166  increment_();
167  }
168 
169  public:
170 
171  ~Iterator() = default;
172 
173  Iterator( self_type const& ) = default;
174  Iterator( self_type&& ) = default;
175 
176  Iterator& operator= ( self_type const& ) = default;
177  Iterator& operator= ( self_type&& ) = default;
178 
180 
181  // -------------------------------------------------------------------------
182  // Accessors
183  // -------------------------------------------------------------------------
184 
185  value_type const * operator->() const
186  {
187  return &(window_.front());
188  }
189 
191  {
192  return &(window_.front());
193  }
194 
195  value_type const & operator*() const
196  {
197  return window_.front();
198  }
199 
201  {
202  return window_.front();
203  }
204 
205  // -------------------------------------------------------------------------
206  // Iteration
207  // -------------------------------------------------------------------------
208 
210  {
211  increment_();
212  return *this;
213  }
214 
215  // self_type operator ++(int)
216  // {
217  // auto cpy = *this;
218  // increment_();
219  // return cpy;
220  // }
221 
225  operator bool() const
226  {
227  return ! windows_.empty();
228  }
229 
233  bool operator==( self_type const& other ) const
234  {
235  todo
236  return cur_locus_ == it.cur_locus_;
237  }
238 
239  bool operator!=( self_type const& other ) const
240  {
241  return !(*this == other);
242  }
243 
244  // -------------------------------------------------------------------------
245  // Internal Members
246  // -------------------------------------------------------------------------
247 
248  private:
249 
250  void init_()
251  {
252  // Saveguard. This might be called on an empty range, in which case we just do nothing.
253  if( cur_locus_ == end_locus_ ) {
254  return;
255  }
256 
257  // Clear the window and prepare for new chromosome.
258  window_.clear();
259  window_.chromosome( settings_.chromosome_function( *cur_locus_ ));
260  is_first_window_ = true;
261  is_last_window_ = false;
262  next_index_ = 0;
263 
264  if( settings_.emit_leading_empty_windows ) {
265  cur_locus_start_ = 1;
266  } else {
267  // Set the start to the window position that we would get after going through all
268  // the previous windows if they were emitted.
269  auto const pos = settings_.position_function( *cur_locus_ );
270  cur_locus_start_ = pos - (( pos - 1 ) % settings_.stride );
271  }
272  }
273 
274  void increment_()
275  {
276  assert( parent_ );
277 
278  auto const cur_chr = windows_.front().chromosome();
279 
280  // We go to the next window, meaning we are done with this one.
281  windows_.pop_front();
282 
283  // If there are no windows left, that means we have processed all regions of
284  // the current chromosome. Hence, move the data input forward to the next chromosome.
285  if( windows_.empty() ) {
286  do {
287 
288  // Move the input data to the next chromosome.
289  nope not if we are already at the next chr...
290  auto const cur_chr = parent_->chromosome_function( *cur_locus_ );
291  while (
292  cur_locus_ != end_locus_ &&
293  cur_chr == parent_->chromosome_function( *cur_locus_ )
294  ) {
295  ++cur_locus_;
296  }
297 
298  // We might have reached the end. If so, we are done with the whole thing.
299  if( cur_locus_ == end_locus_ ) {
300  parent_ = nullptr;
301  return;
302  }
303 
304  // If we are here, our input has still data. Let's see if the chromosome that
305  // we just moved to also is present in our regions list. If so, we are done
306  // with this part - we are at a new chromosome. Let's store the iterators
307  // to its regions then.
308  // However, if the chromosome is not present in the region, we continue with
309  // another iteration of the while loop, to find the next chromosome of the
310  // input data.
311  auto const new_chr = parent_->chromosome_function( *cur_locus_ );
312  if( parent_->region_list_->has_chromosome( new_chr )) {
313 
314  // Get the interval tree iterators for the new chromosome.
315  // If there are regions in there, we use them!
316  auto const& tree = parent_->region_list_->get_chromosome_regions( new_chr );
317  if( tree.begin() != tree.end() ) {
318  cur_region_ = tree.begin();
319  end_region_ = tree.end();
320  cur_index_ = 0;
321  break;
322  }
323  }
324  } while ( true );
325  }
326 
327  while( cur_locus_ != end_locus_ ) {
328  // We need those often, so let's store them here.
329  auto const cur_chr = parent_->chromosome_function( *cur_locus_ );
330  auto const cur_pos = parent_->position_function( *cur_locus_ )
331 
332  // Add all regions as windows that are in the region list and span the current locus.
333  while( cur_region_ != end_region_ && cur_region_->interval().within( cur_pos )) {
334 
335  // Add the region to our current window list.
336  windows_.emplace_back();
337  auto& window = windows_.back();
338  window.chromosome( cur_chr );
339  window.first_position( cur_region_.->interval().low() );
340  window.last_position( cur_region_->interval().high() );
341 
342  // Move to the next region, and assert that (if we have not reached the end of
343  // the region list already) its low is actually not before the previous region,
344  // i.e., that the regions are sorted by their low ends (instead of high ends).
345  // This should be a property of the IntervalTree already, but let's make sure.
346  ++cur_region_;
347  assert(
348  cur_region_ == end_region_ ||
349  cur_region_->interval().low() >= window.first_position()
350  );
351  }
352 
353  // Add the current locus to all windows that span it.
354  for( auto& window : windows_ ) {
355 
356  // We only keep windows in the list that are of the current chromosome,
357  // and are already started at the current position.
358  assert( cur_chr == window.chromosome() );
359  assert( cur_pos >= window.first_position() );
360 
361  // If the window spans the current locus, add it.
362  if( cur_pos <= window.last_position() ) {
363  window.entries().emplace_back(
364  cur_index_,
365  cur_pos,
366  parent_->entry_input_function( *cur_locus_ )
367  );
368  }
369  }
370 
371  // Move to next input.
372  ++cur_index_;
373  ++cur_locus_;
374 
375 
376  if( parent_->chromosome_function( *cur_locus_ ) != window_.chromosome() ) {
377 
378  }
379  }
380  }
381 
382  private:
383 
384  // Parent
385  RegionWindowIterator<> const* parent_ = nullptr;
386 
387  // Underlying data iterator
388  ForwardIterator cur_locus_;
389  ForwardIterator end_locus_;
390  size_t cur_index_ = 0;
391 
392  // Region list
393  GenomeRegionList::iterator cur_region_;
394  GenomeRegionList::iterator end_region_;
395 
396  // Store the windows that we are filling
397  std::deque<Window> windows_;
398 
399  };
400 
401  // ======================================================================================
402  // Main Class
403  // ======================================================================================
404 
405  // -------------------------------------------------------------------------
406  // Constructors and Rule of Five
407  // -------------------------------------------------------------------------
408 
410  ForwardIterator begin, ForwardIterator end
411  )
412  : cur_locus_(begin)
413  , end_locus_(end)
414  {
415 
416  }
417 
418  ~RegionWindowIterator() = default;
419 
420  RegionWindowIterator( RegionWindowIterator const& ) = default;
422 
425 
426  // -------------------------------------------------------------------------
427  // Functors
428  // -------------------------------------------------------------------------
429 
434  std::function<DataType( InputType const& )> entry_input_function;
435 
439  std::function<std::string( InputType const& )> chromosome_function;
440 
445  std::function<size_t( InputType const& )> position_function;
446 
447  // -------------------------------------------------------------------------
448  // Data Members
449  // -------------------------------------------------------------------------
450 
451 private:
452 
453  // Settings
454  bool skip_empty_regions = true;
455 
456  GenomeRegionList const* region_list_;
457 
458 };
459 
460 } // namespace population
461 } // namespace genesis
462 
463 #endif // include guard
genesis::population::RegionWindowIterator::Iterator::operator==
bool operator==(self_type const &other) const
Compare two iterators for equality.
Definition: region_window_iterator.hpp:233
genesis::population::RegionWindowIterator::Iterator::operator->
const value_type * operator->() const
Definition: region_window_iterator.hpp:185
genesis::population::RegionWindowIterator
Iterator for Windows representing regions of a genome.
Definition: region_window_iterator.hpp:84
genesis::population::RegionWindowIterator::Iterator::operator*
const value_type & operator*() const
Definition: region_window_iterator.hpp:195
genesis::population::RegionWindowIterator::Iterator::iterator_category
std::input_iterator_tag iterator_category
Definition: region_window_iterator.hpp:128
genesis::population::RegionWindowIterator::~RegionWindowIterator
~RegionWindowIterator()=default
genesis::population::RegionWindowIterator::Iterator::~Iterator
~Iterator()=default
genesis::population::RegionWindowIterator::Iterator::operator*
value_type & operator*()
Definition: region_window_iterator.hpp:200
genome_locus.hpp
genesis::population::Window< DataType >
genesis::population::GenomeRegionList::has_chromosome
bool has_chromosome(std::string const &chromosome) const
Return whether a chromosome is stored.
Definition: genome_region_list.hpp:340
genesis::population::GenomeRegionList
List of regions in a genome, for each chromosome.
Definition: genome_region_list.hpp:82
genesis::population::RegionWindowIterator::operator=
RegionWindowIterator & operator=(RegionWindowIterator const &)=default
genesis::population::RegionWindowIterator::RegionWindowIterator
RegionWindowIterator(ForwardIterator begin, ForwardIterator end)
Definition: region_window_iterator.hpp:409
genesis::population::RegionWindowIterator::const_reference
value_type const & const_reference
Definition: region_window_iterator.hpp:102
genome_region.hpp
genesis::population::RegionWindowIterator::entry_input_function
std::function< DataType(InputType const &)> entry_input_function
Functor to convert from the underlying input iterator that provides the data for the region window to...
Definition: region_window_iterator.hpp:434
genesis::population::RegionWindowIterator::Iterator::operator->
value_type * operator->()
Definition: region_window_iterator.hpp:190
genesis::population::RegionWindowIterator::Iterator::operator++
self_type & operator++()
Definition: region_window_iterator.hpp:209
genesis::population::RegionWindowIterator::iterator_category
std::input_iterator_tag iterator_category
Definition: region_window_iterator.hpp:106
genesis::population::RegionWindowIterator::Iterator::reference
value_type const & reference
Definition: region_window_iterator.hpp:126
genesis::population::RegionWindowIterator::Window
::genesis::population::Window< DataType > Window
Definition: region_window_iterator.hpp:95
range.hpp
genesis::population::RegionWindowIterator::Iterator::Data
D Data
Definition: region_window_iterator.hpp:130
genesis::population::RegionWindowIterator::Entry
typename Window::Entry Entry
Definition: region_window_iterator.hpp:96
genome_region_list.hpp
genesis::population::RegionWindowIterator::Iterator::RegionWindowIterator
friend RegionWindowIterator
Definition: region_window_iterator.hpp:179
genesis::population::RegionWindowIterator::Iterator::operator=
Iterator & operator=(self_type const &)=default
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::Window::Entry
Data that is stored per entry that was enqueued in a window.
Definition: window.hpp:124
window.hpp
genesis::population::RegionWindowIterator::Iterator::value_type
T value_type
Definition: region_window_iterator.hpp:124
genesis::population::RegionWindowIterator::Iterator
Internal iterator over the data.
Definition: region_window_iterator.hpp:115
genesis::population::RegionWindowIterator::position_function
std::function< size_t(InputType const &)> position_function
Functor that yields the current position on the chromosome, given the input iterator data.
Definition: region_window_iterator.hpp:445
genesis::population::RegionWindowIterator::Iterator::operator!=
bool operator!=(self_type const &other) const
Definition: region_window_iterator.hpp:239
genesis::population::RegionWindowIterator::Iterator::pointer
value_type const * pointer
Definition: region_window_iterator.hpp:125
genesis::population::RegionWindowIterator::settings_type
RegionWindowIteratorSettings< InputType, DataType > settings_type
Definition: region_window_iterator.hpp:98
genesis::population::RegionWindowIterator::Iterator::difference_type
std::ptrdiff_t difference_type
Definition: region_window_iterator.hpp:127
genesis::population::RegionWindowIterator::chromosome_function
std::function< std::string(InputType const &)> chromosome_function
Functor that yields the current chromosome, given the input iterator data.
Definition: region_window_iterator.hpp:439
genesis::population::GenomeRegionList::iterator
typename tree_type::iterator iterator
Definition: genome_region_list.hpp:98