A library for working with phylogenetic and population genetic data.
v0.27.0
window.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_WINDOW_WINDOW_H_
2 #define GENESIS_POPULATION_WINDOW_WINDOW_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #include <cassert>
35 #include <deque>
36 #include <stdexcept>
37 #include <string>
38 #include <vector>
39 
41 
42 namespace genesis {
43 namespace population {
44 
45 // =================================================================================================
46 // Auxiliary Structures
47 // =================================================================================================
48 
57 {};
58 
59 // =================================================================================================
60 // Genomic Window
61 // =================================================================================================
62 
103 template<class D, class A = EmptyAccumulator>
104 class Window
105 {
106 public:
107 
108  // -------------------------------------------------------------------------
109  // Typedefs and Enums
110  // -------------------------------------------------------------------------
111 
112  using Data = D;
113  using Accumulator = A;
114 
124  struct Entry
125  {
129  Entry( size_t index, size_t position, Data const& data )
130  : index(index)
131  , position(position)
132  , data(data)
133  {}
134 
139  Entry( size_t index, size_t position, Data&& data )
140  : index(index)
141  , position(position)
142  , data(std::move( data ))
143  {}
144 
150  operator Data&()
151  {
152  return data;
153  }
154 
160  operator Data const&() const
161  {
162  return data;
163  }
164 
172  size_t index;
173 
181  size_t position;
182 
187  };
188 
190  using container = std::deque<Entry>;
191 
192  using value_type = Entry;
194  using const_reference = value_type const&;
195 
196  using iterator = typename container::iterator;
197  using const_iterator = typename container::const_iterator;
198  using reverse_iterator = typename container::reverse_iterator;
199  using const_reverse_iterator = typename container::const_reverse_iterator;
200 
201  using difference_type = typename container::difference_type;
202  using size_type = typename container::size_type;
203 
204  // -------------------------------------------------------------------------
205  // Constructors and Rule of Five
206  // -------------------------------------------------------------------------
207 
208  Window() = default;
209  ~Window() = default;
210 
211  Window( Window const& ) = default;
212  Window( Window&& ) = default;
213 
214  Window& operator= ( Window const& ) = default;
215  Window& operator= ( Window&& ) = default;
216 
217  // -------------------------------------------------------------------------
218  // General Properties
219  // -------------------------------------------------------------------------
220 
224  std::string const& chromosome() const
225  {
226  return chromosome_;
227  }
228 
232  void chromosome( std::string const& value )
233  {
234  chromosome_ = value;
235  }
236 
250  size_t width() const
251  {
252  // We need to do the check here, when this is used.
253  if( first_position_ > last_position_ ) {
254  throw std::runtime_error(
255  "Invalidly set first and last position in the Window, with " +
256  std::to_string( first_position_ ) + " >= " + std::to_string( last_position_ )
257  );
258  }
259  return last_position_ - first_position_ + 1;
260  }
261 
267  size_t entry_count() const
268  {
269  return entries_.size();
270  }
271 
277  size_t size() const
278  {
279  return entries_.size();
280  }
281 
286  bool empty() const
287  {
288  return entries_.empty();
289  }
290 
294  double saturation() const
295  {
296  double const frac = static_cast<double>( entries_.size() ) / static_cast<double>( width() );
297 
298  assert( width() > 0 );
299  assert( frac >= 0.0 );
300  assert( frac <= 1.0 );
301  return frac;
302  }
303 
315  size_t span() const
316  {
317  if( entries_.empty() ) {
318  return 0;
319  }
320  return entries_.back().position - entries_.front().position + 1;
321  }
322 
323  // -------------------------------------------------------------------------
324  // Position
325  // -------------------------------------------------------------------------
326 
338  size_t first_position() const
339  {
340  return first_position_;
341  }
342 
348  void first_position( size_t value )
349  {
350  first_position_ = value;
351  }
352 
358  size_t last_position() const
359  {
360  return last_position_;
361  }
362 
368  void last_position( size_t value )
369  {
370  last_position_ = value;
371  }
372 
380  {
381  return { chromosome_, first_position_, last_position_ };
382  }
383 
384  // -------------------------------------------------------------------------
385  // Data Accessors
386  // -------------------------------------------------------------------------
387 
393  reference operator[]( size_t index )
394  {
395  assert( index < entries_.size() );
396  return entries_[ index ];
397  }
398 
404  const_reference operator[]( size_t index ) const
405  {
406  assert( index < entries_.size() );
407  return entries_[ index ];
408  }
409 
413  reference at( size_t index )
414  {
415  return entries_.at( index );
416  }
417 
421  const_reference at( size_t index ) const
422  {
423  return entries_.at( index );
424  }
425 
430  {
431  return entries_.begin();
432  }
433 
438  {
439  return entries_.begin();
440  }
441 
446  {
447  return entries_.end();
448  }
449 
454  {
455  return entries_.end();
456  }
457 
461  container const& entries() const
462  {
463  return entries_;
464  }
465 
470  {
471  return entries_;
472  }
473 
478  {
479  return accumulator_;
480  }
481 
482 
486  Accumulator const& accumulator() const
487  {
488  return accumulator_;
489  }
490 
491  // -------------------------------------------------------------------------
492  // Modifiers and Helpers
493  // -------------------------------------------------------------------------
494 
502  void validate() const
503  {
504  if( first_position_ == 0 ) {
505  throw std::runtime_error( "Invalid Window with first_position() == 0." );
506  }
507  if( last_position_ < first_position_ ) {
508  throw std::runtime_error( "Invalid Window with last_position() < first_position()." );
509  }
510  for( auto const& entry : entries_ ) {
511  if( entry.position < first_position_ || entry.position > last_position_ ) {
512  throw std::runtime_error(
513  "Invalid Window::Entry in chromosome " + chromosome_ + " at position " +
514  std::to_string( entry.position ) +
515  ", which is not between the window boundaries [" +
516  std::to_string( first_position_ ) + "," +
517  std::to_string( last_position_ ) + "]."
518  );
519  }
520  }
521  }
522 
526  void clear()
527  {
528  chromosome_ = "";
529  first_position_ = 0;
530  last_position_ = 0;
531  accumulator_ = Accumulator{};
532  entries_ = container{};
533  }
534 
535  // -------------------------------------------------------------------------
536  // Data Members
537  // -------------------------------------------------------------------------
538 
539 private:
540 
541  std::string chromosome_;
542  size_t first_position_ = 0;
543  size_t last_position_ = 0;
544 
545  Accumulator accumulator_;
546  container entries_;
547 
548 };
549 
550 } // namespace population
551 } // namespace genesis
552 
553 #endif // include guard
genesis::population::Window::accumulator
Accumulator & accumulator()
Get the Accumulator data that can be used for speeding up certain window computations.
Definition: window.hpp:477
genesis::population::Window::operator[]
reference operator[](size_t index)
Return a reference to the element at specified location index.
Definition: window.hpp:393
genesis::population::Window::Window
Window()=default
genesis::population::Window::at
const_reference at(size_t index) const
Return a reference to the element at specified location pos, with bounds checking.
Definition: window.hpp:421
genesis::population::Window
Window over the chromosomes of a genome.
Definition: window.hpp:104
genesis::population::Window::Entry::position
size_t position
Genomic position (1-based) of the entry along a chromosome.
Definition: window.hpp:181
genesis::population::Window::validate
void validate() const
Validate the window data.
Definition: window.hpp:502
genesis::population::Window::Entry::index
size_t index
Index of the entry, that is, how many other entries have there been in total in the underlying data f...
Definition: window.hpp:172
genesis::population::Window::last_position
void last_position(size_t value)
Set the last position in the chromosome of the Window, that is, where the Window ends.
Definition: window.hpp:368
genesis::population::Window::entry_count
size_t entry_count() const
Get the number of D/Data Entries that are stored in the Window.
Definition: window.hpp:267
genesis::population::Window::size
size_t size() const
Get the number of D/Data Entries that are stored in the Window.
Definition: window.hpp:277
genesis::population::Window::accumulator
Accumulator const & accumulator() const
Get the Accumulator data that can be used for speeding up certain window computations.
Definition: window.hpp:486
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: functions/genome_locus.hpp:48
genome_region.hpp
genesis::population::Window::Entry::Entry
Entry(size_t index, size_t position, Data const &data)
Constructor that takes data by reference.
Definition: window.hpp:129
genesis::population::Window::Entry::data
Data data
Data stored in the Window for this entry.
Definition: window.hpp:186
genesis::population::Window::last_position
size_t last_position() const
Get the last position in the chromosome of the Window, that is, where the Window ends.
Definition: window.hpp:358
genesis::population::Window::end
iterator end()
Iterator to the end of the Data Entries.
Definition: window.hpp:445
genesis::population::Window< DataType >::const_reverse_iterator
typename container::const_reverse_iterator const_reverse_iterator
Definition: window.hpp:199
genesis::population::Window< DataType >::reference
value_type & reference
Definition: window.hpp:193
genesis::population::Window< DataType >::value_type
Entry value_type
Definition: window.hpp:192
genesis::population::Window::~Window
~Window()=default
genesis::population::Window::empty
bool empty() const
Return whether the Window is empty, that is, if it does not contain any Entries.
Definition: window.hpp:286
genesis::population::Window::entries
container const & entries() const
Immediate container access to the Data Entries.
Definition: window.hpp:461
genesis::population::Window::span
size_t span() const
Get the distance that is spanned by the first and the last variant (entry) in the Window,...
Definition: window.hpp:315
genesis::population::Window::operator[]
const_reference operator[](size_t index) const
Return a reference to the element at specified location index.
Definition: window.hpp:404
genesis::population::EmptyAccumulator
Empty helper data struct to serve as a dummy for Window.
Definition: window.hpp:56
genesis::population::Window< DataType >::size_type
typename container::size_type size_type
Definition: window.hpp:202
genesis::population::Window::genome_region
GenomeRegion genome_region() const
Return the genome region that this Window is defined over.
Definition: window.hpp:379
genesis::population::Window< DataType >::container
std::deque< Entry > container
Definition: window.hpp:190
genesis::population::Window::Accumulator
A Accumulator
Definition: window.hpp:113
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::Window::Entry
Data that is stored per entry that was enqueued in a window.
Definition: window.hpp:124
genesis::population::Window::Entry::Entry
Entry(size_t index, size_t position, Data &&data)
Constructor that takes data by r-value reference (i.e., moved data); preferred if possible to use,...
Definition: window.hpp:139
genesis::population::Window::at
reference at(size_t index)
Return a reference to the element at specified location pos, with bounds checking.
Definition: window.hpp:413
genesis::population::Window< DataType >::Data
DataType Data
Definition: window.hpp:112
genesis::population::Window::chromosome
void chromosome(std::string const &value)
Set the chromosome name that this Window belongs to.
Definition: window.hpp:232
genesis::population::Window::width
size_t width() const
Get the width of the Window.
Definition: window.hpp:250
genesis::population::Window< DataType >::difference_type
typename container::difference_type difference_type
Definition: window.hpp:201
genesis::population::Window::first_position
void first_position(size_t value)
Set the first position in the chromosome of the Window, that is, where the Window starts.
Definition: window.hpp:348
genesis::population::Window::entries
container & entries()
Immediate container access to the Data Entries.
Definition: window.hpp:469
genesis::population::Window::begin
const_iterator begin() const
Const iterator to the begin of the Data Entries.
Definition: window.hpp:437
genesis::population::Window::end
const_iterator end() const
Const iterator to the end of the Data Entries.
Definition: window.hpp:453
genesis::population::Window::operator=
Window & operator=(Window const &)=default
genesis::population::GenomeRegion
A region (between two positions) on a chromosome.
Definition: genome_region.hpp:60
genesis::population::Window::first_position
size_t first_position() const
Get the first position in the chromosome of the Window, that is, where the Window starts.
Definition: window.hpp:338
genesis::population::Window< DataType >::const_iterator
typename container::const_iterator const_iterator
Definition: window.hpp:197
genesis::population::Window::clear
void clear()
Clear all data from the Window.
Definition: window.hpp:526
genesis::population::Window::begin
iterator begin()
Iterator to the begin of the Data Entries.
Definition: window.hpp:429
genesis::population::Window::chromosome
std::string const & chromosome() const
Get the chromosome name that this Window belongs to.
Definition: window.hpp:224
genesis::population::Window< DataType >::reverse_iterator
typename container::reverse_iterator reverse_iterator
Definition: window.hpp:198
genesis::population::Window::saturation
double saturation() const
Get the fraction of entries to window width.
Definition: window.hpp:294
genesis::population::Window< DataType >::iterator
typename container::iterator iterator
Definition: window.hpp:196
genesis::population::Window< DataType >::const_reference
value_type const & const_reference
Definition: window.hpp:194