A library for working with phylogenetic data.
v0.25.0
window.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_WINDOW_WINDOW_H_
2 #define GENESIS_POPULATION_WINDOW_WINDOW_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <cassert>
35 #include <deque>
36 #include <stdexcept>
37 #include <string>
38 #include <vector>
39 
40 namespace genesis {
41 namespace population {
42 
43 // =================================================================================================
44 // Auxiliary Structures
45 // =================================================================================================
46 
/**
 * @brief Kind of a Window.
 *
 * Distinguishes the two ways a Window can be defined.
 */
enum class WindowType {
    /** Window over an interval of the genome. */
    kInterval,

    /** Window over variants; presumably a fixed number of them (confirm against the callers). */
    kVariants,
};
56 
69 enum class WindowAnchorType
70 {
79 };
80 
89 {};
90 
91 // =================================================================================================
92 // Genomic Window
93 // =================================================================================================
94 
107 template<class D, class A = EmptyAccumulator>
108 class Window
109 {
110 public:
111 
112  // -------------------------------------------------------------------------
113  // Typedefs and Enums
114  // -------------------------------------------------------------------------
115 
116  using Data = D;
117  using Accumulator = A;
118 
127  struct Entry
128  {
132  Entry( size_t index, size_t position, Data const& data )
133  : index(index)
134  , position(position)
135  , data(data)
136  {}
137 
141  Entry( size_t index, size_t position, Data&& data )
142  : index(index)
143  , position(position)
144  , data(std::move( data ))
145  {}
146 
152  operator Data&()
153  {
154  return data;
155  }
156 
157 
163  operator Data const&() const
164  {
165  return data;
166  }
167 
171  size_t index;
172 
179  size_t position;
180 
185  };
186 
188  using container = std::deque<Entry>;
189 
190  using value_type = Entry;
192  using const_reference = value_type const&;
193 
194  using iterator = typename container::iterator;
195  using const_iterator = typename container::const_iterator;
196  using reverse_iterator = typename container::reverse_iterator;
197  using const_reverse_iterator = typename container::const_reverse_iterator;
198 
199  using difference_type = typename container::difference_type;
200  using size_type = typename container::size_type;
201 
202  // -------------------------------------------------------------------------
203  // Constructors and Rule of Five
204  // -------------------------------------------------------------------------
205 
206  Window() = default;
207  ~Window() = default;
208 
209  Window( Window const& ) = default;
210  Window( Window&& ) = default;
211 
212  Window& operator= ( Window const& ) = default;
213  Window& operator= ( Window&& ) = default;
214 
215  // -------------------------------------------------------------------------
216  // General Properties
217  // -------------------------------------------------------------------------
218 
222  std::string const& chromosome() const
223  {
224  return chromosome_;
225  }
226 
230  void chromosome( std::string const& value )
231  {
232  chromosome_ = value;
233  }
234 
246  size_t width() const
247  {
248  // We need to do the check here, when this is used.
249  if( first_position_ >= last_position_ ) {
250  throw std::runtime_error(
251  "Invalidly set first and last position in the Window, with " +
252  std::to_string( first_position_ ) + " >= " + std::to_string( last_position_ )
253  );
254  }
255  return last_position_ - first_position_;
256  }
257 
263  size_t entry_count() const
264  {
265  return entries_.size();
266  }
267 
273  size_t size() const
274  {
275  return entries_.size();
276  }
277 
282  bool empty() const
283  {
284  return entries_.empty();
285  }
286 
290  double saturation() const
291  {
292  double const frac = static_cast<double>( entries_.size() ) / static_cast<double>( width() );
293 
294  assert( width() > 0 );
295  assert( frac >= 0.0 );
296  assert( frac <= 1.0 );
297  return frac;
298  }
299 
308  size_t span() const
309  {
310  if( entries_.empty() ) {
311  return 0;
312  }
313  return entries_.back().position - entries_.front().position;
314  }
315 
316  // -------------------------------------------------------------------------
317  // Position
318  // -------------------------------------------------------------------------
319 
325  size_t first_position() const
326  {
327  return first_position_;
328  }
329 
335  void first_position( size_t value )
336  {
337  first_position_ = value;
338  }
339 
346  size_t last_position() const
347  {
348  return last_position_;
349  }
350 
357  void last_position( size_t value )
358  {
359  last_position_ = value;
360  }
361 
370  size_t anchor_position() const
371  {
372  return anchor_position( anchor_type_ );
373  }
374 
379  {
380  auto check_entries = [&](){
381  if( entries_.empty() ) {
382  throw std::runtime_error(
383  "Cannot use empty Window (with no variants/entries) for variant-based anchor "
384  "positions. Typically these anchor positions are used with WindowType::kVariants."
385  );
386  }
387  };
388 
389  // Calculate the SNP position that we want to output when emitting a window.
390  // Some use integer division, which is intended. We don't want the hassle of floating
391  // point genomic positions, so we have to do these roundings... But given a large window
392  // size, that should probably not matter much.
393  switch( anchor_type ) {
395  return first_position_;
396  }
398  return last_position_;
399  }
401  return ( first_position_ + last_position_ ) / 2;
402  }
404  check_entries();
405  assert( ! entries_.empty() );
406  return entries_.front().position;
407  }
409  check_entries();
410  assert( ! entries_.empty() );
411  return entries_.back().position;
412  }
414  check_entries();
415  assert( ! entries_.empty() );
416  return entries_[ entries_.size() / 2 ].position;
417  }
419  check_entries();
420  assert( ! entries_.empty() );
421 
422  size_t sum = 0;
423  for( auto const& e : entries_ ) {
424  sum += e.position;
425  }
426  return sum / entries_.size();
427  }
429  check_entries();
430  assert( ! entries_.empty() );
431  return (entries_.front().position + entries_.back().position) / 2;
432  }
433  default: {
434  throw std::runtime_error( "Invalid WindowAnchorType." );
435  }
436  }
437  assert( false );
438  return 0;
439  }
440 
445  {
446  return anchor_type_;
447  }
448 
456  {
457  anchor_type_ = value;
458  }
459 
460  // -------------------------------------------------------------------------
461  // Data Accessors
462  // -------------------------------------------------------------------------
463 
469  reference operator[]( size_t index )
470  {
471  assert( index < entries_.size() );
472  return entries_[ index ];
473  }
474 
480  const_reference operator[]( size_t index ) const
481  {
482  assert( index < entries_.size() );
483  return entries_[ index ];
484  }
485 
489  reference at( size_t index )
490  {
491  return entries_.at( index );
492  }
493 
497  const_reference at( size_t index ) const
498  {
499  return entries_.at( index );
500  }
501 
506  {
507  return entries_.begin();
508  }
509 
514  {
515  return entries_.begin();
516  }
517 
522  {
523  return entries_.end();
524  }
525 
530  {
531  return entries_.end();
532  }
533 
537  container const& entries() const
538  {
539  return entries_;
540  }
541 
546  {
547  return entries_;
548  }
549 
554  {
555  return accumulator_;
556  }
557 
558 
562  Accumulator const& accumulator() const
563  {
564  return accumulator_;
565  }
566 
567  // -------------------------------------------------------------------------
568  // Modifiers
569  // -------------------------------------------------------------------------
570 
574  void clear()
575  {
576  chromosome_ = "";
577  first_position_ = 0;
578  last_position_ = 0;
579  accumulator_ = Accumulator{};
580  entries_ = container{};
581  }
582 
583  // -------------------------------------------------------------------------
584  // Data Members
585  // -------------------------------------------------------------------------
586 
587 private:
588 
590 
591  std::string chromosome_;
592  size_t first_position_ = 0;
593  size_t last_position_ = 0;
594 
595  Accumulator accumulator_;
596  container entries_;
597 
598 };
599 
600 } // namespace population
601 } // namespace genesis
602 
603 #endif // include guard
genesis::population::Window::accumulator
Accumulator & accumulator()
Get the Accumulator data that can be used for speeding up certain window computations.
Definition: window.hpp:553
genesis::population::WindowAnchorType::kVariantMean
@ kVariantMean
genesis::utils::sum
double sum(const Histogram &h)
Definition: utils/math/histogram/stats.cpp:140
genesis::population::Window::operator[]
reference operator[](size_t index)
Return a reference to the element at specified location index.
Definition: window.hpp:469
genesis::population::Window::Window
Window()=default
genesis::population::Window::at
const_reference at(size_t index) const
Return a const reference to the element at specified location index, with bounds checking.
Definition: window.hpp:497
genesis::population::WindowAnchorType::kVariantLast
@ kVariantLast
genesis::population::WindowAnchorType::kVariantMedian
@ kVariantMedian
genesis::population::Window
Window over the chromosomes of a genome.
Definition: window.hpp:108
genesis::population::WindowAnchorType::kIntervalBegin
@ kIntervalBegin
genesis::population::Window::Entry::position
size_t position
Genomic position of the entry along a chromosome.
Definition: window.hpp:179
genesis::population::Window::Entry::index
size_t index
Index of the entry, that is, how many other entries have there been in total.
Definition: window.hpp:171
genesis::population::Window::last_position
void last_position(size_t value)
Set the last (past-the-end) position in the chromosome of the Window, that is, where the Window ends.
Definition: window.hpp:357
genesis::population::WindowAnchorType::kIntervalEnd
@ kIntervalEnd
genesis::population::WindowType::kVariants
@ kVariants
genesis::population::Window::entry_count
size_t entry_count() const
Get the number of D/Data Entries that are stored in the Window.
Definition: window.hpp:263
genesis::population::Window::size
size_t size() const
Get the number of D/Data Entries that are stored in the Window.
Definition: window.hpp:273
genesis::population::Window::accumulator
Accumulator const & accumulator() const
Get the Accumulator data that can be used for speeding up certain window computations.
Definition: window.hpp:562
genesis::population::Window::Entry::Entry
Entry(size_t index, size_t position, Data const &data)
Constructor that takes data by reference.
Definition: window.hpp:132
genesis::population::Window::Entry::data
Data data
Data stored in the Window for this entry.
Definition: window.hpp:184
genesis::population::Window::last_position
size_t last_position() const
Get the last (past-the-end) position in the chromosome of the Window, that is, where the Window ends.
Definition: window.hpp:346
genesis::population::Window::end
iterator end()
Iterator to the end of the Data Entries.
Definition: window.hpp:521
genesis::population::Window< double, EmptyAccumulator >::const_reverse_iterator
typename container::const_reverse_iterator const_reverse_iterator
Definition: window.hpp:197
genesis::population::Window< double, EmptyAccumulator >::reference
value_type & reference
Definition: window.hpp:191
genesis::population::Window< double, EmptyAccumulator >::value_type
Entry value_type
Definition: window.hpp:190
genesis::population::Window::~Window
~Window()=default
genesis::population::Window::empty
bool empty() const
Return whether the Window is empty, that is, if it does not contain any Entries.
Definition: window.hpp:282
genesis::population::WindowType::kInterval
@ kInterval
genesis::population::Window::entries
container const & entries() const
Immediate container access to the Data Entries.
Definition: window.hpp:537
genesis::population::Window::span
size_t span() const
Get the distance between the first and the last variant (entry) in the Window.
Definition: window.hpp:308
genesis::population::Window::operator[]
const_reference operator[](size_t index) const
Return a reference to the element at specified location index.
Definition: window.hpp:480
genesis::population::WindowAnchorType::kIntervalMidpoint
@ kIntervalMidpoint
genesis::population::Window::anchor_position
size_t anchor_position(WindowAnchorType anchor_type) const
Get the position in the chromosome reported according to a specific WindowAnchorType.
Definition: window.hpp:378
genesis::population::EmptyAccumulator
Empty helper data struct to serve as a dummy for Window.
Definition: window.hpp:88
genesis::population::Window< double, EmptyAccumulator >::size_type
typename container::size_type size_type
Definition: window.hpp:200
genesis::population::Window< double, EmptyAccumulator >::container
std::deque< Entry > container
Definition: window.hpp:188
genesis::population::Window::anchor_position
size_t anchor_position() const
Get the position in the chromosome reported according to the currently set WindowAnchorType.
Definition: window.hpp:370
genesis::population::Window::Accumulator
A Accumulator
Definition: window.hpp:117
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::Window::Entry
Data that is stored per entry that was enqueued in a window.
Definition: window.hpp:127
genesis::population::Window::Entry::Entry
Entry(size_t index, size_t position, Data &&data)
Constructor that takes data by r-value reference; preferred if possible to use.
Definition: window.hpp:141
genesis::population::Window::at
reference at(size_t index)
Return a reference to the element at specified location index, with bounds checking.
Definition: window.hpp:489
genesis::population::WindowAnchorType::kVariantFirst
@ kVariantFirst
genesis::population::Window< double, EmptyAccumulator >::Data
double Data
Definition: window.hpp:116
genesis::population::Window::chromosome
void chromosome(std::string const &value)
Set the chromosome name that this Window belongs to.
Definition: window.hpp:230
genesis::population::Window::width
size_t width() const
Get the width of the Window.
Definition: window.hpp:246
genesis::population::Window< double, EmptyAccumulator >::difference_type
typename container::difference_type difference_type
Definition: window.hpp:199
genesis::population::Window::anchor_type
void anchor_type(WindowAnchorType value)
Set the WindowAnchorType that is currently set for using anchor_position().
Definition: window.hpp:455
genesis::population::Window::first_position
void first_position(size_t value)
Set the first position in the chromosome of the Window, that is, where the Window starts.
Definition: window.hpp:335
genesis::population::Window::entries
container & entries()
Immediate container access to the Data Entries.
Definition: window.hpp:545
genesis::population::Window::begin
const_iterator begin() const
Const iterator to the begin of the Data Entries.
Definition: window.hpp:513
genesis::population::Window::anchor_type
WindowAnchorType anchor_type() const
Get the WindowAnchorType that is currently set for using anchor_position().
Definition: window.hpp:444
genesis::population::Window::end
const_iterator end() const
Const iterator to the end of the Data Entries.
Definition: window.hpp:529
genesis::population::Window::operator=
Window & operator=(Window const &)=default
genesis::population::Window::first_position
size_t first_position() const
Get the first position in the chromosome of the Window, that is, where the Window starts.
Definition: window.hpp:325
genesis::population::Window< double, EmptyAccumulator >::const_iterator
typename container::const_iterator const_iterator
Definition: window.hpp:195
genesis::population::to_string
std::string to_string(GenomeRegion const &region)
Definition: functions/genome_region.cpp:55
genesis::population::Window::clear
void clear()
Clear all data from the Window.
Definition: window.hpp:574
genesis::population::Window::begin
iterator begin()
Iterator to the begin of the Data Entries.
Definition: window.hpp:505
genesis::population::WindowAnchorType::kVariantMidpoint
@ kVariantMidpoint
genesis::population::WindowType
WindowType
WindowType of a Window, that is, whether we slide along a fixed size interval of the genome,...
Definition: window.hpp:51
genesis::population::WindowAnchorType
WindowAnchorType
Position in the genome that is used for reporting when emitting or using a window.
Definition: window.hpp:69
genesis::population::Window::chromosome
std::string const & chromosome() const
Get the chromosome name that this Window belongs to.
Definition: window.hpp:222
genesis::population::Window< double, EmptyAccumulator >::reverse_iterator
typename container::reverse_iterator reverse_iterator
Definition: window.hpp:196
genesis::population::Window::saturation
double saturation() const
Get the fraction of entries to window width.
Definition: window.hpp:290
genesis::population::Window< double, EmptyAccumulator >::iterator
typename container::iterator iterator
Definition: window.hpp:194
genesis::population::Window< double, EmptyAccumulator >::const_reference
value_type const & const_reference
Definition: window.hpp:192