A library for working with phylogenetic and population genetic data.
v0.32.0
iterator_insertions.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_TWOBIT_VECTOR_ITERATOR_INSERTIONS_H_
2 #define GENESIS_UTILS_MATH_TWOBIT_VECTOR_ITERATOR_INSERTIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
37 
38 #include <cassert>
39 #include <iterator>
40 
41 namespace genesis {
42 namespace utils {
43 
44 // =================================================================================================
45 // Iterator Insertions
46 // =================================================================================================
47 
49 {
50 
51 public:
52 
53  // -----------------------------------------------------
54  // Typedefs
55  // -----------------------------------------------------
56 
57  using iterator_category = std::forward_iterator_tag;
60 
61  // -----------------------------------------------------
62  // Constructors and Rule of Five
63  // -----------------------------------------------------
64 
66  : origin_( nullptr )
67  , vec_()
68  , pos_( 0 )
69  , cnt_( 0 )
70  , hash_( 0 )
71  {}
72 
74  : origin_( &vector )
75  , vec_( vector )
76  , pos_( 0 )
77  , cnt_( 0 )
78  {
79  // Insert a 0 (=A) value at the first position, and do a first hash calculation.
80  // Later iterations will just update all of this.
82  hash_ = vec_.hash();
83  }
84 
85  ~IteratorInsertions() = default;
86 
87  IteratorInsertions( IteratorInsertions const& ) = default;
89 
92 
93  // -----------------------------------------------------
94  // Operators
95  // -----------------------------------------------------
96 
98  {
99  return vec_;
100  }
101 
103  {
104  return &vec_;
105  }
106 
108  {
109  // Example:
110  // Original: CAT
111  // ACAT, CCAT, GCAT, TCAT,
112  // CAAT, CCAT, CGAT, CTAT,
113  // CAAT, CACT, CAGT, CATT,
114  // CATA, CATC, CATG, CATT
115  //
116  // There are duplicates in there. Currently, they are not skipped - this is left as a
117  // future optimization.
118 
119  // Shorthand.
120  size_t shift = ( 2 * ( pos_ % TwobitVector::kValuesPerWord ));
121 
122  // If there are still possible insertion values at the current position,
123  // use the next value.
124  if( cnt_ < 3 ) {
125 
126  // If we are not at the last value (11), we can simply move to the next one by
127  // adding one to the current position (00 -> 01, 01 -> 10, 10 -> 11).
128 
129  // Move a 1 to the position in the word and add it.
130  auto const one_shift = static_cast< TwobitVector::WordType >( 0x1 ) << shift;
131  vec_.data_at( pos_ / TwobitVector::kValuesPerWord ) += one_shift;
132 
133  // Update the hash: Remove the current count value, store the next one.
134  auto hash_xor = static_cast< TwobitVector::WordType >( cnt_ ^ ( cnt_ + 1 ) );
135  hash_ ^= ( hash_xor << shift );
136 
137  ++cnt_;
138 
139  // If we used all four possible insertion values at the current position,
140  // but if this is not the last possible position, move to the next one.
141  } else if( pos_ < vec_.size() - 1 ) {
142 
143  // Move the value at the next position one to the left.
144  // We can then fill is previous position with the new insertion value.
145  auto next = vec_.get( pos_ + 1 );
146  vec_.set( pos_, next );
147 
148  // Update the hash at the old position: Remove the last value of the insertion
149  // (which is a 11 = 3), and store the value that we just moved to that position.
150  auto hash_xor = static_cast< TwobitVector::WordType >( next ) ^ 0x3;
151  hash_ ^= ( hash_xor << shift );
152 
153  // Move to the next position and recalculate the shift value accordingly.
154  ++pos_;
155  shift = ( 2 * (( pos_ ) % TwobitVector::kValuesPerWord ));
156 
157  // Update the hash at the new position: Remove the value that was there before.
158  // We do not need to store a new value here, as it will be a 0 (=A) anyway.
159  hash_xor = static_cast< TwobitVector::WordType >( next );
160  hash_ ^= ( hash_xor << shift );
161 
162  // Set the value at the new position to 0 (=A) and restart the counter.
163  vec_.set( pos_, TwobitVector::ValueType::A );
164  cnt_ = 0;
165 
166  // If we are done, set everything to zero, so that the iterator
167  // is equal to the default-constructed end-iterator.
168  } else {
169  origin_ = nullptr;
170  vec_.clear();
171  pos_ = 0;
172  cnt_ = 0;
173  hash_ = 0;
174  }
175 
176  return *this;
177  }
178 
180  {
181  self_type tmp = *this;
182  ++(*this);
183  return tmp;
184  }
185 
186  bool operator == ( self_type const& other ) const
187  {
188  return ( origin_ == other.origin_ ) && ( pos_ == other.pos_ ) && ( cnt_ == other.cnt_ );
189  }
190 
191  bool operator != ( self_type const& other ) const
192  {
193  return !( other == *this );
194  }
195 
196  // -----------------------------------------------------
197  // Members
198  // -----------------------------------------------------
199 
203  size_t position() const
204  {
205  return pos_;
206  }
207 
212  {
213  return hash_;
214  }
215 
219  TwobitVector const& vector() const
220  {
221  return vec_;
222  }
223 
224 private:
225 
226  // Store the location of the original vector. This is mainly used for quickly checking
227  // whether two iterators refer to the same underlying vector.
228  // (We do not want to do a full vector equality check at each iteration.)
229  TwobitVector const* origin_;
230 
231  // The current vector, which always has an additional value (compared to the original vector).
232  TwobitVector vec_;
233 
234  // The position where currently a value is inserted.
235  size_t pos_;
236 
237  // A counter for the possible insertion values (0-3).
238  size_t cnt_;
239 
240  // The hash value of the current vector.
242 
243 };
244 
245 // =================================================================================================
246 // Range Wrapper
247 // =================================================================================================
248 
250 {
251  return {
252  IteratorInsertions( vector ),
254  };
255 }
256 
257 } // namespace utils
258 } // namespace genesis
259 
260 #endif // include guard
genesis::utils::IteratorInsertions::operator!=
bool operator!=(self_type const &other) const
Definition: iterator_insertions.hpp:191
twobit_vector.hpp
genesis::utils::IteratorInsertions::position
size_t position() const
Get the position that is currently deleted.
Definition: iterator_insertions.hpp:203
genesis::utils::IteratorInsertions
Definition: iterator_insertions.hpp:48
genesis::utils::IteratorInsertions::IteratorInsertions
IteratorInsertions()
Definition: iterator_insertions.hpp:65
genesis::utils::TwobitVector::data_at
WordType const & data_at(size_t index) const
Return a single word of the vector.
Definition: twobit_vector.cpp:176
genesis::utils::TwobitVector
Definition: twobit_vector.hpp:41
genesis::utils::IteratorInsertions::~IteratorInsertions
~IteratorInsertions()=default
genesis::utils::IteratorInsertions::iterator_category
std::forward_iterator_tag iterator_category
Definition: iterator_insertions.hpp:57
genesis::utils::iterate_insertions
utils::Range< IteratorInsertions > iterate_insertions(TwobitVector const &vector)
Definition: iterator_insertions.hpp:249
genesis::utils::TwobitVector::WordType
uint64_t WordType
Underlying word type for the bitvector.
Definition: twobit_vector.hpp:55
genesis::utils::TwobitVector::insert_at
void insert_at(size_t index, ValueType value)
Insert a value at a position.
Definition: twobit_vector.cpp:290
genesis::utils::TwobitVector::set
void set(size_t index, ValueType value)
Set a value at a position in the vector.
Definition: twobit_vector.cpp:269
genesis::utils::IteratorInsertions::IteratorInsertions
IteratorInsertions(TwobitVector const &vector)
Definition: iterator_insertions.hpp:73
genesis::utils::IteratorInsertions::vector
TwobitVector const & vector() const
Get the current vector.
Definition: iterator_insertions.hpp:219
genesis::utils::IteratorInsertions::operator*
value_type const & operator*()
Definition: iterator_insertions.hpp:97
genesis::utils::TwobitVector::ValueType::A
@ A
range.hpp
genesis::utils::TwobitVector::clear
void clear()
Clear the vector, so that it contains no data.
Definition: twobit_vector.cpp:405
genesis::utils::IteratorInsertions::hash
TwobitVector::WordType hash() const
Get the hash value of the current vector.
Definition: iterator_insertions.hpp:211
genesis::utils::IteratorInsertions::operator=
IteratorInsertions & operator=(IteratorInsertions const &)=default
genesis::utils::IteratorInsertions::operator==
bool operator==(self_type const &other) const
Definition: iterator_insertions.hpp:186
genesis::utils::Range
Simple wrapper for typical begin() and end() iterators, to be used in range-based for loops.
Definition: range.hpp:46
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::TwobitVector::kValuesPerWord
static const size_t kValuesPerWord
Constant that holds the number of values (of tyoe ValueType) that are stored in a single word in the ...
Definition: twobit_vector.hpp:79
functions.hpp
genesis::utils::IteratorInsertions::operator++
self_type & operator++()
Definition: iterator_insertions.hpp:107
genesis::utils::IteratorInsertions::operator->
value_type const * operator->()
Definition: iterator_insertions.hpp:102
genesis::utils::TwobitVector::hash
WordType hash() const
Calculate a hash value of the vector, based on its size() and the xor of all its words.
Definition: twobit_vector.cpp:198
genesis::utils::TwobitVector::size
size_t size() const
Return the size of the vector, that is, how many values (of type ValueType) it currently holds.
Definition: twobit_vector.cpp:131
genesis::utils::TwobitVector::get
ValueType get(size_t index) const
Get the value at a position in the vector.
Definition: twobit_vector.cpp:149