A library for working with phylogenetic and population genetic data.
v0.27.0
iterator_substitutions.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_TWOBIT_VECTOR_ITERATOR_SUBSTITUTIONS_H_
2 #define GENESIS_UTILS_MATH_TWOBIT_VECTOR_ITERATOR_SUBSTITUTIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
37 
38 #include <cassert>
39 #include <iterator>
40 
41 namespace genesis {
42 namespace utils {
43 
44 // =================================================================================================
45 // Iterator Substitutions
46 // =================================================================================================
47 
49 {
50 
51 public:
52 
53  // -----------------------------------------------------
54  // Typedefs
55  // -----------------------------------------------------
56 
57  using iterator_category = std::forward_iterator_tag;
60 
61  // -----------------------------------------------------
62  // Constructors and Rule of Five
63  // -----------------------------------------------------
64 
66  : origin_( nullptr )
67  , vec_()
68  , pos_( 0 )
69  , cnt_( 0 )
70  , hash_( 0 )
71  {}
72 
74  : origin_( &vector )
75  , vec_( vector )
76  , pos_( 0 )
77  , cnt_( 0 )
78  {
79  // Iterate to the first substitution, and do a first hash calculation.
80  // Later iterations will just update all of this.
81  operator++();
82  hash_ = vec_.hash();
83  }
84 
85  ~IteratorSubstitutions() = default;
86 
87  IteratorSubstitutions( IteratorSubstitutions const& ) = default;
89 
92 
93  // -----------------------------------------------------
94  // Operators
95  // -----------------------------------------------------
96 
98  {
99  return vec_;
100  }
101 
103  {
104  return &vec_;
105  }
106 
108  {
109  // We use four xor's at the current position to cycle through the variants:
110  // The first thee are the substitutions, the last one then restores the original value.
111  // For this, we use the xor order 01 11 01 11.
112  //
113  // The table shows that this works for all four possible values.
114  //
115  // | 00 01 10 11
116  // ---------------------
117  // 0 | 01 | 01 00 11 10
118  // 1 | 11 | 10 11 00 01
119  // 2 | 01 | 11 10 01 00
120  // 3 | 11 | 00 01 10 11
121 
122  // Helper function that cycles the value at a position.
123  auto cycle = [&] ( size_t pos, size_t& cnt ) {
124 
125  // Shorthand.
126  size_t shift = ( 2 * ( pos % TwobitVector::kValuesPerWord ));
127 
128  // Move the needed xor value to the position in the word and apply it.
129  TwobitVector::WordType xor_val = ( cnt % 2 == 0 ? 0x1 : 0x3 );
130  vec_.data_at( pos / TwobitVector::kValuesPerWord ) ^= ( xor_val << shift );
131 
132  // Update the hash: Remove the current value, store the new one.
133  // (We can simply reuse the xor value, as a^b=c <=> b^c=a)
134  hash_ ^= ( xor_val << shift );
135 
136  ++cnt;
137  };
138 
139  // Do at least one cycle at the current position.
140  cycle( pos_, cnt_ );
141 
142  // If we used all three possible substitution values at the current position:
143  if( cnt_ == 4 ) {
144 
145  // If this is not the last possible position, move to the next one.
146  if( pos_ < vec_.size() - 1 ) {
147  // Restore original value.
148  // cycle( pos_, cnt_ );
149 
150  // Move to next position and do a cycle.
151  ++pos_;
152  cnt_ = 0;
153  cycle( pos_, cnt_ );
154 
155  // If we are done, set everything to zero, so that the iterator
156  // is equal to the default-constructed end-iterator.
157  } else {
158  origin_ = nullptr;
159  vec_.clear();
160  pos_ = 0;
161  cnt_ = 0;
162  hash_ = 0;
163  }
164  }
165 
166  return *this;
167  }
168 
170  {
171  self_type tmp = *this;
172  ++(*this);
173  return tmp;
174  }
175 
176  bool operator == ( self_type const& other ) const
177  {
178  return ( origin_ == other.origin_ ) && ( pos_ == other.pos_ ) && ( cnt_ == other.cnt_ );
179  }
180 
181  bool operator != ( self_type const& other ) const
182  {
183  return !( other == *this );
184  }
185 
186  // -----------------------------------------------------
187  // Members
188  // -----------------------------------------------------
189 
193  size_t position() const
194  {
195  return pos_;
196  }
197 
202  {
203  return hash_;
204  }
205 
209  TwobitVector const& vector() const
210  {
211  return vec_;
212  }
213 
214 private:
215 
216  // Store the location of the original vector. This is mainly used for quickly checking
217  // whether two iterators refer to the same underlying vector.
218  // (We do not want to do a full vector equality check at each iteration.)
219  TwobitVector const* origin_;
220 
221  // The current vector, which always has an additional value (compared to the original vector).
222  TwobitVector vec_;
223 
224  // The position where currently a value is inserted.
225  size_t pos_;
226 
227  // A counter for the possible substitutions possibilities.
228  size_t cnt_;
229 
230  // The hash value of the current vector.
232 
233 };
234 
235 // =================================================================================================
236 // Range Wrapper
237 // =================================================================================================
238 
240 {
241  return {
242  IteratorSubstitutions( vector ),
244  };
245 }
246 
247 } // namespace utils
248 } // namespace genesis
249 
250 #endif // include guard
genesis::utils::IteratorSubstitutions::~IteratorSubstitutions
~IteratorSubstitutions()=default
twobit_vector.hpp
genesis::utils::IteratorSubstitutions::operator++
self_type & operator++()
Definition: iterator_substitutions.hpp:107
genesis::utils::TwobitVector::data_at
WordType const & data_at(size_t index) const
Return a single word of the vector.
Definition: twobit_vector.cpp:176
genesis::utils::TwobitVector
Definition: twobit_vector.hpp:41
genesis::utils::TwobitVector::WordType
uint64_t WordType
Underlying word type for the bitvector.
Definition: twobit_vector.hpp:55
genesis::utils::IteratorSubstitutions
Definition: iterator_substitutions.hpp:48
genesis::utils::IteratorSubstitutions::IteratorSubstitutions
IteratorSubstitutions(TwobitVector const &vector)
Definition: iterator_substitutions.hpp:73
genesis::utils::IteratorSubstitutions::iterator_category
std::forward_iterator_tag iterator_category
Definition: iterator_substitutions.hpp:57
genesis::utils::IteratorSubstitutions::vector
TwobitVector const & vector() const
Get the current vector.
Definition: iterator_substitutions.hpp:209
range.hpp
genesis::utils::TwobitVector::clear
void clear()
Clear the vector, so that it contains no data.
Definition: twobit_vector.cpp:405
genesis::utils::iterate_substitutions
utils::Range< IteratorSubstitutions > iterate_substitutions(TwobitVector const &vector)
Definition: iterator_substitutions.hpp:239
genesis::utils::IteratorSubstitutions::position
size_t position() const
Get the position that is currently deleted.
Definition: iterator_substitutions.hpp:193
genesis::utils::Range
Simple wrapper for typical begin() and end() iterators, to be used in range-based for loops.
Definition: range.hpp:46
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::TwobitVector::kValuesPerWord
static const size_t kValuesPerWord
Constant that holds the number of values (of tyoe ValueType) that are stored in a single word in the ...
Definition: twobit_vector.hpp:79
functions.hpp
genesis::utils::TwobitVector::hash
WordType hash() const
Calculate a hash value of the vector, based on its size() and the xor of all its words.
Definition: twobit_vector.cpp:198
genesis::utils::TwobitVector::size
size_t size() const
Return the size of the vector, that is, how many values (of type ValueType) it currently holds.
Definition: twobit_vector.cpp:131
genesis::utils::IteratorSubstitutions::operator->
value_type const * operator->()
Definition: iterator_substitutions.hpp:102
genesis::utils::IteratorSubstitutions::hash
TwobitVector::WordType hash() const
Get the hash value of the current vector.
Definition: iterator_substitutions.hpp:201
genesis::utils::IteratorSubstitutions::operator=
IteratorSubstitutions & operator=(IteratorSubstitutions const &)=default
genesis::utils::IteratorSubstitutions::operator==
bool operator==(self_type const &other) const
Definition: iterator_substitutions.hpp:176
genesis::utils::IteratorSubstitutions::IteratorSubstitutions
IteratorSubstitutions()
Definition: iterator_substitutions.hpp:65
genesis::utils::IteratorSubstitutions::operator!=
bool operator!=(self_type const &other) const
Definition: iterator_substitutions.hpp:181
genesis::utils::IteratorSubstitutions::operator*
value_type const & operator*()
Definition: iterator_substitutions.hpp:97