A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
iterator_substitutions.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_TWOBIT_VECTOR_ITERATOR_SUBSTITUTIONS_H_
2 #define GENESIS_UTILS_MATH_TWOBIT_VECTOR_ITERATOR_SUBSTITUTIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
37 
38 #include <assert.h>
39 #include <iterator>
40 
41 namespace genesis {
42 namespace utils {
43 
44 // =================================================================================================
45 // Iterator Substitutions
46 // =================================================================================================
47 
49 {
50 
51 public:
52 
53  // -----------------------------------------------------
54  // Typedefs
55  // -----------------------------------------------------
56 
57  using iterator_category = std::forward_iterator_tag;
60 
61  // -----------------------------------------------------
62  // Constructors and Rule of Five
63  // -----------------------------------------------------
64 
66  : origin_( nullptr )
67  , vec_()
68  , pos_( 0 )
69  , cnt_( 0 )
70  , hash_( 0 )
71  {}
72 
74  : origin_( &vector )
75  , vec_( vector )
76  , pos_( 0 )
77  , cnt_( 0 )
78  {
79  // Iterate to the first substitution, and do a first hash calculation.
80  // Later iterations will just update all of this.
81  operator++();
82  hash_ = vec_.hash();
83  }
84 
85  ~IteratorSubstitutions() = default;
86 
87  IteratorSubstitutions( IteratorSubstitutions const& ) = default;
89 
92 
93  // -----------------------------------------------------
94  // Operators
95  // -----------------------------------------------------
96 
98  {
99  return vec_;
100  }
101 
103  {
104  return &vec_;
105  }
106 
108  {
109  // We use four xor's at the current position to cycle through the variants:
110  // The first thee are the substitutions, the last one then restores the original value.
111  // For this, we use the xor order 01 11 01 11.
112  //
113  // The table shows that this works for all four possible values.
114  //
115  // | 00 01 10 11
116  // ---------------------
117  // 0 | 01 | 01 00 11 10
118  // 1 | 11 | 10 11 00 01
119  // 2 | 01 | 11 10 01 00
120  // 3 | 11 | 00 01 10 11
121 
122  // Helper function that cycles the value at a position.
123  auto cycle = [&] ( size_t pos, size_t& cnt ) {
124 
125  // Shorthand.
126  size_t shift = ( 2 * ( pos % TwobitVector::kValuesPerWord ));
127 
128  // Move the needed xor value to the position in the word and apply it.
129  TwobitVector::WordType xor_val = ( cnt % 2 == 0 ? 0x1 : 0x3 );
130  vec_.data_at( pos / TwobitVector::kValuesPerWord ) ^= ( xor_val << shift );
131 
132  // Update the hash: Remove the current value, store the new one.
133  // (We can simply reuse the xor value, as a^b=c <=> b^c=a)
134  hash_ ^= ( xor_val << shift );
135 
136  ++cnt;
137  };
138 
139  // Do at least one cycle at the current position.
140  cycle( pos_, cnt_ );
141 
142  // If we used all three possible substitution values at the current position:
143  if( cnt_ == 4 ) {
144 
145  // If this is not the last possible position, move to the next one.
146  if( pos_ < vec_.size() - 1 ) {
147  // Restore original value.
148  // cycle( pos_, cnt_ );
149 
150  // Move to next position and do a cycle.
151  ++pos_;
152  cnt_ = 0;
153  cycle( pos_, cnt_ );
154 
155  // If we are done, set everything to zero, so that the iterator
156  // is equal to the default-constructed end-iterator.
157  } else {
158  origin_ = nullptr;
159  vec_.clear();
160  pos_ = 0;
161  cnt_ = 0;
162  hash_ = 0;
163  }
164  }
165 
166  return *this;
167  }
168 
170  {
171  self_type tmp = *this;
172  ++(*this);
173  return tmp;
174  }
175 
176  bool operator == ( self_type const& other ) const
177  {
178  return ( origin_ == other.origin_ ) && ( pos_ == other.pos_ ) && ( cnt_ == other.cnt_ );
179  }
180 
181  bool operator != ( self_type const& other ) const
182  {
183  return !( other == *this );
184  }
185 
186  // -----------------------------------------------------
187  // Members
188  // -----------------------------------------------------
189 
193  size_t position() const
194  {
195  return pos_;
196  }
197 
202  {
203  return hash_;
204  }
205 
209  TwobitVector const& vector() const
210  {
211  return vec_;
212  }
213 
214 private:
215 
216  // Store the location of the original vector. This is mainly used for quickly checking
217  // whether two iterators refer to the same underlying vector.
218  // (We do not want to do a full vector equality check at each iteration.)
219  TwobitVector const* origin_;
220 
221  // The current vector, which always has an additional value (compared to the original vector).
222  TwobitVector vec_;
223 
224  // The position where currently a value is inserted.
225  size_t pos_;
226 
227  // A counter for the possible substitutions possibilities.
228  size_t cnt_;
229 
230  // The hash value of the current vector.
232 
233 };
234 
235 // =================================================================================================
236 // Range Wrapper
237 // =================================================================================================
238 
240 {
241  return {
242  IteratorSubstitutions( vector ),
244  };
245 }
246 
247 } // namespace utils
248 } // namespace genesis
249 
250 #endif // include guard
static const size_t kValuesPerWord
Constant that holds the number of values (of tyoe ValueType) that are stored in a single word in the ...
size_t size() const
Return the size of the vector, that is, how many values (of type ValueType) it currently holds...
WordType const & data_at(size_t index) const
Return a single word of the vector.
size_t position() const
Get the position that is currently deleted.
IteratorSubstitutions(TwobitVector const &vector)
uint64_t WordType
Underlying word type for the bitvector.
TwobitVector::WordType hash() const
Get the hash value of the current vector.
WordType hash() const
Calculate a hash value of the vector, based on its size() and the xor of all its words.
bool operator==(self_type const &other) const
void clear()
Clear the vector, so that it contains no data.
IteratorSubstitutions & operator=(IteratorSubstitutions const &)=default
bool operator!=(self_type const &other) const
TwobitVector const & vector() const
Get the current vector.
utils::Range< IteratorSubstitutions > iterate_substitutions(TwobitVector const &vector)