A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bitvector.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <algorithm>
34 #include <assert.h>
35 #include <functional>
36 
37 namespace genesis {
38 namespace utils {
39 
40 // =============================================================================
41 // Declarations and Class Functions
42 // =============================================================================
43 
45 const Bitvector::IntType Bitvector::all_1_ = (((1ul << 32) - 1) << 32) + ((1ul << 32) - 1);
46 
51 {
52  1ul << 0, 1ul << 1, 1ul << 2, 1ul << 3, 1ul << 4, 1ul << 5, 1ul << 6, 1ul << 7,
53  1ul << 8, 1ul << 9, 1ul << 10, 1ul << 11, 1ul << 12, 1ul << 13, 1ul << 14, 1ul << 15,
54  1ul << 16, 1ul << 17, 1ul << 18, 1ul << 19, 1ul << 20, 1ul << 21, 1ul << 22, 1ul << 23,
55  1ul << 24, 1ul << 25, 1ul << 26, 1ul << 27, 1ul << 28, 1ul << 29, 1ul << 30, 1ul << 31,
56  1ul << 32, 1ul << 33, 1ul << 34, 1ul << 35, 1ul << 36, 1ul << 37, 1ul << 38, 1ul << 39,
57  1ul << 40, 1ul << 41, 1ul << 42, 1ul << 43, 1ul << 44, 1ul << 45, 1ul << 46, 1ul << 47,
58  1ul << 48, 1ul << 49, 1ul << 50, 1ul << 51, 1ul << 52, 1ul << 53, 1ul << 54, 1ul << 55,
59  1ul << 56, 1ul << 57, 1ul << 58, 1ul << 59, 1ul << 60, 1ul << 61, 1ul << 62, 1ul << 63
60 };
61 
75 {
76  Bitvector::all_0_, Bitvector::all_1_ >> 63, Bitvector::all_1_ >> 62, Bitvector::all_1_ >> 61,
77  Bitvector::all_1_ >> 60, Bitvector::all_1_ >> 59, Bitvector::all_1_ >> 58, Bitvector::all_1_ >> 57,
78  Bitvector::all_1_ >> 56, Bitvector::all_1_ >> 55, Bitvector::all_1_ >> 54, Bitvector::all_1_ >> 53,
79  Bitvector::all_1_ >> 52, Bitvector::all_1_ >> 51, Bitvector::all_1_ >> 50, Bitvector::all_1_ >> 49,
80  Bitvector::all_1_ >> 48, Bitvector::all_1_ >> 47, Bitvector::all_1_ >> 46, Bitvector::all_1_ >> 45,
81  Bitvector::all_1_ >> 44, Bitvector::all_1_ >> 43, Bitvector::all_1_ >> 42, Bitvector::all_1_ >> 41,
82  Bitvector::all_1_ >> 40, Bitvector::all_1_ >> 39, Bitvector::all_1_ >> 38, Bitvector::all_1_ >> 37,
83  Bitvector::all_1_ >> 36, Bitvector::all_1_ >> 35, Bitvector::all_1_ >> 34, Bitvector::all_1_ >> 33,
84  Bitvector::all_1_ >> 32, Bitvector::all_1_ >> 31, Bitvector::all_1_ >> 30, Bitvector::all_1_ >> 29,
85  Bitvector::all_1_ >> 28, Bitvector::all_1_ >> 27, Bitvector::all_1_ >> 26, Bitvector::all_1_ >> 25,
86  Bitvector::all_1_ >> 24, Bitvector::all_1_ >> 23, Bitvector::all_1_ >> 22, Bitvector::all_1_ >> 21,
87  Bitvector::all_1_ >> 20, Bitvector::all_1_ >> 19, Bitvector::all_1_ >> 18, Bitvector::all_1_ >> 17,
88  Bitvector::all_1_ >> 16, Bitvector::all_1_ >> 15, Bitvector::all_1_ >> 14, Bitvector::all_1_ >> 13,
89  Bitvector::all_1_ >> 12, Bitvector::all_1_ >> 11, Bitvector::all_1_ >> 10, Bitvector::all_1_ >> 9,
90  Bitvector::all_1_ >> 8, Bitvector::all_1_ >> 7, Bitvector::all_1_ >> 6, Bitvector::all_1_ >> 5,
91  Bitvector::all_1_ >> 4, Bitvector::all_1_ >> 3, Bitvector::all_1_ >> 2, Bitvector::all_1_ >> 1
92 };
93 
98 {
99  0x5555555555555555, //binary: 0101...
100  0x3333333333333333, //binary: 00110011...
101  0x0f0f0f0f0f0f0f0f, //binary: 4 zeros, 4 ones...
102  0x0101010101010101 //the sum of 256 to the power of 0,1,2,3...
103 };
104 
105 Bitvector operator & (Bitvector const& lhs, Bitvector const& rhs)
106 {
107  // make a copy.
108  Bitvector result = Bitvector(lhs);
109 
110  // check for self-and.
111  if (&lhs == &rhs) {
112  return result;
113  }
114 
115  // if not, return and with right hand side.
116  result &= rhs;
117  return result;
118 }
119 
120 Bitvector operator | (Bitvector const& lhs, Bitvector const& rhs)
121 {
122  // make a copy.
123  Bitvector result = Bitvector(lhs);
124 
125  // check for self-or.
126  if (&lhs == &rhs) {
127  return result;
128  }
129 
130  // if not, return or with right hand side.
131  result |= rhs;
132  return result;
133 }
134 
135 Bitvector operator ^ (Bitvector const& lhs, Bitvector const& rhs)
136 {
137  // check for self-xor. if so, return zero vector of same size.
138  if (&lhs == &rhs) {
139  return Bitvector(lhs.size(), false);
140  }
141 
142  // otherwise, make a copy and xor it.
143  Bitvector result = Bitvector(lhs);
144  result ^= rhs;
145  return result;
146 }
147 
151 Bitvector operator - (Bitvector const& lhs, Bitvector const& rhs)
152 {
153  return lhs & (~rhs);
154 }
155 
156 // =============================================================================
157 // Operators
158 // =============================================================================
159 
161 {
162  size_t min_s = std::min(data_.size(), rhs.data_.size());
163  for (size_t i = 0; i < min_s; ++i) {
164  data_[i] &= rhs.data_[i];
165  }
166  return *this;
167 }
168 
170 {
171  size_t min_s = std::min(data_.size(), rhs.data_.size());
172  for (size_t i = 0; i < min_s; ++i) {
173  data_[i] |= rhs.data_[i];
174  }
175  unset_padding();
176  return *this;
177 }
178 
180 {
181  size_t min_s = std::min(data_.size(), rhs.data_.size());
182  for (size_t i = 0; i < min_s; ++i) {
183  data_[i] ^= rhs.data_[i];
184  }
185  unset_padding();
186  return *this;
187 }
188 
190 {
191  Bitvector cpy = Bitvector(*this);
192  cpy.invert();
193  return cpy;
194 }
195 
196 bool Bitvector::operator == (const Bitvector &other) const
197 {
198  if (size_ != other.size_) {
199  return false;
200  }
201  for (size_t i = 0; i < data_.size(); ++i) {
202  if (data_[i] != other.data_[i]) {
203  return false;
204  }
205  }
206  return true;
207 }
208 
209 // =============================================================================
210 // Other Functions
211 // =============================================================================
212 
214 {
215  return symmetric_difference(*this, rhs);
216 }
217 
219 {
220  return (lhs | rhs) & ~(lhs & rhs);
221 }
222 
226 size_t Bitvector::count() const
227 {
228  size_t res = 0;
229  for (IntType x : data_) {
230  // put count of each 2 bits into those 2 bits
231  x -= (x >> 1) & count_mask_[0];
232 
233  // put count of each 4 bits into those 4 bits
234  x = (x & count_mask_[1]) + ((x >> 2) & count_mask_[1]);
235 
236  // put count of each 8 bits into those 8 bits
237  x = (x + (x >> 4)) & count_mask_[2];
238 
239  // take left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
240  res += (x * count_mask_[3]) >> 56;
241  }
242 
243  // safe, but slow version...
244  //~ size_t tmp = 0;
245  //~ for (size_t i = 0; i < size_; ++i) {
246  //~ if (get(i)) {
247  //~ ++tmp;
248  //~ }
249  //~ }
250  //~ assert(tmp == res);
251 
252  return res;
253 }
254 
258 size_t Bitvector::hash() const
259 {
260  // TODO this might be a poor hash function. check what kind of value a hash fct needs to return and decide whether xhash is a good choice instead!
261  size_t res = 0;
262  for (size_t i = 0; i < size_; ++i) {
263  if (get(i)) {
264  res ^= std::hash<size_t>()(i);
265  }
266  }
267  return res;
268 }
269 
275 {
276  IntType res = 0;
277  for (IntType e : data_) {
278  res ^= e;
279  }
280  return res;
281 }
282 
287 {
288  // flip all bits.
289  for (size_t i = 0; i < data_.size(); ++i) {
290  data_[i] = ~ data_[i];
291  }
292 
293  // reset the surplus bits at the end of the vector.
294  unset_padding();
295 }
296 
304 {
305  if (size_ > 0 && get(0)) {
306  invert();
307  }
308 }
309 
313 void Bitvector::reset(const bool value)
314 {
315  // set according to flag.
316  const auto v = value ? all_1_ : all_0_;
317  for (size_t i = 0; i < data_.size(); ++i) {
318  data_[i] = v;
319  }
320 
321  // if we initialized with true, we need to unset the surplus bits at the end!
322  if (value) {
323  unset_padding();
324  }
325 }
326 
335 {
336  if (size_ % IntSize == 0) {
337  return;
338  }
339  data_.back() &= ones_mask_[size_ % IntSize];
340 
341  // other versions that might be helpful if i messed up with this little/big endian stuff...
342  // first one is slow but definitely works, second one is fast, but might have the same
343  // issue as the used version above (which currently works perfectly).
344  //~ for (size_t i = size_ % IntSize; i < IntSize; ++i) {
345  //~ data_.back() &= ~bit_mask_[i];
346  //~ }
347  //~ data_.back() &= bit_mask_[size_ % IntSize] - 1;
348 }
349 
350 // =============================================================================
351 // Dump and Debug
352 // =============================================================================
353 
354 std::ostream& operator << (std::ostream& s, Bitvector const& rhs)
355 {
356  for(size_t i = 0; i < rhs.size() ; ++i) {
357  s << (rhs.get(i) ? "1" : "0");
358  }
359  return s;
360 }
361 
362 std::string Bitvector::dump() const
363 {
364  std::string res = "[" + std::to_string(size_) + "]\n";
365  for (size_t i = 0; i < size_; ++i) {
366  res += (*this)[i] ? "1" : "0";
367  if ((i+1) % 64 == 0) {
368  res += "\n";
369  } else if ((i+1) % 8 == 0) {
370  res += " ";
371  }
372  }
373  return res;
374 }
375 
376 std::string Bitvector::dump_int(IntType x) const
377 {
378  std::string res = "";
379  for (size_t i = 0; i < IntSize; ++i) {
380  res += (x & bit_mask_[i] ? "1" : "0");
381  if ((i+1) % 8 == 0) {
382  res += " ";
383  }
384  }
385  return res;
386 }
387 
388 } // namespace utils
389 } // namespace genesis
static const IntType all_0_
Definition: bitvector.hpp:250
Bitvector symmetric_difference(Bitvector const &rhs) const
Definition: bitvector.cpp:213
size_t size() const
Returns the size (number of total bits) of this Bitvector.
Definition: bitvector.hpp:105
Bitvector & operator^=(Bitvector const &rhs)
Definition: bitvector.cpp:179
size_t count() const
Counts the number of set bits in the Bitvector.
Definition: bitvector.cpp:226
static const IntType all_1_
Definition: bitvector.hpp:251
IntType x_hash() const
Returns a hash value of type IntType, that is quicker to calculate than hash(), and thus can be used ...
Definition: bitvector.cpp:274
static const IntType count_mask_[4]
Mask used for quickly counting the number of set bits, see count().
Definition: bitvector.hpp:256
bool operator&(SkipWhitespace lhs, SkipWhitespace rhs)
And-operator to check whether a SkipWhitespace is set.
Definition: scanner.hpp:95
Bitvector operator|(Bitvector const &lhs, Bitvector const &rhs)
Definition: bitvector.cpp:120
size_t hash() const
Returns an std::hash value for the Bitvector.
Definition: bitvector.cpp:258
void reset(const bool value=false)
Reset all the bits to false. If provided with parameter true, sets all bits to true.
Definition: bitvector.cpp:313
static const size_t IntSize
Definition: bitvector.hpp:67
Bitvector()=default
Default constructor. Creates an empty Bitvector of size 0.
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:300
Bitvector operator^(Bitvector const &lhs, Bitvector const &rhs)
Definition: bitvector.cpp:135
bool operator==(const Bitvector &other) const
Definition: bitvector.cpp:196
void invert()
Flip all bits.
Definition: bitvector.cpp:286
Bitvector & operator&=(Bitvector const &rhs)
Definition: bitvector.cpp:160
static const IntType bit_mask_[IntSize]
Contains a single bit at each of the 64 positions.
Definition: bitvector.hpp:253
static const IntType ones_mask_[IntSize]
Contains as many ones as the position in the array tells.
Definition: bitvector.hpp:254
void normalize()
Brings the Bitvector in a normalized form, where the first bit is always zero.
Definition: bitvector.cpp:303
std::vector< IntType > data_
Definition: bitvector.hpp:263
Bitvector & operator|=(Bitvector const &rhs)
Definition: bitvector.cpp:169
bool get(size_t index) const
Returns the value of a single bit, with boundary check.
Definition: bitvector.hpp:124
void unset_padding()
Internal function that sets all bits to zero that are not actively used.
Definition: bitvector.cpp:334
Bitvector operator~() const
Definition: bitvector.cpp:189
std::ostream & operator<<(std::ostream &os, NexusBlock const &block)
Definition: block.hpp:80
std::string dump_int(IntType x) const
Definition: bitvector.cpp:376
Bitvector operator-(Bitvector const &lhs, Bitvector const &rhs)
Set-minus of two Bitvectors.
Definition: bitvector.cpp:151
std::string dump() const
Definition: bitvector.cpp:362