A toolkit for working with phylogenetic data.
v0.24.0
bitvector.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
34 
35 #include <algorithm>
36 #include <functional>
37 #include <stdexcept>
38 
39 namespace genesis {
40 namespace utils {
41 
42 // =============================================================================
43 // Constants
44 // =============================================================================
45 
46 const Bitvector::IntType Bitvector::all_0_ = 0ul;
47 const Bitvector::IntType Bitvector::all_1_ = (((1ul << 32) - 1) << 32) + ((1ul << 32) - 1);
48 
49 const std::array<Bitvector::IntType, Bitvector::IntSize> Bitvector::bit_mask_ =
50 {{
51  1ul << 0, 1ul << 1, 1ul << 2, 1ul << 3, 1ul << 4, 1ul << 5, 1ul << 6, 1ul << 7,
52  1ul << 8, 1ul << 9, 1ul << 10, 1ul << 11, 1ul << 12, 1ul << 13, 1ul << 14, 1ul << 15,
53  1ul << 16, 1ul << 17, 1ul << 18, 1ul << 19, 1ul << 20, 1ul << 21, 1ul << 22, 1ul << 23,
54  1ul << 24, 1ul << 25, 1ul << 26, 1ul << 27, 1ul << 28, 1ul << 29, 1ul << 30, 1ul << 31,
55  1ul << 32, 1ul << 33, 1ul << 34, 1ul << 35, 1ul << 36, 1ul << 37, 1ul << 38, 1ul << 39,
56  1ul << 40, 1ul << 41, 1ul << 42, 1ul << 43, 1ul << 44, 1ul << 45, 1ul << 46, 1ul << 47,
57  1ul << 48, 1ul << 49, 1ul << 50, 1ul << 51, 1ul << 52, 1ul << 53, 1ul << 54, 1ul << 55,
58  1ul << 56, 1ul << 57, 1ul << 58, 1ul << 59, 1ul << 60, 1ul << 61, 1ul << 62, 1ul << 63
59 }};
60 
61 const std::array<Bitvector::IntType, Bitvector::IntSize> Bitvector::ones_mask_ =
62 {{
63  Bitvector::all_1_, Bitvector::all_1_ >> 63, Bitvector::all_1_ >> 62, Bitvector::all_1_ >> 61,
64  Bitvector::all_1_ >> 60, Bitvector::all_1_ >> 59, Bitvector::all_1_ >> 58, Bitvector::all_1_ >> 57,
65  Bitvector::all_1_ >> 56, Bitvector::all_1_ >> 55, Bitvector::all_1_ >> 54, Bitvector::all_1_ >> 53,
66  Bitvector::all_1_ >> 52, Bitvector::all_1_ >> 51, Bitvector::all_1_ >> 50, Bitvector::all_1_ >> 49,
67  Bitvector::all_1_ >> 48, Bitvector::all_1_ >> 47, Bitvector::all_1_ >> 46, Bitvector::all_1_ >> 45,
68  Bitvector::all_1_ >> 44, Bitvector::all_1_ >> 43, Bitvector::all_1_ >> 42, Bitvector::all_1_ >> 41,
69  Bitvector::all_1_ >> 40, Bitvector::all_1_ >> 39, Bitvector::all_1_ >> 38, Bitvector::all_1_ >> 37,
70  Bitvector::all_1_ >> 36, Bitvector::all_1_ >> 35, Bitvector::all_1_ >> 34, Bitvector::all_1_ >> 33,
71  Bitvector::all_1_ >> 32, Bitvector::all_1_ >> 31, Bitvector::all_1_ >> 30, Bitvector::all_1_ >> 29,
72  Bitvector::all_1_ >> 28, Bitvector::all_1_ >> 27, Bitvector::all_1_ >> 26, Bitvector::all_1_ >> 25,
73  Bitvector::all_1_ >> 24, Bitvector::all_1_ >> 23, Bitvector::all_1_ >> 22, Bitvector::all_1_ >> 21,
74  Bitvector::all_1_ >> 20, Bitvector::all_1_ >> 19, Bitvector::all_1_ >> 18, Bitvector::all_1_ >> 17,
75  Bitvector::all_1_ >> 16, Bitvector::all_1_ >> 15, Bitvector::all_1_ >> 14, Bitvector::all_1_ >> 13,
76  Bitvector::all_1_ >> 12, Bitvector::all_1_ >> 11, Bitvector::all_1_ >> 10, Bitvector::all_1_ >> 9,
77  Bitvector::all_1_ >> 8, Bitvector::all_1_ >> 7, Bitvector::all_1_ >> 6, Bitvector::all_1_ >> 5,
78  Bitvector::all_1_ >> 4, Bitvector::all_1_ >> 3, Bitvector::all_1_ >> 2, Bitvector::all_1_ >> 1
79 }};
80 
81 const std::array<Bitvector::IntType, 4> Bitvector::count_mask_ =
82 {{
83  0x5555555555555555, //binary: 0101...
84  0x3333333333333333, //binary: 00110011...
85  0x0f0f0f0f0f0f0f0f, //binary: 4 zeros, 4 ones...
86  0x0101010101010101 //the sum of 256 to the power of 0,1,2,3...
87 }};
88 
89 // =============================================================================
90 // Constructor and Rule of Five
91 // =============================================================================
92 
93 Bitvector::Bitvector( std::string const& values )
94  : Bitvector::Bitvector( values.size(), false )
95 {
96  for( size_t i = 0; i < values.size(); ++i ) {
97  switch( values[i] ) {
98  case '0':
99  break;
100  case '1':
101  set(i);
102  break;
103  default:
104  throw std::invalid_argument(
105  "Cannot construct Bitvector from std::string that contains characters "
106  "other than 0 and 1."
107  );
108  }
109  }
110 }
111 
112 Bitvector::Bitvector( Bitvector const& other, size_t max_size )
113 {
114  if( max_size > other.size() ) {
115  max_size = other.size();
116  }
117  size_ = max_size;
118  auto const ds = (size_ / IntSize) + (size_ % IntSize == 0 ? 0 : 1);
119  assert( ds <= other.data_.size() );
120  data_ = std::vector<IntType>( other.data_.begin(), other.data_.begin() + ds );
121  unset_padding_();
122 }
123 
124 // =============================================================================
125 // Operators
126 // =============================================================================
127 
129 {
130  if( size_ != rhs.size_ ) {
131  throw std::runtime_error(
132  "Cannot use operator `&` or `&=` on Bitvectors of different size. "
133  "Use bitwise_and() instead."
134  );
135  }
136 
137  for (size_t i = 0; i < data_.size(); ++i) {
138  data_[i] &= rhs.data_[i];
139  }
140  return *this;
141 }
142 
144 {
145  if( size_ != rhs.size_ ) {
146  throw std::runtime_error(
147  "Cannot use operator `|` or `|=` on Bitvectors of different size. "
148  "Use bitwise_or() instead."
149  );
150  }
151 
152  for (size_t i = 0; i < data_.size(); ++i) {
153  data_[i] |= rhs.data_[i];
154  }
155  return *this;
156 }
157 
159 {
160  if( size_ != rhs.size_ ) {
161  throw std::runtime_error(
162  "Cannot use operator `^` or `^=` on Bitvectors of different size. "
163  "Use bitwise_xor() instead."
164  );
165  }
166 
167  for (size_t i = 0; i < data_.size(); ++i) {
168  data_[i] ^= rhs.data_[i];
169  }
170  return *this;
171 }
172 
174 {
175  Bitvector cpy = Bitvector(*this);
176  cpy.negate();
177  return cpy;
178 }
179 
180 Bitvector operator & (Bitvector const& lhs, Bitvector const& rhs)
181 {
182  Bitvector result = Bitvector(lhs);
183  result &= rhs;
184  return result;
185 }
186 
187 Bitvector operator | (Bitvector const& lhs, Bitvector const& rhs)
188 {
189  Bitvector result = Bitvector(lhs);
190  result |= rhs;
191  return result;
192 }
193 
194 Bitvector operator ^ (Bitvector const& lhs, Bitvector const& rhs)
195 {
196  Bitvector result = Bitvector(lhs);
197  result ^= rhs;
198  return result;
199 }
200 
201 bool Bitvector::operator == (const Bitvector &other) const
202 {
203  if (size_ != other.size_) {
204  return false;
205  }
206  for (size_t i = 0; i < data_.size(); ++i) {
207  if (data_[i] != other.data_[i]) {
208  return false;
209  }
210  }
211  return true;
212 }
213 
214 bool Bitvector::operator != (const Bitvector &other) const
215 {
216  return !(*this == other);
217 }
218 
219 // =============================================================================
220 // Other Functions
221 // =============================================================================
222 
223 size_t Bitvector::count() const
224 {
225  size_t res = 0;
226  for (IntType x : data_) {
227  // put count of each 2 bits into those 2 bits
228  x -= (x >> 1) & count_mask_[0];
229 
230  // put count of each 4 bits into those 4 bits
231  x = (x & count_mask_[1]) + ((x >> 2) & count_mask_[1]);
232 
233  // put count of each 8 bits into those 8 bits
234  x = (x + (x >> 4)) & count_mask_[2];
235 
236  // take left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
237  res += (x * count_mask_[3]) >> 56;
238  }
239 
240  // safe, but slow version...
241  //~ size_t tmp = 0;
242  //~ for (size_t i = 0; i < size_; ++i) {
243  //~ if (get(i)) {
244  //~ ++tmp;
245  //~ }
246  //~ }
247  //~ assert(tmp == res);
248 
249  return res;
250 }
251 
252 size_t Bitvector::hash() const
253 {
254  std::size_t res = 0;
255  for( auto const& d : data_ ) {
256  res = hash_combine( res, d );
257  }
258  return res;
259 }
260 
262 {
263  IntType res = 0;
264  for( auto const& d : data_ ) {
265  res ^= d;
266  }
267  return res;
268 }
269 
271 {
272  // flip all bits.
273  for (size_t i = 0; i < data_.size(); ++i) {
274  data_[i] = ~ data_[i];
275  }
276 
277  // reset the surplus bits at the end of the vector.
278  unset_padding_();
279 }
280 
282 {
283  if (size_ > 0 && get(0)) {
284  negate();
285  }
286 }
287 
288 void Bitvector::set_all( const bool value )
289 {
290  // set according to flag.
291  const auto v = value ? all_1_ : all_0_;
292  for (size_t i = 0; i < data_.size(); ++i) {
293  data_[i] = v;
294  }
295 
296  // if we initialized with true, we need to unset the surplus bits at the end!
297  if (value) {
298  unset_padding_();
299  }
300 }
301 
302 void Bitvector::unset_padding_()
303 {
304  // Only apply if there are actual padding bits.
305  // if(( size_ % IntSize ) == 0 ) {
306  // return;
307  // }
308  // --> Nope, we have changed the mask to be all-one for its first entry, so that we
309  // can avoid the branching here!
310 
311  assert( size_ % IntSize < ones_mask_.size() );
312  data_.back() &= ones_mask_[ size_ % IntSize ];
313 
314  // other versions that might be helpful if i messed up with this little/big endian stuff...
315  // first one is slow but definitely works, second one is fast, but might have the same
316  // issue as the used version above (which currently works perfectly).
317  //~ for (size_t i = size_ % IntSize; i < IntSize; ++i) {
318  //~ data_.back() &= ~bit_mask_[i];
319  //~ }
320  //~ data_.back() &= bit_mask_[size_ % IntSize] - 1;
321 }
322 
323 // =============================================================================
324 // Dump and Debug
325 // =============================================================================
326 
327 std::string Bitvector::dump() const
328 {
329  std::string res = "[" + std::to_string(size_) + "]\n";
330  for (size_t i = 0; i < size_; ++i) {
331  res += (*this)[i] ? "1" : "0";
332  if ((i+1) % 64 == 0) {
333  res += "\n";
334  } else if ((i+1) % 8 == 0) {
335  res += " ";
336  }
337  }
338  return res;
339 }
340 
341 std::string Bitvector::dump_int(IntType x) const
342 {
343  std::string res = "";
344  for (size_t i = 0; i < IntSize; ++i) {
345  res += (x & bit_mask_[i] ? "1" : "0");
346  if ((i+1) % 8 == 0) {
347  res += " ";
348  }
349  }
350  return res;
351 }
352 
353 } // namespace utils
354 } // namespace genesis
Bitvector & operator^=(Bitvector const &rhs)
Definition: bitvector.cpp:158
std::size_t hash_combine(std::size_t seed, T const &value)
Combine a seed value (e.g., another hash) with the hash of a given type.
Definition: std.hpp:113
Bitvector & operator&=(Bitvector const &rhs)
size_t count() const
Count the number of set bits in the Bitvector, that is, its Hamming weight, or population count (popc...
Definition: bitvector.cpp:223
std::string dump() const
Definition: bitvector.cpp:327
IntType x_hash() const
Return a hash value of type IntType that is quicker to calculate than hash().
Definition: bitvector.cpp:261
static const size_t IntSize
Definition: bitvector.hpp:57
Bitvector()=default
Default constructor. Creates an empty Bitvector of size 0.
Bitvector operator~() const
Definition: bitvector.cpp:173
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
Provides some valuable additions to STD.
void negate()
Flip all bits.
Definition: bitvector.cpp:270
friend Bitvector operator&(Bitvector const &lhs, Bitvector const &rhs)
Definition: bitvector.cpp:180
bool operator==(const Bitvector &other) const
Definition: bitvector.cpp:201
bool operator!=(const Bitvector &other) const
Definition: bitvector.cpp:214
friend Bitvector operator^(Bitvector const &lhs, Bitvector const &rhs)
Definition: bitvector.cpp:194
void normalize()
Bring the Bitvector in a normalized form, where the first bit is always zero.
Definition: bitvector.cpp:281
Bitvector & operator|=(Bitvector const &rhs)
Definition: bitvector.cpp:143
std::string dump_int(IntType x) const
Definition: bitvector.cpp:341
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
void set_all(const bool value=false)
Set all the bits to a specified value.
Definition: bitvector.cpp:288
size_t hash() const
Return an std::hash value for the Bitvector.
Definition: bitvector.cpp:252
size_t size() const
Return the size (number of bits) of this Bitvector.
Definition: bitvector.hpp:230
friend Bitvector operator|(Bitvector const &lhs, Bitvector const &rhs)
Definition: bitvector.cpp:187