A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bitvector.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <algorithm>
34 #include <functional>
35 #include <stdexcept>
36 
37 namespace genesis {
38 namespace utils {
39 
40 // =============================================================================
41 // Constants
42 // =============================================================================
43 
44 const Bitvector::IntType Bitvector::all_0_ = 0ul;
45  const Bitvector::IntType Bitvector::all_1_ = (((1ul << 32) - 1) << 32) + ((1ul << 32) - 1);
46 
50 const Bitvector::IntType Bitvector::bit_mask_[Bitvector::IntSize] =
51 {
52  1ul << 0, 1ul << 1, 1ul << 2, 1ul << 3, 1ul << 4, 1ul << 5, 1ul << 6, 1ul << 7,
53  1ul << 8, 1ul << 9, 1ul << 10, 1ul << 11, 1ul << 12, 1ul << 13, 1ul << 14, 1ul << 15,
54  1ul << 16, 1ul << 17, 1ul << 18, 1ul << 19, 1ul << 20, 1ul << 21, 1ul << 22, 1ul << 23,
55  1ul << 24, 1ul << 25, 1ul << 26, 1ul << 27, 1ul << 28, 1ul << 29, 1ul << 30, 1ul << 31,
56  1ul << 32, 1ul << 33, 1ul << 34, 1ul << 35, 1ul << 36, 1ul << 37, 1ul << 38, 1ul << 39,
57  1ul << 40, 1ul << 41, 1ul << 42, 1ul << 43, 1ul << 44, 1ul << 45, 1ul << 46, 1ul << 47,
58  1ul << 48, 1ul << 49, 1ul << 50, 1ul << 51, 1ul << 52, 1ul << 53, 1ul << 54, 1ul << 55,
59  1ul << 56, 1ul << 57, 1ul << 58, 1ul << 59, 1ul << 60, 1ul << 61, 1ul << 62, 1ul << 63
60 };
61 
74 const Bitvector::IntType Bitvector::ones_mask_[Bitvector::IntSize] =
75 {
76  Bitvector::all_0_, Bitvector::all_1_ >> 63, Bitvector::all_1_ >> 62, Bitvector::all_1_ >> 61,
77  Bitvector::all_1_ >> 60, Bitvector::all_1_ >> 59, Bitvector::all_1_ >> 58, Bitvector::all_1_ >> 57,
78  Bitvector::all_1_ >> 56, Bitvector::all_1_ >> 55, Bitvector::all_1_ >> 54, Bitvector::all_1_ >> 53,
79  Bitvector::all_1_ >> 52, Bitvector::all_1_ >> 51, Bitvector::all_1_ >> 50, Bitvector::all_1_ >> 49,
80  Bitvector::all_1_ >> 48, Bitvector::all_1_ >> 47, Bitvector::all_1_ >> 46, Bitvector::all_1_ >> 45,
81  Bitvector::all_1_ >> 44, Bitvector::all_1_ >> 43, Bitvector::all_1_ >> 42, Bitvector::all_1_ >> 41,
82  Bitvector::all_1_ >> 40, Bitvector::all_1_ >> 39, Bitvector::all_1_ >> 38, Bitvector::all_1_ >> 37,
83  Bitvector::all_1_ >> 36, Bitvector::all_1_ >> 35, Bitvector::all_1_ >> 34, Bitvector::all_1_ >> 33,
84  Bitvector::all_1_ >> 32, Bitvector::all_1_ >> 31, Bitvector::all_1_ >> 30, Bitvector::all_1_ >> 29,
85  Bitvector::all_1_ >> 28, Bitvector::all_1_ >> 27, Bitvector::all_1_ >> 26, Bitvector::all_1_ >> 25,
86  Bitvector::all_1_ >> 24, Bitvector::all_1_ >> 23, Bitvector::all_1_ >> 22, Bitvector::all_1_ >> 21,
87  Bitvector::all_1_ >> 20, Bitvector::all_1_ >> 19, Bitvector::all_1_ >> 18, Bitvector::all_1_ >> 17,
88  Bitvector::all_1_ >> 16, Bitvector::all_1_ >> 15, Bitvector::all_1_ >> 14, Bitvector::all_1_ >> 13,
89  Bitvector::all_1_ >> 12, Bitvector::all_1_ >> 11, Bitvector::all_1_ >> 10, Bitvector::all_1_ >> 9,
90  Bitvector::all_1_ >> 8, Bitvector::all_1_ >> 7, Bitvector::all_1_ >> 6, Bitvector::all_1_ >> 5,
91  Bitvector::all_1_ >> 4, Bitvector::all_1_ >> 3, Bitvector::all_1_ >> 2, Bitvector::all_1_ >> 1
92 };
93 
97 const Bitvector::IntType Bitvector::count_mask_[4] =
98 {
99  0x5555555555555555, //binary: 0101...
100  0x3333333333333333, //binary: 00110011...
101  0x0f0f0f0f0f0f0f0f, //binary: 4 zeros, 4 ones...
102  0x0101010101010101 //the sum of 256 to the power of 0,1,2,3...
103 };
104 
105 // =============================================================================
106 // Operators
107 // =============================================================================
108 
110 {
111  if( size_ != rhs.size_ ) {
112  throw std::runtime_error( "Cannot use operator on Bitvectors of different size." );
113  }
114 
115  for (size_t i = 0; i < data_.size(); ++i) {
116  data_[i] &= rhs.data_[i];
117  }
118  return *this;
119 }
120 
122 {
123  if( size_ != rhs.size_ ) {
124  throw std::runtime_error( "Cannot use operator on Bitvectors of different size." );
125  }
126 
127  for (size_t i = 0; i < data_.size(); ++i) {
128  data_[i] |= rhs.data_[i];
129  }
130  return *this;
131 }
132 
134 {
135  if( size_ != rhs.size_ ) {
136  throw std::runtime_error( "Cannot use operator on Bitvectors of different size." );
137  }
138 
139  for (size_t i = 0; i < data_.size(); ++i) {
140  data_[i] ^= rhs.data_[i];
141  }
142  return *this;
143 }
144 
146 {
147  Bitvector cpy = Bitvector(*this);
148  cpy.negate();
149  return cpy;
150 }
151 
152 bool Bitvector::operator == (const Bitvector &other) const
153 {
154  if (size_ != other.size_) {
155  return false;
156  }
157  for (size_t i = 0; i < data_.size(); ++i) {
158  if (data_[i] != other.data_[i]) {
159  return false;
160  }
161  }
162  return true;
163 }
164 
165 bool Bitvector::operator != (const Bitvector &other) const
166 {
167  return !(*this == other);
168 }
169 
170 // =============================================================================
171 // Other Functions
172 // =============================================================================
173 
174 size_t Bitvector::count() const
175 {
176  size_t res = 0;
177  for (IntType x : data_) {
178  // put count of each 2 bits into those 2 bits
179  x -= (x >> 1) & count_mask_[0];
180 
181  // put count of each 4 bits into those 4 bits
182  x = (x & count_mask_[1]) + ((x >> 2) & count_mask_[1]);
183 
184  // put count of each 8 bits into those 8 bits
185  x = (x + (x >> 4)) & count_mask_[2];
186 
187  // take left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
188  res += (x * count_mask_[3]) >> 56;
189  }
190 
191  // safe, but slow version...
192  //~ size_t tmp = 0;
193  //~ for (size_t i = 0; i < size_; ++i) {
194  //~ if (get(i)) {
195  //~ ++tmp;
196  //~ }
197  //~ }
198  //~ assert(tmp == res);
199 
200  return res;
201 }
202 
203 size_t Bitvector::hash() const
204 {
205  // TODO this might be a poor hash function. check what kind of value a hash fct needs to return and decide whether xhash is a good choice instead!
206  size_t res = 0;
207  for (size_t i = 0; i < size_; ++i) {
208  if (get(i)) {
209  res ^= std::hash<size_t>()(i);
210  }
211  }
212  return res;
213 }
214 
216 {
217  IntType res = 0;
218  for (IntType e : data_) {
219  res ^= e;
220  }
221  return res;
222 }
223 
225 {
226  // flip all bits.
227  for (size_t i = 0; i < data_.size(); ++i) {
228  data_[i] = ~ data_[i];
229  }
230 
231  // reset the surplus bits at the end of the vector.
232  unset_padding_();
233 }
234 
236 {
237  if (size_ > 0 && get(0)) {
238  negate();
239  }
240 }
241 
242 void Bitvector::set_all( const bool value )
243 {
244  // set according to flag.
245  const auto v = value ? all_1_ : all_0_;
246  for (size_t i = 0; i < data_.size(); ++i) {
247  data_[i] = v;
248  }
249 
250  // if we initialized with true, we need to unset the surplus bits at the end!
251  if (value) {
252  unset_padding_();
253  }
254 }
255 
256 void Bitvector::unset_padding_()
257 {
258  // Only apply if there are actual padding bits.
259  if(( size_ % IntSize ) == 0 ) {
260  return;
261  }
262 
263  data_.back() &= ones_mask_[ size_ % IntSize ];
264 
265  // other versions that might be helpful if i messed up with this little/big endian stuff...
266  // first one is slow but definitely works, second one is fast, but might have the same
267  // issue as the used version above (which currently works perfectly).
268  //~ for (size_t i = size_ % IntSize; i < IntSize; ++i) {
269  //~ data_.back() &= ~bit_mask_[i];
270  //~ }
271  //~ data_.back() &= bit_mask_[size_ % IntSize] - 1;
272 }
273 
274 // =============================================================================
275 // Dump and Debug
276 // =============================================================================
277 
278 std::string Bitvector::dump() const
279 {
280  std::string res = "[" + std::to_string(size_) + "]\n";
281  for (size_t i = 0; i < size_; ++i) {
282  res += (*this)[i] ? "1" : "0";
283  if ((i+1) % 64 == 0) {
284  res += "\n";
285  } else if ((i+1) % 8 == 0) {
286  res += " ";
287  }
288  }
289  return res;
290 }
291 
292 std::string Bitvector::dump_int(IntType x) const
293 {
294  std::string res = "";
295  for (size_t i = 0; i < IntSize; ++i) {
296  res += (x & bit_mask_[i] ? "1" : "0");
297  if ((i+1) % 8 == 0) {
298  res += " ";
299  }
300  }
301  return res;
302 }
303 
304 } // namespace utils
305 } // namespace genesis
Bitvector & operator^=(Bitvector const &rhs)
Definition: bitvector.cpp:133
size_t count() const
Count the number of set bits in the Bitvector, that is, its Hamming weight.
Definition: bitvector.cpp:174
IntType x_hash() const
Return a hash value of type IntType that is quicker to calculate than hash().
Definition: bitvector.cpp:215
size_t hash() const
Return an std::hash value for the Bitvector.
Definition: bitvector.cpp:203
static const size_t IntSize
Definition: bitvector.hpp:55
Bitvector()=default
Default constructor. Creates an empty Bitvector of size 0.
std::string to_string(T const &v)
Return a string representation of a given value.
Definition: string.hpp:381
bool operator==(const Bitvector &other) const
Definition: bitvector.cpp:152
void negate()
Flip all bits.
Definition: bitvector.cpp:224
Bitvector & operator&=(Bitvector const &rhs)
Definition: bitvector.cpp:109
bool operator!=(const Bitvector &other) const
Definition: bitvector.cpp:165
void normalize()
Bring the Bitvector in a normalized form, where the first bit is always zero.
Definition: bitvector.cpp:235
Bitvector & operator|=(Bitvector const &rhs)
Definition: bitvector.cpp:121
Bitvector operator~() const
Definition: bitvector.cpp:145
std::string dump_int(IntType x) const
Definition: bitvector.cpp:292
void set_all(const bool value=false)
Set all the bits to a specified value.
Definition: bitvector.cpp:242
std::string dump() const
Definition: bitvector.cpp:278