A toolkit for working with phylogenetic data.
v0.22.1
sha1.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 /*
32  The implementation is based on [https://github.com/vog/sha1](https://github.com/vog/sha1),
33  which is 100% Public Domain, see also the Acknowledgements section of the documentation.
34 */
35 
37 
39 
40 #include <algorithm>
41 #include <cinttypes>
42 #include <cstdio>
43 #include <fstream>
44 #include <iomanip>
45 #include <iostream>
46 #include <sstream>
47 #include <stdexcept>
48 
49 namespace genesis {
50 namespace utils {
51 
52 // ================================================================================================
53 // Constructors and Rule of Five
54 // ================================================================================================
55 
57 {
58  reset_();
59 }
60 
61 // ================================================================================================
62 // Full Hashing
63 // ================================================================================================
64 
65 std::string SHA1::read_hex( std::shared_ptr<BaseInputSource> source )
66 {
67  SHA1 checksum;
68  checksum.update( source );
69  return checksum.final_hex();
70 }
71 
72 SHA1::DigestType SHA1::read_digest( std::shared_ptr<BaseInputSource> source )
73 {
74  SHA1 checksum;
75  checksum.update( source );
76  return checksum.final_digest();
77 }
78 
79 std::string SHA1::digest_to_hex( SHA1::DigestType const& digest )
80 {
81  /* Hex std::string */
82  std::ostringstream result;
83  for (size_t i = 0; i < digest.size(); ++i) {
84  result << std::hex << std::setfill('0') << std::setw(8);
85  result << digest[i];
86  }
87 
88  return result.str();
89 }
90 
91 SHA1::DigestType SHA1::hex_to_digest( std::string const& hex )
92 {
93  // Safety first!
94  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
95  return std::isxdigit( c );
96  });
97  if( hex.size() != 40 || ! all_hex ) {
98  throw std::runtime_error( "Invalid SHA1 hex string." );
99  }
100 
101  // The following test was introduced to check the scanf format "%8x",
102  // which just is an "unsigned int", which is not a fixed size.
103  // We now use the SCNxN typedefs that offer fixed width replacements, see
104  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
105 
106  // Make sure that the scan works! Need to have 32 bit type.
107  // static_assert(
108  // sizeof( unsigned int ) == 4,
109  // "Cannot compile SHA1::hex_to_digest() with sizeof( unsigned int ) != 4"
110  // );
111 
112  // Convert.
113  SHA1::DigestType result;
114  for (size_t i = 0; i < result.size(); ++i) {
115  // auto const n = sscanf( &hex[ 8 * i ], "%8x", &(result[i]) );
116  auto const n = sscanf( &hex[ 8 * i ], "%8" SCNx32, &(result[i]) );
117  if( n != 1 ) {
118  throw std::runtime_error( "Invalid SHA1 hex string." );
119  }
120  }
121 
122  return result;
123 }
124 
125 // ================================================================================================
126 // Member Functions
127 // ================================================================================================
128 
130 {
131  reset_();
132 }
133 
134 void SHA1::update( std::shared_ptr<BaseInputSource> source )
135 {
136  auto ib = InputBuffer( source );
137  char sbuf[SHA1::BlockBytes];
138 
139  while (true) {
140 
141  // Read a block and use it for an update.
142  auto count = ib.read( sbuf, SHA1::BlockBytes - buffer_.size() );
143  buffer_.append( sbuf, count );
144 
145  // If we didn't get a full block, the input is done.
146  if (buffer_.size() != SHA1::BlockBytes) {
147  return;
148  }
149 
150  // If the buffer is full, transform it.
151  uint32_t block[SHA1::BlockInts];
152  buffer_to_block_(buffer_, block);
153  transform_( block );
154  buffer_.clear();
155  }
156 }
157 
158 void SHA1::update( std::string const& s )
159 {
160  std::istringstream is(s);
161  update(is);
162 }
163 
164 void SHA1::update( std::istream& is )
165 {
166  char sbuf[SHA1::BlockBytes];
167 
168  while (true) {
169  is.read(sbuf, SHA1::BlockBytes - buffer_.size());
170  buffer_.append(sbuf, is.gcount());
171  if (buffer_.size() != SHA1::BlockBytes) {
172  return;
173  }
174  uint32_t block[SHA1::BlockInts];
175  buffer_to_block_(buffer_, block);
176  transform_( block );
177  buffer_.clear();
178  }
179 }
180 
181 std::string SHA1::final_hex()
182 {
183  // Calculate digest, also reset for next use.
184  return digest_to_hex( final_digest() );
185 }
186 
188 {
189  /* Total number of hashed bits */
190  uint64_t total_bits = (transforms_*SHA1::BlockBytes + buffer_.size()) * 8;
191 
192  /* Padding */
193  buffer_ += static_cast<char>( 0x80 );
194  size_t orig_size = buffer_.size();
195  while (buffer_.size() < SHA1::BlockBytes) {
196  buffer_ += static_cast<char>( 0x00 );
197  }
198 
199  uint32_t block[SHA1::BlockInts];
200  buffer_to_block_(buffer_, block);
201 
202  if (orig_size > SHA1::BlockBytes - 8) {
203  transform_( block );
204  for (size_t i = 0; i < SHA1::BlockInts - 2; i++) {
205  block[i] = 0;
206  }
207  }
208 
209  /* Append total_bits, split this uint64_t into two uint32_t */
210  block[SHA1::BlockInts - 1] = total_bits;
211  block[SHA1::BlockInts - 2] = (total_bits >> 32);
212  transform_( block );
213 
214  auto result = digest_;
215 
216  /* Reset for next run */
217  reset_();
218 
219  return result;
220 }
221 
222 // ================================================================================================
223 // Internal Functions
224 // ================================================================================================
225 
226 void SHA1::reset_()
227 {
228  /* SHA1 initialization constants */
229  digest_[0] = 0x67452301;
230  digest_[1] = 0xefcdab89;
231  digest_[2] = 0x98badcfe;
232  digest_[3] = 0x10325476;
233  digest_[4] = 0xc3d2e1f0;
234 
235  /* Reset counters */
236  buffer_ = "";
237  transforms_ = 0;
238 }
239 
240 uint32_t SHA1::rol_(const uint32_t value, const size_t bits)
241 {
242  return (value << bits) | (value >> (32 - bits));
243 }
244 
245 uint32_t SHA1::blk_(const uint32_t block[SHA1::BlockInts], const size_t i)
246 {
247  return rol_(block[(i+13)&15] ^ block[(i+8)&15] ^ block[(i+2)&15] ^ block[i], 1);
248 }
249 
250 void SHA1::R0_(
251  const uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
252  const uint32_t y, uint32_t& z, const size_t i
253 ) {
254  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
255  w = rol_(w, 30);
256 }
257 
258 void SHA1::R1_(
259  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
260  const uint32_t y, uint32_t& z, const size_t i
261 ) {
262  block[i] = blk_(block, i);
263  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
264  w = rol_(w, 30);
265 }
266 
267 void SHA1::R2_(
268  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
269  const uint32_t y, uint32_t& z, const size_t i
270 ) {
271  block[i] = blk_(block, i);
272  z += (w^x^y) + block[i] + 0x6ed9eba1 + rol_(v, 5);
273  w = rol_(w, 30);
274 }
275 
276 void SHA1::R3_(
277  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
278  const uint32_t y, uint32_t& z, const size_t i
279 ) {
280  block[i] = blk_(block, i);
281  z += (((w|x)&y)|(w&x)) + block[i] + 0x8f1bbcdc + rol_(v, 5);
282  w = rol_(w, 30);
283 }
284 
285 
286 void SHA1::R4_(
287  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
288  const uint32_t y, uint32_t& z, const size_t i
289 ) {
290  block[i] = blk_(block, i);
291  z += (w^x^y) + block[i] + 0xca62c1d6 + rol_(v, 5);
292  w = rol_(w, 30);
293 }
294 
295 void SHA1::transform_( uint32_t block[SHA1::BlockInts] )
296 {
297  // Hash a single 512-bit block. This is the core of the algorithm.
298 
299  // Copy digest[] to working vars
300  uint32_t a = digest_[0];
301  uint32_t b = digest_[1];
302  uint32_t c = digest_[2];
303  uint32_t d = digest_[3];
304  uint32_t e = digest_[4];
305 
306  // 4 rounds of 20 operations each. Loop unrolled.
307  R0_(block, a, b, c, d, e, 0);
308  R0_(block, e, a, b, c, d, 1);
309  R0_(block, d, e, a, b, c, 2);
310  R0_(block, c, d, e, a, b, 3);
311  R0_(block, b, c, d, e, a, 4);
312  R0_(block, a, b, c, d, e, 5);
313  R0_(block, e, a, b, c, d, 6);
314  R0_(block, d, e, a, b, c, 7);
315  R0_(block, c, d, e, a, b, 8);
316  R0_(block, b, c, d, e, a, 9);
317  R0_(block, a, b, c, d, e, 10);
318  R0_(block, e, a, b, c, d, 11);
319  R0_(block, d, e, a, b, c, 12);
320  R0_(block, c, d, e, a, b, 13);
321  R0_(block, b, c, d, e, a, 14);
322  R0_(block, a, b, c, d, e, 15);
323  R1_(block, e, a, b, c, d, 0);
324  R1_(block, d, e, a, b, c, 1);
325  R1_(block, c, d, e, a, b, 2);
326  R1_(block, b, c, d, e, a, 3);
327  R2_(block, a, b, c, d, e, 4);
328  R2_(block, e, a, b, c, d, 5);
329  R2_(block, d, e, a, b, c, 6);
330  R2_(block, c, d, e, a, b, 7);
331  R2_(block, b, c, d, e, a, 8);
332  R2_(block, a, b, c, d, e, 9);
333  R2_(block, e, a, b, c, d, 10);
334  R2_(block, d, e, a, b, c, 11);
335  R2_(block, c, d, e, a, b, 12);
336  R2_(block, b, c, d, e, a, 13);
337  R2_(block, a, b, c, d, e, 14);
338  R2_(block, e, a, b, c, d, 15);
339  R2_(block, d, e, a, b, c, 0);
340  R2_(block, c, d, e, a, b, 1);
341  R2_(block, b, c, d, e, a, 2);
342  R2_(block, a, b, c, d, e, 3);
343  R2_(block, e, a, b, c, d, 4);
344  R2_(block, d, e, a, b, c, 5);
345  R2_(block, c, d, e, a, b, 6);
346  R2_(block, b, c, d, e, a, 7);
347  R3_(block, a, b, c, d, e, 8);
348  R3_(block, e, a, b, c, d, 9);
349  R3_(block, d, e, a, b, c, 10);
350  R3_(block, c, d, e, a, b, 11);
351  R3_(block, b, c, d, e, a, 12);
352  R3_(block, a, b, c, d, e, 13);
353  R3_(block, e, a, b, c, d, 14);
354  R3_(block, d, e, a, b, c, 15);
355  R3_(block, c, d, e, a, b, 0);
356  R3_(block, b, c, d, e, a, 1);
357  R3_(block, a, b, c, d, e, 2);
358  R3_(block, e, a, b, c, d, 3);
359  R3_(block, d, e, a, b, c, 4);
360  R3_(block, c, d, e, a, b, 5);
361  R3_(block, b, c, d, e, a, 6);
362  R3_(block, a, b, c, d, e, 7);
363  R3_(block, e, a, b, c, d, 8);
364  R3_(block, d, e, a, b, c, 9);
365  R3_(block, c, d, e, a, b, 10);
366  R3_(block, b, c, d, e, a, 11);
367  R4_(block, a, b, c, d, e, 12);
368  R4_(block, e, a, b, c, d, 13);
369  R4_(block, d, e, a, b, c, 14);
370  R4_(block, c, d, e, a, b, 15);
371  R4_(block, b, c, d, e, a, 0);
372  R4_(block, a, b, c, d, e, 1);
373  R4_(block, e, a, b, c, d, 2);
374  R4_(block, d, e, a, b, c, 3);
375  R4_(block, c, d, e, a, b, 4);
376  R4_(block, b, c, d, e, a, 5);
377  R4_(block, a, b, c, d, e, 6);
378  R4_(block, e, a, b, c, d, 7);
379  R4_(block, d, e, a, b, c, 8);
380  R4_(block, c, d, e, a, b, 9);
381  R4_(block, b, c, d, e, a, 10);
382  R4_(block, a, b, c, d, e, 11);
383  R4_(block, e, a, b, c, d, 12);
384  R4_(block, d, e, a, b, c, 13);
385  R4_(block, c, d, e, a, b, 14);
386  R4_(block, b, c, d, e, a, 15);
387 
388  /* Add the working vars back into digest[] */
389  digest_[0] += a;
390  digest_[1] += b;
391  digest_[2] += c;
392  digest_[3] += d;
393  digest_[4] += e;
394 
395  /* Count the number of transformations */
396  ++transforms_;
397 }
398 
399 void SHA1::buffer_to_block_(const std::string& buffer, uint32_t block[SHA1::BlockInts])
400 {
401  // Convert the std::string (byte buffer) to a uint32_t array (MSB)
402 
403  for (size_t i = 0; i < SHA1::BlockInts; i++) {
404  block[i] = ( buffer[4*i+3] & 0xff )
405  | ( buffer[4*i+2] & 0xff ) << 8
406  | ( buffer[4*i+1] & 0xff ) << 16
407  | ( buffer[4*i+0] & 0xff ) << 24;
408  }
409 }
410 
411 } // namespace utils
412 } // namespace genesis
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha1.cpp:187
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: sha1.cpp:72
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha1.cpp:181
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::array< uint32_t, 5 > DigestType
Store a SHA1 digest.
Definition: sha1.hpp:83
static const size_t BlockInts
Definition: sha1.hpp:73
static const size_t BlockBytes
Definition: sha1.hpp:74
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: sha1.cpp:129
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: sha1.cpp:65
static std::string digest_to_hex(DigestType const &digest)
Definition: sha1.cpp:79
Calculate SHA1 hashes for strings and files.
Definition: sha1.hpp:64
void update(std::shared_ptr< BaseInputSource > source)
Definition: sha1.cpp:134
SHA1()
Initialize the object for use.
Definition: sha1.cpp:56
static DigestType hex_to_digest(std::string const &hex)
Definition: sha1.cpp:91