A library for working with phylogenetic and population genetic data.
v0.32.0
sha1.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2023 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 /*
32  The implementation is based on [https://github.com/vog/sha1](https://github.com/vog/sha1),
33  which is 100% Public Domain, see also the Acknowledgements section of the documentation.
34 */
35 
37 
39 
40 #include <algorithm>
41 #include <cstdlib>
42 #include <cstdio>
43 #include <fstream>
44 #include <iomanip>
45 #include <iostream>
46 #include <sstream>
47 #include <stdexcept>
48 
49 namespace genesis {
50 namespace utils {
51 
52 // ================================================================================================
53 // Constructors and Rule of Five
54 // ================================================================================================
55 
57 {
58  reset_();
59 }
60 
61 // ================================================================================================
62 // Full Hashing
63 // ================================================================================================
64 
65 std::string SHA1::read_hex( std::shared_ptr<BaseInputSource> source )
66 {
67  SHA1 checksum;
68  checksum.update( source );
69  return checksum.final_hex();
70 }
71 
72 SHA1::DigestType SHA1::read_digest( std::shared_ptr<BaseInputSource> source )
73 {
74  SHA1 checksum;
75  checksum.update( source );
76  return checksum.final_digest();
77 }
78 
79 std::string SHA1::digest_to_hex( SHA1::DigestType const& digest )
80 {
81  /* Hex std::string */
82  std::ostringstream result;
83  for (size_t i = 0; i < digest.size(); ++i) {
84  result << std::hex << std::setfill('0') << std::setw(8);
85  result << digest[i];
86  }
87 
88  return result.str();
89 }
90 
91 SHA1::DigestType SHA1::hex_to_digest( std::string const& hex )
92 {
93  // Safety first!
94  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
95  return std::isxdigit( c );
96  });
97  if( hex.size() != 40 || ! all_hex ) {
98  throw std::runtime_error( "Invalid SHA1 hex string." );
99  }
100 
101  // Convert.
102  SHA1::DigestType result;
103  for (size_t i = 0; i < result.size(); ++i) {
104 
105  // Read the symbols into the digest. We tried this before using sscanf(), but that got quite
106  // messy, as the int widths of the hex format macros are not consistent across compilers...
107  // So now we copy the individual fragements to a string. Bit expensive, but a digest is short.
108  std::string const sub = hex.substr( 8*i, 8 );
109  char* endptr;
110  auto const res = strtoul( sub.c_str(), &endptr, 16 );
111  if( *endptr != 0 ) {
112  throw std::runtime_error( "Invalid SHA1 hex string: \"" + hex + "\"" );
113  }
114  result[i] = static_cast<uint32_t>(res);
115  }
116 
117  return result;
118 }
119 
120 // ================================================================================================
121 // Member Functions
122 // ================================================================================================
123 
125 {
126  reset_();
127 }
128 
129 void SHA1::update( std::shared_ptr<BaseInputSource> source )
130 {
131  auto ib = InputBuffer( source );
132  char sbuf[SHA1::BlockBytes];
133 
134  while (true) {
135 
136  // Read a block and use it for an update.
137  auto count = ib.read( sbuf, SHA1::BlockBytes - buffer_.size() );
138  buffer_.append( sbuf, count );
139 
140  // If we didn't get a full block, the input is done.
141  if (buffer_.size() != SHA1::BlockBytes) {
142  return;
143  }
144 
145  // If the buffer is full, transform it.
146  uint32_t block[SHA1::BlockInts];
147  buffer_to_block_(buffer_, block);
148  transform_( block );
149  buffer_.clear();
150  }
151 }
152 
153 void SHA1::update( std::string const& s )
154 {
155  std::istringstream is(s);
156  update(is);
157 }
158 
159 void SHA1::update( std::istream& is )
160 {
161  char sbuf[SHA1::BlockBytes];
162 
163  while (true) {
164  is.read(sbuf, SHA1::BlockBytes - buffer_.size());
165  buffer_.append(sbuf, is.gcount());
166  if (buffer_.size() != SHA1::BlockBytes) {
167  return;
168  }
169  uint32_t block[SHA1::BlockInts];
170  buffer_to_block_(buffer_, block);
171  transform_( block );
172  buffer_.clear();
173  }
174 }
175 
176 std::string SHA1::final_hex()
177 {
178  // Calculate digest, also reset for next use.
179  return digest_to_hex( final_digest() );
180 }
181 
183 {
184  /* Total number of hashed bits */
185  uint64_t total_bits = (transforms_*SHA1::BlockBytes + buffer_.size()) * 8;
186 
187  /* Padding */
188  buffer_ += static_cast<char>( 0x80 );
189  size_t orig_size = buffer_.size();
190  while (buffer_.size() < SHA1::BlockBytes) {
191  buffer_ += static_cast<char>( 0x00 );
192  }
193 
194  uint32_t block[SHA1::BlockInts];
195  buffer_to_block_(buffer_, block);
196 
197  if (orig_size > SHA1::BlockBytes - 8) {
198  transform_( block );
199  for (size_t i = 0; i < SHA1::BlockInts - 2; i++) {
200  block[i] = 0;
201  }
202  }
203 
204  /* Append total_bits, split this uint64_t into two uint32_t */
205  block[SHA1::BlockInts - 1] = total_bits;
206  block[SHA1::BlockInts - 2] = (total_bits >> 32);
207  transform_( block );
208 
209  auto result = digest_;
210 
211  /* Reset for next run */
212  reset_();
213 
214  return result;
215 }
216 
217 // ================================================================================================
218 // Internal Functions
219 // ================================================================================================
220 
221 void SHA1::reset_()
222 {
223  /* SHA1 initialization constants */
224  digest_[0] = 0x67452301;
225  digest_[1] = 0xefcdab89;
226  digest_[2] = 0x98badcfe;
227  digest_[3] = 0x10325476;
228  digest_[4] = 0xc3d2e1f0;
229 
230  /* Reset counters */
231  buffer_ = "";
232  transforms_ = 0;
233 }
234 
235 uint32_t SHA1::rol_(const uint32_t value, const size_t bits)
236 {
237  return (value << bits) | (value >> (32 - bits));
238 }
239 
240 uint32_t SHA1::blk_(const uint32_t block[SHA1::BlockInts], const size_t i)
241 {
242  return rol_(block[(i+13)&15] ^ block[(i+8)&15] ^ block[(i+2)&15] ^ block[i], 1);
243 }
244 
245 void SHA1::R0_(
246  const uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
247  const uint32_t y, uint32_t& z, const size_t i
248 ) {
249  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
250  w = rol_(w, 30);
251 }
252 
253 void SHA1::R1_(
254  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
255  const uint32_t y, uint32_t& z, const size_t i
256 ) {
257  block[i] = blk_(block, i);
258  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
259  w = rol_(w, 30);
260 }
261 
262 void SHA1::R2_(
263  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
264  const uint32_t y, uint32_t& z, const size_t i
265 ) {
266  block[i] = blk_(block, i);
267  z += (w^x^y) + block[i] + 0x6ed9eba1 + rol_(v, 5);
268  w = rol_(w, 30);
269 }
270 
271 void SHA1::R3_(
272  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
273  const uint32_t y, uint32_t& z, const size_t i
274 ) {
275  block[i] = blk_(block, i);
276  z += (((w|x)&y)|(w&x)) + block[i] + 0x8f1bbcdc + rol_(v, 5);
277  w = rol_(w, 30);
278 }
279 
280 
281 void SHA1::R4_(
282  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
283  const uint32_t y, uint32_t& z, const size_t i
284 ) {
285  block[i] = blk_(block, i);
286  z += (w^x^y) + block[i] + 0xca62c1d6 + rol_(v, 5);
287  w = rol_(w, 30);
288 }
289 
290 void SHA1::transform_( uint32_t block[SHA1::BlockInts] )
291 {
292  // Hash a single 512-bit block. This is the core of the algorithm.
293 
294  // Copy digest[] to working vars
295  uint32_t a = digest_[0];
296  uint32_t b = digest_[1];
297  uint32_t c = digest_[2];
298  uint32_t d = digest_[3];
299  uint32_t e = digest_[4];
300 
301  // 4 rounds of 20 operations each. Loop unrolled.
302  R0_(block, a, b, c, d, e, 0);
303  R0_(block, e, a, b, c, d, 1);
304  R0_(block, d, e, a, b, c, 2);
305  R0_(block, c, d, e, a, b, 3);
306  R0_(block, b, c, d, e, a, 4);
307  R0_(block, a, b, c, d, e, 5);
308  R0_(block, e, a, b, c, d, 6);
309  R0_(block, d, e, a, b, c, 7);
310  R0_(block, c, d, e, a, b, 8);
311  R0_(block, b, c, d, e, a, 9);
312  R0_(block, a, b, c, d, e, 10);
313  R0_(block, e, a, b, c, d, 11);
314  R0_(block, d, e, a, b, c, 12);
315  R0_(block, c, d, e, a, b, 13);
316  R0_(block, b, c, d, e, a, 14);
317  R0_(block, a, b, c, d, e, 15);
318  R1_(block, e, a, b, c, d, 0);
319  R1_(block, d, e, a, b, c, 1);
320  R1_(block, c, d, e, a, b, 2);
321  R1_(block, b, c, d, e, a, 3);
322  R2_(block, a, b, c, d, e, 4);
323  R2_(block, e, a, b, c, d, 5);
324  R2_(block, d, e, a, b, c, 6);
325  R2_(block, c, d, e, a, b, 7);
326  R2_(block, b, c, d, e, a, 8);
327  R2_(block, a, b, c, d, e, 9);
328  R2_(block, e, a, b, c, d, 10);
329  R2_(block, d, e, a, b, c, 11);
330  R2_(block, c, d, e, a, b, 12);
331  R2_(block, b, c, d, e, a, 13);
332  R2_(block, a, b, c, d, e, 14);
333  R2_(block, e, a, b, c, d, 15);
334  R2_(block, d, e, a, b, c, 0);
335  R2_(block, c, d, e, a, b, 1);
336  R2_(block, b, c, d, e, a, 2);
337  R2_(block, a, b, c, d, e, 3);
338  R2_(block, e, a, b, c, d, 4);
339  R2_(block, d, e, a, b, c, 5);
340  R2_(block, c, d, e, a, b, 6);
341  R2_(block, b, c, d, e, a, 7);
342  R3_(block, a, b, c, d, e, 8);
343  R3_(block, e, a, b, c, d, 9);
344  R3_(block, d, e, a, b, c, 10);
345  R3_(block, c, d, e, a, b, 11);
346  R3_(block, b, c, d, e, a, 12);
347  R3_(block, a, b, c, d, e, 13);
348  R3_(block, e, a, b, c, d, 14);
349  R3_(block, d, e, a, b, c, 15);
350  R3_(block, c, d, e, a, b, 0);
351  R3_(block, b, c, d, e, a, 1);
352  R3_(block, a, b, c, d, e, 2);
353  R3_(block, e, a, b, c, d, 3);
354  R3_(block, d, e, a, b, c, 4);
355  R3_(block, c, d, e, a, b, 5);
356  R3_(block, b, c, d, e, a, 6);
357  R3_(block, a, b, c, d, e, 7);
358  R3_(block, e, a, b, c, d, 8);
359  R3_(block, d, e, a, b, c, 9);
360  R3_(block, c, d, e, a, b, 10);
361  R3_(block, b, c, d, e, a, 11);
362  R4_(block, a, b, c, d, e, 12);
363  R4_(block, e, a, b, c, d, 13);
364  R4_(block, d, e, a, b, c, 14);
365  R4_(block, c, d, e, a, b, 15);
366  R4_(block, b, c, d, e, a, 0);
367  R4_(block, a, b, c, d, e, 1);
368  R4_(block, e, a, b, c, d, 2);
369  R4_(block, d, e, a, b, c, 3);
370  R4_(block, c, d, e, a, b, 4);
371  R4_(block, b, c, d, e, a, 5);
372  R4_(block, a, b, c, d, e, 6);
373  R4_(block, e, a, b, c, d, 7);
374  R4_(block, d, e, a, b, c, 8);
375  R4_(block, c, d, e, a, b, 9);
376  R4_(block, b, c, d, e, a, 10);
377  R4_(block, a, b, c, d, e, 11);
378  R4_(block, e, a, b, c, d, 12);
379  R4_(block, d, e, a, b, c, 13);
380  R4_(block, c, d, e, a, b, 14);
381  R4_(block, b, c, d, e, a, 15);
382 
383  /* Add the working vars back into digest[] */
384  digest_[0] += a;
385  digest_[1] += b;
386  digest_[2] += c;
387  digest_[3] += d;
388  digest_[4] += e;
389 
390  /* Count the number of transformations */
391  ++transforms_;
392 }
393 
394 void SHA1::buffer_to_block_(const std::string& buffer, uint32_t block[SHA1::BlockInts])
395 {
396  // Convert the std::string (byte buffer) to a uint32_t array (MSB)
397 
398  for (size_t i = 0; i < SHA1::BlockInts; i++) {
399  block[i] = ( buffer[4*i+3] & 0xff )
400  | ( buffer[4*i+2] & 0xff ) << 8
401  | ( buffer[4*i+1] & 0xff ) << 16
402  | ( buffer[4*i+0] & 0xff ) << 24;
403  }
404 }
405 
406 } // namespace utils
407 } // namespace genesis
genesis::utils::SHA1::read_digest
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: sha1.cpp:72
genesis::utils::SHA1::final_hex
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha1.cpp:176
genesis::utils::SHA1::BlockInts
static const size_t BlockInts
Definition: sha1.hpp:73
genesis::utils::SHA1::DigestType
std::array< uint32_t, 5 > DigestType
Store a SHA1 digest.
Definition: sha1.hpp:83
genesis::utils::SHA1::BlockBytes
static const size_t BlockBytes
Definition: sha1.hpp:74
sha1.hpp
genesis::utils::SHA1::read_hex
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: sha1.cpp:65
genesis::utils::SHA1::clear
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: sha1.cpp:124
genesis::utils::SHA1::update
void update(std::shared_ptr< BaseInputSource > source)
Definition: sha1.cpp:129
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::SHA1::digest_to_hex
static std::string digest_to_hex(DigestType const &digest)
Definition: sha1.cpp:79
genesis::utils::SHA1::final_digest
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha1.cpp:182
genesis::utils::InputBuffer
Definition: input_buffer.hpp:49
input_buffer.hpp
genesis::utils::SHA1::hex_to_digest
static DigestType hex_to_digest(std::string const &hex)
Definition: sha1.cpp:91
genesis::utils::SHA1
Calculate SHA1 hashes for strings and files.
Definition: sha1.hpp:64
genesis::utils::SHA1::SHA1
SHA1()
Initialize the object for use.
Definition: sha1.cpp:56