A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sha1.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <algorithm>
34 #include <cinttypes>
35 #include <cstdio>
36 #include <fstream>
37 #include <iomanip>
38 #include <iostream>
39 #include <sstream>
40 #include <stdexcept>
41 
42 namespace genesis {
43 namespace utils {
44 
45 // ================================================================================================
46 // Constructors and Rule of Five
47 // ================================================================================================
48 
50 {
51  reset_();
52 }
53 
54 // ================================================================================================
55 // Member Functions
56 // ================================================================================================
57 
59 {
60  reset_();
61 }
62 
63 void SHA1::update( std::string const& s )
64 {
65  std::istringstream is(s);
66  update(is);
67 }
68 
69 void SHA1::update(std::istream& is)
70 {
71  while (true) {
72  char sbuf[SHA1::BlockBytes];
73  is.read(sbuf, SHA1::BlockBytes - buffer_.size());
74  buffer_.append(sbuf, is.gcount());
75  if (buffer_.size() != SHA1::BlockBytes) {
76  return;
77  }
78  uint32_t block[SHA1::BlockInts];
79  buffer_to_block_(buffer_, block);
80  transform_( block );
81  buffer_.clear();
82  }
83 }
84 
85 std::string SHA1::final_hex()
86 {
87  // Calculate digest, also reset for next use.
88  return digest_to_hex( final_digest() );
89 }
90 
92 {
93  /* Total number of hashed bits */
94  uint64_t total_bits = (transforms_*SHA1::BlockBytes + buffer_.size()) * 8;
95 
96  /* Padding */
97  buffer_ += static_cast<char>( 0x80 );
98  size_t orig_size = buffer_.size();
99  while (buffer_.size() < SHA1::BlockBytes) {
100  buffer_ += static_cast<char>( 0x00 );
101  }
102 
103  uint32_t block[SHA1::BlockInts];
104  buffer_to_block_(buffer_, block);
105 
106  if (orig_size > SHA1::BlockBytes - 8) {
107  transform_( block );
108  for (size_t i = 0; i < SHA1::BlockInts - 2; i++) {
109  block[i] = 0;
110  }
111  }
112 
113  /* Append total_bits, split this uint64_t into two uint32_t */
114  block[SHA1::BlockInts - 1] = total_bits;
115  block[SHA1::BlockInts - 2] = (total_bits >> 32);
116  transform_( block );
117 
118  auto result = digest_;
119 
120  /* Reset for next run */
121  reset_();
122 
123  return result;
124 }
125 
126 std::string SHA1::from_file_hex( std::string const& filename )
127 {
128  std::ifstream stream( filename.c_str(), std::ios::binary );
129  SHA1 checksum;
130  checksum.update(stream);
131  return checksum.final_hex();
132 }
133 
134 SHA1::DigestType SHA1::from_file_digest( std::string const& filename )
135 {
136  std::ifstream stream( filename.c_str(), std::ios::binary );
137  SHA1 checksum;
138  checksum.update(stream);
139  return checksum.final_digest();
140 }
141 
142 std::string SHA1::from_string_hex( std::string const& input )
143 {
144  SHA1 checksum;
145  checksum.update( input );
146  return checksum.final_hex();
147 }
148 
149 SHA1::DigestType SHA1::from_string_digest( std::string const& input )
150 {
151  SHA1 checksum;
152  checksum.update( input );
153  return checksum.final_digest();
154 }
155 
156 std::string SHA1::from_stream_hex( std::istream& is )
157 {
158  SHA1 checksum;
159  checksum.update(is);
160  return checksum.final_hex();
161 }
162 
164 {
165  SHA1 checksum;
166  checksum.update(is);
167  return checksum.final_digest();
168 }
169 
170 std::string SHA1::digest_to_hex( SHA1::DigestType const& digest )
171 {
172  /* Hex std::string */
173  std::ostringstream result;
174  for (size_t i = 0; i < digest.size(); ++i) {
175  result << std::hex << std::setfill('0') << std::setw(8);
176  result << digest[i];
177  }
178 
179  return result.str();
180 }
181 
182 SHA1::DigestType SHA1::hex_to_digest( std::string const& hex )
183 {
184  // Safety first!
185  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
186  return std::isxdigit( c );
187  });
188  if( hex.size() != 40 || ! all_hex ) {
189  throw std::runtime_error( "Invalid SHA1 hex string." );
190  }
191 
192  // The following test was introduced to check the scanf format "%8x",
193  // which just is an "unsigned int", which is not a fixed size.
194  // We now use the SCNxN typedefs that offer fixed with replacements, see
195  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
196 
197  // Make sure that the scan works! Need to have 32 bit type.
198  // static_assert(
199  // sizeof( unsigned int ) == 4,
200  // "Cannot compile SHA1::hex_to_digest() with sizeof( unsigned int ) != 4"
201  // );
202 
203  // Convert.
204  SHA1::DigestType result;
205  for (size_t i = 0; i < result.size(); ++i) {
206  // auto const n = sscanf( &hex[ 8 * i ], "%8x", &(result[i]) );
207  auto const n = sscanf( &hex[ 8 * i ], "%8" SCNx32, &(result[i]) );
208  if( n != 1 ) {
209  throw std::runtime_error( "Invalid SHA1 hex string." );
210  }
211  }
212 
213  return result;
214 }
215 
216 // ================================================================================================
217 // Internal Functions
218 // ================================================================================================
219 
220 void SHA1::reset_()
221 {
222  /* SHA1 initialization constants */
223  digest_[0] = 0x67452301;
224  digest_[1] = 0xefcdab89;
225  digest_[2] = 0x98badcfe;
226  digest_[3] = 0x10325476;
227  digest_[4] = 0xc3d2e1f0;
228 
229  /* Reset counters */
230  buffer_ = "";
231  transforms_ = 0;
232 }
233 
234 uint32_t SHA1::rol_(const uint32_t value, const size_t bits)
235 {
236  return (value << bits) | (value >> (32 - bits));
237 }
238 
239 uint32_t SHA1::blk_(const uint32_t block[SHA1::BlockInts], const size_t i)
240 {
241  return rol_(block[(i+13)&15] ^ block[(i+8)&15] ^ block[(i+2)&15] ^ block[i], 1);
242 }
243 
244 void SHA1::R0_(
245  const uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
246  const uint32_t y, uint32_t& z, const size_t i
247 ) {
248  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
249  w = rol_(w, 30);
250 }
251 
252 void SHA1::R1_(
253  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
254  const uint32_t y, uint32_t& z, const size_t i
255 ) {
256  block[i] = blk_(block, i);
257  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
258  w = rol_(w, 30);
259 }
260 
261 void SHA1::R2_(
262  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
263  const uint32_t y, uint32_t& z, const size_t i
264 ) {
265  block[i] = blk_(block, i);
266  z += (w^x^y) + block[i] + 0x6ed9eba1 + rol_(v, 5);
267  w = rol_(w, 30);
268 }
269 
270 void SHA1::R3_(
271  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
272  const uint32_t y, uint32_t& z, const size_t i
273 ) {
274  block[i] = blk_(block, i);
275  z += (((w|x)&y)|(w&x)) + block[i] + 0x8f1bbcdc + rol_(v, 5);
276  w = rol_(w, 30);
277 }
278 
279 
280 void SHA1::R4_(
281  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
282  const uint32_t y, uint32_t& z, const size_t i
283 ) {
284  block[i] = blk_(block, i);
285  z += (w^x^y) + block[i] + 0xca62c1d6 + rol_(v, 5);
286  w = rol_(w, 30);
287 }
288 
289 void SHA1::transform_( uint32_t block[SHA1::BlockInts] )
290 {
291  // Hash a single 512-bit block. This is the core of the algorithm.
292 
293  // Copy digest[] to working vars
294  uint32_t a = digest_[0];
295  uint32_t b = digest_[1];
296  uint32_t c = digest_[2];
297  uint32_t d = digest_[3];
298  uint32_t e = digest_[4];
299 
300  // 4 rounds of 20 operations each. Loop unrolled.
301  R0_(block, a, b, c, d, e, 0);
302  R0_(block, e, a, b, c, d, 1);
303  R0_(block, d, e, a, b, c, 2);
304  R0_(block, c, d, e, a, b, 3);
305  R0_(block, b, c, d, e, a, 4);
306  R0_(block, a, b, c, d, e, 5);
307  R0_(block, e, a, b, c, d, 6);
308  R0_(block, d, e, a, b, c, 7);
309  R0_(block, c, d, e, a, b, 8);
310  R0_(block, b, c, d, e, a, 9);
311  R0_(block, a, b, c, d, e, 10);
312  R0_(block, e, a, b, c, d, 11);
313  R0_(block, d, e, a, b, c, 12);
314  R0_(block, c, d, e, a, b, 13);
315  R0_(block, b, c, d, e, a, 14);
316  R0_(block, a, b, c, d, e, 15);
317  R1_(block, e, a, b, c, d, 0);
318  R1_(block, d, e, a, b, c, 1);
319  R1_(block, c, d, e, a, b, 2);
320  R1_(block, b, c, d, e, a, 3);
321  R2_(block, a, b, c, d, e, 4);
322  R2_(block, e, a, b, c, d, 5);
323  R2_(block, d, e, a, b, c, 6);
324  R2_(block, c, d, e, a, b, 7);
325  R2_(block, b, c, d, e, a, 8);
326  R2_(block, a, b, c, d, e, 9);
327  R2_(block, e, a, b, c, d, 10);
328  R2_(block, d, e, a, b, c, 11);
329  R2_(block, c, d, e, a, b, 12);
330  R2_(block, b, c, d, e, a, 13);
331  R2_(block, a, b, c, d, e, 14);
332  R2_(block, e, a, b, c, d, 15);
333  R2_(block, d, e, a, b, c, 0);
334  R2_(block, c, d, e, a, b, 1);
335  R2_(block, b, c, d, e, a, 2);
336  R2_(block, a, b, c, d, e, 3);
337  R2_(block, e, a, b, c, d, 4);
338  R2_(block, d, e, a, b, c, 5);
339  R2_(block, c, d, e, a, b, 6);
340  R2_(block, b, c, d, e, a, 7);
341  R3_(block, a, b, c, d, e, 8);
342  R3_(block, e, a, b, c, d, 9);
343  R3_(block, d, e, a, b, c, 10);
344  R3_(block, c, d, e, a, b, 11);
345  R3_(block, b, c, d, e, a, 12);
346  R3_(block, a, b, c, d, e, 13);
347  R3_(block, e, a, b, c, d, 14);
348  R3_(block, d, e, a, b, c, 15);
349  R3_(block, c, d, e, a, b, 0);
350  R3_(block, b, c, d, e, a, 1);
351  R3_(block, a, b, c, d, e, 2);
352  R3_(block, e, a, b, c, d, 3);
353  R3_(block, d, e, a, b, c, 4);
354  R3_(block, c, d, e, a, b, 5);
355  R3_(block, b, c, d, e, a, 6);
356  R3_(block, a, b, c, d, e, 7);
357  R3_(block, e, a, b, c, d, 8);
358  R3_(block, d, e, a, b, c, 9);
359  R3_(block, c, d, e, a, b, 10);
360  R3_(block, b, c, d, e, a, 11);
361  R4_(block, a, b, c, d, e, 12);
362  R4_(block, e, a, b, c, d, 13);
363  R4_(block, d, e, a, b, c, 14);
364  R4_(block, c, d, e, a, b, 15);
365  R4_(block, b, c, d, e, a, 0);
366  R4_(block, a, b, c, d, e, 1);
367  R4_(block, e, a, b, c, d, 2);
368  R4_(block, d, e, a, b, c, 3);
369  R4_(block, c, d, e, a, b, 4);
370  R4_(block, b, c, d, e, a, 5);
371  R4_(block, a, b, c, d, e, 6);
372  R4_(block, e, a, b, c, d, 7);
373  R4_(block, d, e, a, b, c, 8);
374  R4_(block, c, d, e, a, b, 9);
375  R4_(block, b, c, d, e, a, 10);
376  R4_(block, a, b, c, d, e, 11);
377  R4_(block, e, a, b, c, d, 12);
378  R4_(block, d, e, a, b, c, 13);
379  R4_(block, c, d, e, a, b, 14);
380  R4_(block, b, c, d, e, a, 15);
381 
382  /* Add the working vars back into digest[] */
383  digest_[0] += a;
384  digest_[1] += b;
385  digest_[2] += c;
386  digest_[3] += d;
387  digest_[4] += e;
388 
389  /* Count the number of transformations */
390  ++transforms_;
391 }
392 
393 void SHA1::buffer_to_block_(const std::string& buffer, uint32_t block[SHA1::BlockInts])
394 {
395  // Convert the std::string (byte buffer) to a uint32_t array (MSB)
396 
397  for (size_t i = 0; i < SHA1::BlockInts; i++) {
398  block[i] = ( buffer[4*i+3] & 0xff )
399  | ( buffer[4*i+2] & 0xff ) << 8
400  | ( buffer[4*i+1] & 0xff ) << 16
401  | ( buffer[4*i+0] & 0xff ) << 24;
402  }
403 }
404 
405 } // namespace utils
406 } // namespace genesis
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha1.cpp:91
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha1.cpp:85
static std::string from_stream_hex(std::istream &is)
Calculate the checksum for the content of a stream.
Definition: sha1.cpp:156
static DigestType from_string_digest(std::string const &input)
Calculate the hash digest for the content of a string.
Definition: sha1.cpp:149
static DigestType from_stream_digest(std::istream &is)
Calculate the hash digest for the content of a stream.
Definition: sha1.cpp:163
std::array< uint32_t, 5 > DigestType
Store a SHA1 digest.
Definition: sha1.hpp:79
static std::string from_string_hex(std::string const &input)
Calculate the checksum for the content of a string.
Definition: sha1.cpp:142
static const size_t BlockInts
Definition: sha1.hpp:69
static DigestType from_file_digest(std::string const &filename)
Calculate the hash digest for the content of a file, given its path.
Definition: sha1.cpp:134
static const size_t BlockBytes
Definition: sha1.hpp:70
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: sha1.cpp:58
static std::string from_file_hex(std::string const &filename)
Calculate the checksum for the content of a file, given its path.
Definition: sha1.cpp:126
static std::string digest_to_hex(DigestType const &digest)
Definition: sha1.cpp:170
void update(std::string const &s)
Add the contents of a string to the hash digest.
Definition: sha1.cpp:63
Calculate SHA1 hashes for strings and files.
Definition: sha1.hpp:60
SHA1()
Initialize the object for use.
Definition: sha1.cpp:49
static DigestType hex_to_digest(std::string const &hex)
Definition: sha1.cpp:182