A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sha1.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <algorithm>
34 #include <cinttypes>
35 #include <cstdio>
36 #include <fstream>
37 #include <iomanip>
38 #include <iostream>
39 #include <sstream>
40 #include <stdexcept>
41 
42 namespace genesis {
43 namespace utils {
44 
45 // ================================================================================================
46 // Constructors and Rule of Five
47 // ================================================================================================
48 
53 {
54  reset_();
55 }
56 
57 // ================================================================================================
58 // Member Functions
59 // ================================================================================================
60 
64 void SHA1::update( std::string const& s )
65 {
66  std::istringstream is(s);
67  update(is);
68 }
69 
73 void SHA1::update(std::istream& is)
74 {
75  while (true) {
76  char sbuf[SHA1::BlockBytes];
77  is.read(sbuf, SHA1::BlockBytes - buffer_.size());
78  buffer_.append(sbuf, is.gcount());
79  if (buffer_.size() != SHA1::BlockBytes) {
80  return;
81  }
82  uint32_t block[SHA1::BlockInts];
83  buffer_to_block_(buffer_, block);
84  transform_( block );
85  buffer_.clear();
86  }
87 }
88 
92 std::string SHA1::final_hex()
93 {
94  // Calculate digest, also reset for next use.
95  return digest_to_hex( final_digest() );
96 }
97 
102 {
103  /* Total number of hashed bits */
104  uint64_t total_bits = (transforms_*SHA1::BlockBytes + buffer_.size()) * 8;
105 
106  /* Padding */
107  buffer_ += static_cast<char>( 0x80 );
108  size_t orig_size = buffer_.size();
109  while (buffer_.size() < SHA1::BlockBytes) {
110  buffer_ += static_cast<char>( 0x00 );
111  }
112 
113  uint32_t block[SHA1::BlockInts];
114  buffer_to_block_(buffer_, block);
115 
116  if (orig_size > SHA1::BlockBytes - 8) {
117  transform_( block );
118  for (size_t i = 0; i < SHA1::BlockInts - 2; i++) {
119  block[i] = 0;
120  }
121  }
122 
123  /* Append total_bits, split this uint64_t into two uint32_t */
124  block[SHA1::BlockInts - 1] = total_bits;
125  block[SHA1::BlockInts - 2] = (total_bits >> 32);
126  transform_( block );
127 
128  auto result = digest_;
129 
130  /* Reset for next run */
131  reset_();
132 
133  return result;
134 }
135 
139 std::string SHA1::from_file_hex( std::string const& filename )
140 {
141  std::ifstream stream( filename.c_str(), std::ios::binary );
142  SHA1 checksum;
143  checksum.update(stream);
144  return checksum.final_hex();
145 }
146 
150 SHA1::DigestType SHA1::from_file_digest( std::string const& filename )
151 {
152  std::ifstream stream( filename.c_str(), std::ios::binary );
153  SHA1 checksum;
154  checksum.update(stream);
155  return checksum.final_digest();
156 }
157 
161 std::string SHA1::from_string_hex( std::string const& input )
162 {
163  SHA1 checksum;
164  checksum.update( input );
165  return checksum.final_hex();
166 }
167 
171 SHA1::DigestType SHA1::from_string_digest( std::string const& input )
172 {
173  SHA1 checksum;
174  checksum.update( input );
175  return checksum.final_digest();
176 }
177 
178 std::string SHA1::digest_to_hex( SHA1::DigestType const& digest )
179 {
180  /* Hex std::string */
181  std::ostringstream result;
182  for (size_t i = 0; i < digest.size(); ++i) {
183  result << std::hex << std::setfill('0') << std::setw(8);
184  result << digest[i];
185  }
186 
187  return result.str();
188 }
189 
190 SHA1::DigestType SHA1::hex_to_digest( std::string const& hex )
191 {
192  // Safety first!
193  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
194  return std::isxdigit( c );
195  });
196  if( hex.size() != 40 || ! all_hex ) {
197  throw std::runtime_error( "Invalid SHA1 hex string." );
198  }
199 
200  // The following test was introduced to check the scanf format "%8x",
201  // which just is an "unsigned int", which is not a fixed size.
202  // We now use the SCNxN typedefs that offer fixed with replacements, see
203  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
204 
205  // Make sure that the scan works! Need to have 32 bit type.
206  // static_assert(
207  // sizeof( unsigned int ) == 4,
208  // "Cannot compile SHA1::hex_to_digest() with sizeof( unsigned int ) != 4"
209  // );
210 
211  // Convert.
212  SHA1::DigestType result;
213  for (size_t i = 0; i < result.size(); ++i) {
214  // auto const n = sscanf( &hex[ 8 * i ], "%8x", &(result[i]) );
215  auto const n = sscanf( &hex[ 8 * i ], "%8" SCNx32, &(result[i]) );
216  if( n != 1 ) {
217  throw std::runtime_error( "Invalid SHA1 hex string." );
218  }
219  }
220 
221  return result;
222 }
223 
224 // ================================================================================================
225 // Internal Functions
226 // ================================================================================================
227 
228 void SHA1::reset_()
229 {
230  /* SHA1 initialization constants */
231  digest_[0] = 0x67452301;
232  digest_[1] = 0xefcdab89;
233  digest_[2] = 0x98badcfe;
234  digest_[3] = 0x10325476;
235  digest_[4] = 0xc3d2e1f0;
236 
237  /* Reset counters */
238  buffer_ = "";
239  transforms_ = 0;
240 }
241 
242 uint32_t SHA1::rol_(const uint32_t value, const size_t bits)
243 {
244  return (value << bits) | (value >> (32 - bits));
245 }
246 
247 uint32_t SHA1::blk_(const uint32_t block[SHA1::BlockInts], const size_t i)
248 {
249  return rol_(block[(i+13)&15] ^ block[(i+8)&15] ^ block[(i+2)&15] ^ block[i], 1);
250 }
251 
252 void SHA1::R0_(
253  const uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
254  const uint32_t y, uint32_t& z, const size_t i
255 ) {
256  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
257  w = rol_(w, 30);
258 }
259 
260 void SHA1::R1_(
261  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
262  const uint32_t y, uint32_t& z, const size_t i
263 ) {
264  block[i] = blk_(block, i);
265  z += ((w&(x^y))^y) + block[i] + 0x5a827999 + rol_(v, 5);
266  w = rol_(w, 30);
267 }
268 
269 void SHA1::R2_(
270  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
271  const uint32_t y, uint32_t& z, const size_t i
272 ) {
273  block[i] = blk_(block, i);
274  z += (w^x^y) + block[i] + 0x6ed9eba1 + rol_(v, 5);
275  w = rol_(w, 30);
276 }
277 
278 void SHA1::R3_(
279  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
280  const uint32_t y, uint32_t& z, const size_t i
281 ) {
282  block[i] = blk_(block, i);
283  z += (((w|x)&y)|(w&x)) + block[i] + 0x8f1bbcdc + rol_(v, 5);
284  w = rol_(w, 30);
285 }
286 
287 
288 void SHA1::R4_(
289  uint32_t block[SHA1::BlockInts], const uint32_t v, uint32_t& w, const uint32_t x,
290  const uint32_t y, uint32_t& z, const size_t i
291 ) {
292  block[i] = blk_(block, i);
293  z += (w^x^y) + block[i] + 0xca62c1d6 + rol_(v, 5);
294  w = rol_(w, 30);
295 }
296 
300 void SHA1::transform_( uint32_t block[SHA1::BlockInts] )
301 {
302  // Copy digest[] to working vars
303  uint32_t a = digest_[0];
304  uint32_t b = digest_[1];
305  uint32_t c = digest_[2];
306  uint32_t d = digest_[3];
307  uint32_t e = digest_[4];
308 
309  // 4 rounds of 20 operations each. Loop unrolled.
310  R0_(block, a, b, c, d, e, 0);
311  R0_(block, e, a, b, c, d, 1);
312  R0_(block, d, e, a, b, c, 2);
313  R0_(block, c, d, e, a, b, 3);
314  R0_(block, b, c, d, e, a, 4);
315  R0_(block, a, b, c, d, e, 5);
316  R0_(block, e, a, b, c, d, 6);
317  R0_(block, d, e, a, b, c, 7);
318  R0_(block, c, d, e, a, b, 8);
319  R0_(block, b, c, d, e, a, 9);
320  R0_(block, a, b, c, d, e, 10);
321  R0_(block, e, a, b, c, d, 11);
322  R0_(block, d, e, a, b, c, 12);
323  R0_(block, c, d, e, a, b, 13);
324  R0_(block, b, c, d, e, a, 14);
325  R0_(block, a, b, c, d, e, 15);
326  R1_(block, e, a, b, c, d, 0);
327  R1_(block, d, e, a, b, c, 1);
328  R1_(block, c, d, e, a, b, 2);
329  R1_(block, b, c, d, e, a, 3);
330  R2_(block, a, b, c, d, e, 4);
331  R2_(block, e, a, b, c, d, 5);
332  R2_(block, d, e, a, b, c, 6);
333  R2_(block, c, d, e, a, b, 7);
334  R2_(block, b, c, d, e, a, 8);
335  R2_(block, a, b, c, d, e, 9);
336  R2_(block, e, a, b, c, d, 10);
337  R2_(block, d, e, a, b, c, 11);
338  R2_(block, c, d, e, a, b, 12);
339  R2_(block, b, c, d, e, a, 13);
340  R2_(block, a, b, c, d, e, 14);
341  R2_(block, e, a, b, c, d, 15);
342  R2_(block, d, e, a, b, c, 0);
343  R2_(block, c, d, e, a, b, 1);
344  R2_(block, b, c, d, e, a, 2);
345  R2_(block, a, b, c, d, e, 3);
346  R2_(block, e, a, b, c, d, 4);
347  R2_(block, d, e, a, b, c, 5);
348  R2_(block, c, d, e, a, b, 6);
349  R2_(block, b, c, d, e, a, 7);
350  R3_(block, a, b, c, d, e, 8);
351  R3_(block, e, a, b, c, d, 9);
352  R3_(block, d, e, a, b, c, 10);
353  R3_(block, c, d, e, a, b, 11);
354  R3_(block, b, c, d, e, a, 12);
355  R3_(block, a, b, c, d, e, 13);
356  R3_(block, e, a, b, c, d, 14);
357  R3_(block, d, e, a, b, c, 15);
358  R3_(block, c, d, e, a, b, 0);
359  R3_(block, b, c, d, e, a, 1);
360  R3_(block, a, b, c, d, e, 2);
361  R3_(block, e, a, b, c, d, 3);
362  R3_(block, d, e, a, b, c, 4);
363  R3_(block, c, d, e, a, b, 5);
364  R3_(block, b, c, d, e, a, 6);
365  R3_(block, a, b, c, d, e, 7);
366  R3_(block, e, a, b, c, d, 8);
367  R3_(block, d, e, a, b, c, 9);
368  R3_(block, c, d, e, a, b, 10);
369  R3_(block, b, c, d, e, a, 11);
370  R4_(block, a, b, c, d, e, 12);
371  R4_(block, e, a, b, c, d, 13);
372  R4_(block, d, e, a, b, c, 14);
373  R4_(block, c, d, e, a, b, 15);
374  R4_(block, b, c, d, e, a, 0);
375  R4_(block, a, b, c, d, e, 1);
376  R4_(block, e, a, b, c, d, 2);
377  R4_(block, d, e, a, b, c, 3);
378  R4_(block, c, d, e, a, b, 4);
379  R4_(block, b, c, d, e, a, 5);
380  R4_(block, a, b, c, d, e, 6);
381  R4_(block, e, a, b, c, d, 7);
382  R4_(block, d, e, a, b, c, 8);
383  R4_(block, c, d, e, a, b, 9);
384  R4_(block, b, c, d, e, a, 10);
385  R4_(block, a, b, c, d, e, 11);
386  R4_(block, e, a, b, c, d, 12);
387  R4_(block, d, e, a, b, c, 13);
388  R4_(block, c, d, e, a, b, 14);
389  R4_(block, b, c, d, e, a, 15);
390 
391  /* Add the working vars back into digest[] */
392  digest_[0] += a;
393  digest_[1] += b;
394  digest_[2] += c;
395  digest_[3] += d;
396  digest_[4] += e;
397 
398  /* Count the number of transformations */
399  ++transforms_;
400 }
401 
405 void SHA1::buffer_to_block_(const std::string& buffer, uint32_t block[SHA1::BlockInts])
406 {
407  for (size_t i = 0; i < SHA1::BlockInts; i++) {
408  block[i] = ( buffer[4*i+3] & 0xff )
409  | ( buffer[4*i+2] & 0xff ) << 8
410  | ( buffer[4*i+1] & 0xff ) << 16
411  | ( buffer[4*i+0] & 0xff ) << 24;
412  }
413 }
414 
415 } // namespace utils
416 } // namespace genesis
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha1.cpp:101
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha1.cpp:92
static DigestType from_string_digest(std::string const &input)
Calculate the hash digest for the content of a string.
Definition: sha1.cpp:171
std::array< uint32_t, 5 > DigestType
Store a SHA1 digest.
Definition: sha1.hpp:79
static std::string from_string_hex(std::string const &input)
Calculate the checksum for the content of a string.
Definition: sha1.cpp:161
static const size_t BlockInts
Definition: sha1.hpp:69
static DigestType from_file_digest(std::string const &filename)
Calculate the hash digest for the content of a file, given its path.
Definition: sha1.cpp:150
static const size_t BlockBytes
Definition: sha1.hpp:70
static std::string from_file_hex(std::string const &filename)
Calculate the checksum for the content of a file, given its path.
Definition: sha1.cpp:139
static std::string digest_to_hex(DigestType const &digest)
Definition: sha1.cpp:178
void update(std::string const &s)
Add the contents of a string to the hash digest.
Definition: sha1.cpp:64
Calculate SHA1 hashes for strings and files.
Definition: sha1.hpp:60
SHA1()
Initialize the object for use.
Definition: sha1.cpp:52
static DigestType hex_to_digest(std::string const &hex)
Definition: sha1.cpp:190