A toolkit for working with phylogenetic data.
v0.24.0
md5.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 /*
32  =================================================================
33  MD5 License
34  =================================================================
35 
36  The implementation is based on http://www.zedwood.com/article/cpp-md5-function,
37  which itself was converted to C++ class by Frank Thilo (thilo@unix-ag.org)
38  for bzflag (http://www.bzflag.org), based on:
39 
40  md5.h and md5.c
41  reference implemantion of RFC 1321
42 
43  Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
44  rights reserved.
45 
46  License to copy and use this software is granted provided that it
47  is identified as the "RSA Data Security, Inc. MD5 Message-Digest
48  Algorithm" in all material mentioning or referencing this software
49  or this function.
50 
51  License is also granted to make and use derivative works provided
52  that such works are identified as "derived from the RSA Data
53  Security, Inc. MD5 Message-Digest Algorithm" in all material
54  mentioning or referencing the derived work.
55 
56  RSA Data Security, Inc. makes no representations concerning either
57  the merchantability of this software or the suitability of this
58  software for any particular purpose. It is provided "as is"
59  without express or implied warranty of any kind.
60 
61  These notices must be retained in any copies of any part of this
62  documentation and/or software.
63 */
64 
66 
68 
69 #include <algorithm>
70 #include <cinttypes>
71 #include <cstdio>
72 #include <cstring>
73 #include <fstream>
74 #include <iomanip>
75 #include <iostream>
76 #include <sstream>
77 #include <stdexcept>
78 
79 namespace genesis {
80 namespace utils {
81 
82 // ================================================================================================
83 // Constructors and Rule of Five
84 // ================================================================================================
85 
87 {
88  reset_();
89 }
90 
91 // ================================================================================================
92 // Full Hashing
93 // ================================================================================================
94 
95 std::string MD5::read_hex( std::shared_ptr<BaseInputSource> source )
96 {
97  MD5 checksum;
98  checksum.update( source );
99  return checksum.final_hex();
100 }
101 
102 MD5::DigestType MD5::read_digest( std::shared_ptr<BaseInputSource> source )
103 {
104  MD5 checksum;
105  checksum.update( source );
106  return checksum.final_digest();
107 }
108 
109 std::string MD5::digest_to_hex( MD5::DigestType const& digest )
110 {
111  /* Hex std::string */
112  std::ostringstream result;
113  for (size_t i = 0; i < digest.size(); ++i) {
114  result << std::hex << std::setfill('0') << std::setw(2);
115  result << static_cast<int>( digest[i] );
116  }
117 
118  return result.str();
119 }
120 
121 MD5::DigestType MD5::hex_to_digest( std::string const& hex )
122 {
123  // Safety first!
124  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
125  return std::isxdigit( c );
126  });
127  if( hex.size() != 32 || ! all_hex ) {
128  throw std::runtime_error( "Invalid MD5 hex string." );
129  }
130 
131  // The following test was introduced to check the scanf format "%2hhx",
132  // which just is an "unsigned char", which is not a fixed size.
133  // We now use the SCNxN typedefs that offer fixed width replacements, see
134  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
135 
136  // Make sure that the scan works!
137  // static_assert(
138  // sizeof( unsigned char ) == 1,
139  // "Cannot compile MD5::hex_to_digest() with sizeof( unsigned char ) != 1"
140  // );
141 
142  // Convert.
143  MD5::DigestType result;
144  for (size_t i = 0; i < result.size(); ++i) {
145  // auto const n = sscanf( &hex[ 2 * i ], "%2hhx", &(result[i]) );
146  auto const n = sscanf( &hex[ 2 * i ], "%2" SCNx8, &(result[i]) );
147  if( n != 1 ) {
148  throw std::runtime_error( "Invalid MD5 hex string." );
149  }
150  }
151 
152  return result;
153 }
154 
155 // ================================================================================================
156 // Iterative Hashing
157 // ================================================================================================
158 
160 {
161  reset_();
162 }
163 
164 void MD5::update( std::shared_ptr<BaseInputSource> source )
165 {
166  auto ib = InputBuffer( source );
167  char sbuf[MD5::BlockSize];
168 
169  while (true) {
170 
171  // Read a block and use it for an update.
172  auto cnt = ib.read( sbuf, MD5::BlockSize );
173  update( sbuf, cnt );
174 
175  // If we didn't get a full block, the input is done.
176  if( cnt != MD5::BlockSize ) {
177  return;
178  }
179  }
180 }
181 
182 void MD5::update( std::string const& s )
183 {
184  update( s.c_str(), s.size() );
185 }
186 
187 void MD5::update(std::istream& is)
188 {
189  char sbuf[MD5::BlockSize];
190  while (true) {
191 
192  // Read a block and use it for an update.
193  is.read( sbuf, MD5::BlockSize );
194  size_t cnt = is.gcount();
195  update( sbuf, cnt );
196 
197  // If we didn't get a full block, the stream is done.
198  if( cnt != MD5::BlockSize ) {
199  return;
200  }
201  }
202 }
203 
204 void MD5::update( char const* input, MD5::size_type length )
205 {
206  // Ugly conversion, but still better than the silent one used in the original code.
207  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
208  update_( in_uchar, length);
209 }
210 
211 std::string MD5::final_hex()
212 {
213  // Calculate digest, also reset for next use.
214  return digest_to_hex( final_digest() );
215 }
216 
218 {
219  static unsigned char padding[64] = {
220  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
221  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
223  };
224 
225  // Save number of bits
226  unsigned char bits[8];
227  encode_(bits, count_, 8);
228 
229  // pad out to 56 mod 64.
230  size_type index = count_[0] / 8 % 64;
231  size_type padLen = (index < 56) ? (56 - index) : (120 - index);
232  update_(padding, padLen);
233 
234  // Append length (before padding)
235  update_(bits, 8);
236 
237  // Store state in digest
238  encode_( digest_.data(), state_, 16 );
239 
240  // Zeroize sensitive information.
241  memset(buffer_, 0, sizeof buffer_);
242  memset(count_, 0, sizeof count_);
243 
244  reset_();
245  return digest_;
246 }
247 
248 // ================================================================================================
249 // Internal Functions
250 // ================================================================================================
251 
252 void MD5::reset_()
253 {
254  count_[0] = 0;
255  count_[1] = 0;
256 
257  // load magic initialization constants.
258  state_[0] = 0x67452301;
259  state_[1] = 0xefcdab89;
260  state_[2] = 0x98badcfe;
261  state_[3] = 0x10325476;
262 }
263 
264 void MD5::update_( unsigned char const* input, size_type length )
265 {
266  // compute number of bytes mod 64
267  MD5::size_type index = count_[0] / 8 % MD5::BlockSize;
268 
269  // Update number of bits
270  if ((count_[0] += (length << 3)) < (length << 3)) {
271  count_[1]++;
272  }
273  count_[1] += (length >> 29);
274 
275  // number of bytes we need to fill in buffer
276  MD5::size_type firstpart = 64 - index;
277  MD5::size_type i;
278 
279  // transform as many times as possible.
280  if (length >= firstpart) {
281  // fill buffer first, transform
282  memcpy( &buffer_[index], input, firstpart );
283  transform_(buffer_);
284 
285  // transform chunks of MD5::BlockSize (64 bytes)
286  for (i = firstpart; i + MD5::BlockSize <= length; i += MD5::BlockSize) {
287  transform_( &input[i] );
288  }
289  index = 0;
290  } else {
291  i = 0;
292  }
293 
294  // buffer remaining input
295  memcpy( &buffer_[index], &input[i], length-i );
296 }
297 
298 inline uint32_t MD5::F_(uint32_t x, uint32_t y, uint32_t z)
299 {
300  return ( x & y ) | ( ~x & z );
301 }
302 
303 inline uint32_t MD5::G_(uint32_t x, uint32_t y, uint32_t z)
304 {
305  return ( x & z ) | ( y & ~z );
306 }
307 
308 inline uint32_t MD5::H_(uint32_t x, uint32_t y, uint32_t z)
309 {
310  return x ^ y ^ z;
311 }
312 
313 inline uint32_t MD5::I_(uint32_t x, uint32_t y, uint32_t z)
314 {
315  return y ^ ( x | ~z );
316 }
317 
318 inline uint32_t MD5::rotate_left_(uint32_t x, int n)
319 {
320  return (x << n) | (x >> (32-n));
321 }
322 
323 inline void MD5::FF_(
324  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
325 ) {
326  a = rotate_left_(a+ F_(b,c,d) + x + ac, s) + b;
327 }
328 
329 inline void MD5::GG_(
330  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
331 ) {
332  a = rotate_left_(a + G_(b,c,d) + x + ac, s) + b;
333 }
334 
335 inline void MD5::HH_(
336  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
337 ) {
338  a = rotate_left_(a + H_(b,c,d) + x + ac, s) + b;
339 }
340 
341 inline void MD5::II_(
342  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
343 ) {
344  a = rotate_left_(a + I_(b,c,d) + x + ac, s) + b;
345 }
346 
347 void MD5::transform_( const uint8_t block[MD5::BlockSize] )
348 {
349  uint32_t a = state_[0], b = state_[1], c = state_[2], d = state_[3], x[16];
350  decode_( x, block, MD5::BlockSize );
351 
352  // Constants for MD5Transform routine.
353  static uint32_t S11 = 7;
354  static uint32_t S12 = 12;
355  static uint32_t S13 = 17;
356  static uint32_t S14 = 22;
357  static uint32_t S21 = 5;
358  static uint32_t S22 = 9;
359  static uint32_t S23 = 14;
360  static uint32_t S24 = 20;
361  static uint32_t S31 = 4;
362  static uint32_t S32 = 11;
363  static uint32_t S33 = 16;
364  static uint32_t S34 = 23;
365  static uint32_t S41 = 6;
366  static uint32_t S42 = 10;
367  static uint32_t S43 = 15;
368  static uint32_t S44 = 21;
369 
370  /* Round 1 */
371  FF_ (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
372  FF_ (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
373  FF_ (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
374  FF_ (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
375  FF_ (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
376  FF_ (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
377  FF_ (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
378  FF_ (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
379  FF_ (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
380  FF_ (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
381  FF_ (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
382  FF_ (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
383  FF_ (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
384  FF_ (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
385  FF_ (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
386  FF_ (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
387 
388  /* Round 2 */
389  GG_ (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
390  GG_ (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
391  GG_ (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
392  GG_ (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
393  GG_ (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
394  GG_ (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
395  GG_ (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
396  GG_ (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
397  GG_ (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
398  GG_ (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
399  GG_ (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
400  GG_ (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
401  GG_ (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
402  GG_ (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
403  GG_ (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
404  GG_ (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
405 
406  /* Round 3 */
407  HH_ (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
408  HH_ (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
409  HH_ (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
410  HH_ (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
411  HH_ (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
412  HH_ (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
413  HH_ (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
414  HH_ (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
415  HH_ (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
416  HH_ (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
417  HH_ (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
418  HH_ (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
419  HH_ (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
420  HH_ (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
421  HH_ (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
422  HH_ (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
423 
424  /* Round 4 */
425  II_ (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
426  II_ (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
427  II_ (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
428  II_ (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
429  II_ (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
430  II_ (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
431  II_ (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
432  II_ (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
433  II_ (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
434  II_ (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
435  II_ (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
436  II_ (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
437  II_ (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
438  II_ (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
439  II_ (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
440  II_ (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
441 
442  state_[0] += a;
443  state_[1] += b;
444  state_[2] += c;
445  state_[3] += d;
446 
447  // Zeroize sensitive information.
448  memset(x, 0, sizeof x);
449 }
450 
451 void MD5::decode_( uint32_t output[], const uint8_t input[], size_type len )
452 {
453  for (unsigned int i = 0, j = 0; j < len; i++, j += 4) {
454  output[i]
455  = ((uint32_t)input[j])
456  | (((uint32_t)input[j+1]) << 8)
457  | (((uint32_t)input[j+2]) << 16)
458  | (((uint32_t)input[j+3]) << 24)
459  ;
460  }
461 }
462 
463 void MD5::encode_( uint8_t output[], const uint32_t input[], size_type len )
464 {
465  for (size_type i = 0, j = 0; j < len; i++, j += 4) {
466  output[j] = input[i] & 0xff;
467  output[j+1] = (input[i] >> 8) & 0xff;
468  output[j+2] = (input[i] >> 16) & 0xff;
469  output[j+3] = (input[i] >> 24) & 0xff;
470  }
471 }
472 
473 } // namespace utils
474 } // namespace genesis
Calculate MD5 hashes for strings and files.
Definition: md5.hpp:88
uint32_t size_type
Definition: md5.hpp:96
MD5()
Initialize the object for use.
Definition: md5.cpp:86
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: md5.cpp:211
void update(std::shared_ptr< BaseInputSource > source)
Definition: md5.cpp:164
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: md5.cpp:95
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::array< uint8_t, 16 > DigestType
Store an MD5 digest.
Definition: md5.hpp:107
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: md5.cpp:102
static const size_t BlockSize
Definition: md5.hpp:98
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: md5.cpp:159
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: md5.cpp:217
static DigestType hex_to_digest(std::string const &hex)
Definition: md5.cpp:121
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
static std::string digest_to_hex(DigestType const &digest)
Definition: md5.cpp:109