A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
md5.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 /*
32  =================================================================
33  MD5 License
34  =================================================================
35 
36  The implementation is based on http://www.zedwood.com/article/cpp-md5-function,
37  which itself was converted to C++ class by Frank Thilo (thilo@unix-ag.org)
38  for bzflag (http://www.bzflag.org), based on:
39 
40  md5.h and md5.c
41  reference implemantion of RFC 1321
42 
43  Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
44  rights reserved.
45 
46  License to copy and use this software is granted provided that it
47  is identified as the "RSA Data Security, Inc. MD5 Message-Digest
48  Algorithm" in all material mentioning or referencing this software
49  or this function.
50 
51  License is also granted to make and use derivative works provided
52  that such works are identified as "derived from the RSA Data
53  Security, Inc. MD5 Message-Digest Algorithm" in all material
54  mentioning or referencing the derived work.
55 
56  RSA Data Security, Inc. makes no representations concerning either
57  the merchantability of this software or the suitability of this
58  software for any particular purpose. It is provided "as is"
59  without express or implied warranty of any kind.
60 
61  These notices must be retained in any copies of any part of this
62  documentation and/or software.
63 */
64 
66 
67 #include <algorithm>
68 #include <cinttypes>
69 #include <cstdio>
70 #include <cstring>
71 #include <fstream>
72 #include <iomanip>
73 #include <iostream>
74 #include <sstream>
75 #include <stdexcept>
76 
77 namespace genesis {
78 namespace utils {
79 
80 // ================================================================================================
81 // Constructors and Rule of Five
82 // ================================================================================================
83 
88 {
89  reset_();
90 }
91 
92 // ================================================================================================
93 // Member Functions
94 // ================================================================================================
95 
99 void MD5::update( std::string const& s )
100 {
101  update( s.c_str(), s.size() );
102 }
103 
104 void MD5::update(std::istream& is)
105 {
106  char sbuf[MD5::BlockSize];
107  while (true) {
108 
109  // Read a block and use it for an update.
110  is.read( sbuf, MD5::BlockSize );
111  size_t cnt = is.gcount();
112  update( sbuf, cnt );
113 
114  // If we didn't get a full block, the stream is done.
115  if( cnt != MD5::BlockSize ) {
116  return;
117  }
118  }
119 }
120 
121 void MD5::update( char const* input, MD5::size_type length )
122 {
123  // Ugly conversion, but still better than the silent one used in the original code.
124  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
125  update_( in_uchar, length);
126 }
127 
131 std::string MD5::final_hex()
132 {
133  // Calculate digest, also reset for next use.
134  return digest_to_hex( final_digest() );
135 }
136 
141 {
142  static unsigned char padding[64] = {
143  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
144  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
145  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
146  };
147 
148  // Save number of bits
149  unsigned char bits[8];
150  encode_(bits, count_, 8);
151 
152  // pad out to 56 mod 64.
153  size_type index = count_[0] / 8 % 64;
154  size_type padLen = (index < 56) ? (56 - index) : (120 - index);
155  update_(padding, padLen);
156 
157  // Append length (before padding)
158  update_(bits, 8);
159 
160  // Store state in digest
161  encode_( digest_.data(), state_, 16 );
162 
163  // Zeroize sensitive information.
164  memset(buffer_, 0, sizeof buffer_);
165  memset(count_, 0, sizeof count_);
166 
167  reset_();
168  return digest_;
169 }
170 
174 std::string MD5::from_file_hex( std::string const& filename )
175 {
176  std::ifstream stream( filename.c_str(), std::ios::binary );
177  MD5 checksum;
178  checksum.update(stream);
179  return checksum.final_hex();
180 }
181 
185 MD5::DigestType MD5::from_file_digest( std::string const& filename )
186 {
187  std::ifstream stream( filename.c_str(), std::ios::binary );
188  MD5 checksum;
189  checksum.update(stream);
190  return checksum.final_digest();
191 }
192 
196 std::string MD5::from_string_hex( std::string const& input )
197 {
198  MD5 checksum;
199  checksum.update( input );
200  return checksum.final_hex();
201 }
202 
206 MD5::DigestType MD5::from_string_digest( std::string const& input )
207 {
208  MD5 checksum;
209  checksum.update( input );
210  return checksum.final_digest();
211 }
212 
213 std::string MD5::digest_to_hex( MD5::DigestType const& digest )
214 {
215  /* Hex std::string */
216  std::ostringstream result;
217  for (size_t i = 0; i < digest.size(); ++i) {
218  result << std::hex << std::setfill('0') << std::setw(2);
219  result << static_cast<int>( digest[i] );
220  }
221 
222  return result.str();
223 }
224 
225 MD5::DigestType MD5::hex_to_digest( std::string const& hex )
226 {
227  // Safety first!
228  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
229  return std::isxdigit( c );
230  });
231  if( hex.size() != 32 || ! all_hex ) {
232  throw std::runtime_error( "Invalid MD5 hex string." );
233  }
234 
235  // The following test was introduced to check the scanf format "%2hhx",
236  // which just is an "unsigned char", which is not a fixed size.
237  // We now use the SCNxN typedefs that offer fixed with replacements, see
238  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
239 
240  // Make sure that the scan works!
241  // static_assert(
242  // sizeof( unsigned char ) == 1,
243  // "Cannot compile MD5::hex_to_digest() with sizeof( unsigned char ) != 1"
244  // );
245 
246  // Convert.
247  MD5::DigestType result;
248  for (size_t i = 0; i < result.size(); ++i) {
249  // auto const n = sscanf( &hex[ 2 * i ], "%2hhx", &(result[i]) );
250  auto const n = sscanf( &hex[ 2 * i ], "%2" SCNx8, &(result[i]) );
251  if( n != 1 ) {
252  throw std::runtime_error( "Invalid MD5 hex string." );
253  }
254  }
255 
256  return result;
257 }
258 
259 // ================================================================================================
260 // Internal Functions
261 // ================================================================================================
262 
263 void MD5::reset_()
264 {
265  count_[0] = 0;
266  count_[1] = 0;
267 
268  // load magic initialization constants.
269  state_[0] = 0x67452301;
270  state_[1] = 0xefcdab89;
271  state_[2] = 0x98badcfe;
272  state_[3] = 0x10325476;
273 }
274 
275 void MD5::update_( unsigned char const* input, size_type length )
276 {
277  // compute number of bytes mod 64
278  MD5::size_type index = count_[0] / 8 % MD5::BlockSize;
279 
280  // Update number of bits
281  if ((count_[0] += (length << 3)) < (length << 3)) {
282  count_[1]++;
283  }
284  count_[1] += (length >> 29);
285 
286  // number of bytes we need to fill in buffer
287  MD5::size_type firstpart = 64 - index;
288  MD5::size_type i;
289 
290  // transform as many times as possible.
291  if (length >= firstpart) {
292  // fill buffer first, transform
293  memcpy( &buffer_[index], input, firstpart );
294  transform_(buffer_);
295 
296  // transform chunks of MD5::BlockSize (64 bytes)
297  for (i = firstpart; i + MD5::BlockSize <= length; i += MD5::BlockSize) {
298  transform_( &input[i] );
299  }
300  index = 0;
301  } else {
302  i = 0;
303  }
304 
305  // buffer remaining input
306  memcpy( &buffer_[index], &input[i], length-i );
307 }
308 
309 inline uint32_t MD5::F_(uint32_t x, uint32_t y, uint32_t z)
310 {
311  return ( x & y ) | ( ~x & z );
312 }
313 
314 inline uint32_t MD5::G_(uint32_t x, uint32_t y, uint32_t z)
315 {
316  return ( x & z ) | ( y & ~z );
317 }
318 
319 inline uint32_t MD5::H_(uint32_t x, uint32_t y, uint32_t z)
320 {
321  return x ^ y ^ z;
322 }
323 
324 inline uint32_t MD5::I_(uint32_t x, uint32_t y, uint32_t z)
325 {
326  return y ^ ( x | ~z );
327 }
328 
329 inline uint32_t MD5::rotate_left_(uint32_t x, int n)
330 {
331  return (x << n) | (x >> (32-n));
332 }
333 
334 inline void MD5::FF_(
335  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
336 ) {
337  a = rotate_left_(a+ F_(b,c,d) + x + ac, s) + b;
338 }
339 
340 inline void MD5::GG_(
341  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
342 ) {
343  a = rotate_left_(a + G_(b,c,d) + x + ac, s) + b;
344 }
345 
346 inline void MD5::HH_(
347  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
348 ) {
349  a = rotate_left_(a + H_(b,c,d) + x + ac, s) + b;
350 }
351 
352 inline void MD5::II_(
353  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
354 ) {
355  a = rotate_left_(a + I_(b,c,d) + x + ac, s) + b;
356 }
357 
358 void MD5::transform_( const uint8_t block[MD5::BlockSize] )
359 {
360  uint32_t a = state_[0], b = state_[1], c = state_[2], d = state_[3], x[16];
361  decode_( x, block, MD5::BlockSize );
362 
363  // Constants for MD5Transform routine.
364  static uint32_t S11 = 7;
365  static uint32_t S12 = 12;
366  static uint32_t S13 = 17;
367  static uint32_t S14 = 22;
368  static uint32_t S21 = 5;
369  static uint32_t S22 = 9;
370  static uint32_t S23 = 14;
371  static uint32_t S24 = 20;
372  static uint32_t S31 = 4;
373  static uint32_t S32 = 11;
374  static uint32_t S33 = 16;
375  static uint32_t S34 = 23;
376  static uint32_t S41 = 6;
377  static uint32_t S42 = 10;
378  static uint32_t S43 = 15;
379  static uint32_t S44 = 21;
380 
381  /* Round 1 */
382  FF_ (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
383  FF_ (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
384  FF_ (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
385  FF_ (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
386  FF_ (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
387  FF_ (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
388  FF_ (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
389  FF_ (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
390  FF_ (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
391  FF_ (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
392  FF_ (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
393  FF_ (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
394  FF_ (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
395  FF_ (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
396  FF_ (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
397  FF_ (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
398 
399  /* Round 2 */
400  GG_ (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
401  GG_ (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
402  GG_ (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
403  GG_ (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
404  GG_ (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
405  GG_ (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
406  GG_ (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
407  GG_ (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
408  GG_ (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
409  GG_ (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
410  GG_ (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
411  GG_ (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
412  GG_ (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
413  GG_ (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
414  GG_ (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
415  GG_ (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
416 
417  /* Round 3 */
418  HH_ (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
419  HH_ (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
420  HH_ (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
421  HH_ (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
422  HH_ (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
423  HH_ (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
424  HH_ (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
425  HH_ (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
426  HH_ (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
427  HH_ (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
428  HH_ (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
429  HH_ (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
430  HH_ (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
431  HH_ (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
432  HH_ (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
433  HH_ (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
434 
435  /* Round 4 */
436  II_ (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
437  II_ (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
438  II_ (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
439  II_ (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
440  II_ (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
441  II_ (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
442  II_ (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
443  II_ (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
444  II_ (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
445  II_ (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
446  II_ (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
447  II_ (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
448  II_ (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
449  II_ (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
450  II_ (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
451  II_ (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
452 
453  state_[0] += a;
454  state_[1] += b;
455  state_[2] += c;
456  state_[3] += d;
457 
458  // Zeroize sensitive information.
459  memset(x, 0, sizeof x);
460 }
461 
462 void MD5::decode_( uint32_t output[], const uint8_t input[], size_type len )
463 {
464  for (unsigned int i = 0, j = 0; j < len; i++, j += 4) {
465  output[i]
466  = ((uint32_t)input[j])
467  | (((uint32_t)input[j+1]) << 8)
468  | (((uint32_t)input[j+2]) << 16)
469  | (((uint32_t)input[j+3]) << 24)
470  ;
471  }
472 }
473 
474 void MD5::encode_( uint8_t output[], const uint32_t input[], size_type len )
475 {
476  for (size_type i = 0, j = 0; j < len; i++, j += 4) {
477  output[j] = input[i] & 0xff;
478  output[j+1] = (input[i] >> 8) & 0xff;
479  output[j+2] = (input[i] >> 16) & 0xff;
480  output[j+3] = (input[i] >> 24) & 0xff;
481  }
482 }
483 
484 } // namespace utils
485 } // namespace genesis
Calculate MD5 hashes for strings and files.
Definition: md5.hpp:82
static std::string from_file_hex(std::string const &filename)
Calculate the checksum for the content of a file, given its path.
Definition: md5.cpp:174
uint32_t size_type
Definition: md5.hpp:90
MD5()
Initialize the object for use.
Definition: md5.cpp:87
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: md5.cpp:131
void update(std::string const &s)
Add the contents of a string to the hash digest.
Definition: md5.cpp:99
static DigestType from_file_digest(std::string const &filename)
Calculate the hash digest for the content of a file, given its path.
Definition: md5.cpp:185
std::array< uint8_t, 16 > DigestType
Store an MD5 digest.
Definition: md5.hpp:101
static const size_t BlockSize
Definition: md5.hpp:92
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: md5.cpp:140
static DigestType hex_to_digest(std::string const &hex)
Definition: md5.cpp:225
static DigestType from_string_digest(std::string const &input)
Calculate the hash digest for the content of a string.
Definition: md5.cpp:206
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
static std::string from_string_hex(std::string const &input)
Calculate the checksum for the content of a string.
Definition: md5.cpp:196
static std::string digest_to_hex(DigestType const &digest)
Definition: md5.cpp:213