A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
md5.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 /*
32  =================================================================
33  MD5 License
34  =================================================================
35 
36  The implementation is based on http://www.zedwood.com/article/cpp-md5-function,
37  which itself was converted to C++ class by Frank Thilo (thilo@unix-ag.org)
38  for bzflag (http://www.bzflag.org), based on:
39 
40  md5.h and md5.c
41  reference implementation of RFC 1321
42 
43  Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
44  rights reserved.
45 
46  License to copy and use this software is granted provided that it
47  is identified as the "RSA Data Security, Inc. MD5 Message-Digest
48  Algorithm" in all material mentioning or referencing this software
49  or this function.
50 
51  License is also granted to make and use derivative works provided
52  that such works are identified as "derived from the RSA Data
53  Security, Inc. MD5 Message-Digest Algorithm" in all material
54  mentioning or referencing the derived work.
55 
56  RSA Data Security, Inc. makes no representations concerning either
57  the merchantability of this software or the suitability of this
58  software for any particular purpose. It is provided "as is"
59  without express or implied warranty of any kind.
60 
61  These notices must be retained in any copies of any part of this
62  documentation and/or software.
63 */
64 
66 
67 #include <algorithm>
68 #include <cinttypes>
69 #include <cstdio>
70 #include <cstring>
71 #include <fstream>
72 #include <iomanip>
73 #include <iostream>
74 #include <sstream>
75 #include <stdexcept>
76 
77 namespace genesis {
78 namespace utils {
79 
80 // ================================================================================================
81 // Constructors and Rule of Five
82 // ================================================================================================
83 
85 {
86  reset_();
87 }
88 
89 // ================================================================================================
90 // Member Functions
91 // ================================================================================================
92 
93 void MD5::clear()
94 {
95  reset_();
96 }
97 
98 void MD5::update( std::string const& s )
99 {
100  update( s.c_str(), s.size() );
101 }
102 
103 void MD5::update(std::istream& is)
104 {
105  char sbuf[MD5::BlockSize];
106  while (true) {
107 
108  // Read a block and use it for an update.
109  is.read( sbuf, MD5::BlockSize );
110  size_t cnt = is.gcount();
111  update( sbuf, cnt );
112 
113  // If we didn't get a full block, the stream is done.
114  if( cnt != MD5::BlockSize ) {
115  return;
116  }
117  }
118 }
119 
120 void MD5::update( char const* input, MD5::size_type length )
121 {
122  // Ugly conversion, but still better than the silent one used in the original code.
123  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
124  update_( in_uchar, length);
125 }
126 
127 std::string MD5::final_hex()
128 {
129  // Calculate digest, also reset for next use.
130  return digest_to_hex( final_digest() );
131 }
132 
134 {
135  static unsigned char padding[64] = {
136  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
139  };
140 
141  // Save number of bits
142  unsigned char bits[8];
143  encode_(bits, count_, 8);
144 
145  // pad out to 56 mod 64.
146  size_type index = count_[0] / 8 % 64;
147  size_type padLen = (index < 56) ? (56 - index) : (120 - index);
148  update_(padding, padLen);
149 
150  // Append length (before padding)
151  update_(bits, 8);
152 
153  // Store state in digest
154  encode_( digest_.data(), state_, 16 );
155 
156  // Zeroize sensitive information.
157  memset(buffer_, 0, sizeof buffer_);
158  memset(count_, 0, sizeof count_);
159 
160  reset_();
161  return digest_;
162 }
163 
164 std::string MD5::from_file_hex( std::string const& filename )
165 {
166  std::ifstream stream( filename.c_str(), std::ios::binary );
167  MD5 checksum;
168  checksum.update(stream);
169  return checksum.final_hex();
170 }
171 
172 MD5::DigestType MD5::from_file_digest( std::string const& filename )
173 {
174  std::ifstream stream( filename.c_str(), std::ios::binary );
175  MD5 checksum;
176  checksum.update(stream);
177  return checksum.final_digest();
178 }
179 
180 std::string MD5::from_string_hex( std::string const& input )
181 {
182  MD5 checksum;
183  checksum.update( input );
184  return checksum.final_hex();
185 }
186 
187 MD5::DigestType MD5::from_string_digest( std::string const& input )
188 {
189  MD5 checksum;
190  checksum.update( input );
191  return checksum.final_digest();
192 }
193 
194 std::string MD5::from_stream_hex( std::istream& is )
195 {
196  MD5 checksum;
197  checksum.update(is);
198  return checksum.final_hex();
199 }
200 
202 {
203  MD5 checksum;
204  checksum.update(is);
205  return checksum.final_digest();
206 }
207 
208 std::string MD5::digest_to_hex( MD5::DigestType const& digest )
209 {
210  /* Hex std::string */
211  std::ostringstream result;
212  for (size_t i = 0; i < digest.size(); ++i) {
213  result << std::hex << std::setfill('0') << std::setw(2);
214  result << static_cast<int>( digest[i] );
215  }
216 
217  return result.str();
218 }
219 
220 MD5::DigestType MD5::hex_to_digest( std::string const& hex )
221 {
222  // Safety first!
223  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
224  return std::isxdigit( c );
225  });
226  if( hex.size() != 32 || ! all_hex ) {
227  throw std::runtime_error( "Invalid MD5 hex string." );
228  }
229 
230  // The following test was introduced to check the scanf format "%2hhx",
231  // which just is an "unsigned char", which is not a fixed size.
232  // We now use the SCNxN typedefs that offer fixed with replacements, see
233  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
234 
235  // Make sure that the scan works!
236  // static_assert(
237  // sizeof( unsigned char ) == 1,
238  // "Cannot compile MD5::hex_to_digest() with sizeof( unsigned char ) != 1"
239  // );
240 
241  // Convert.
242  MD5::DigestType result;
243  for (size_t i = 0; i < result.size(); ++i) {
244  // auto const n = sscanf( &hex[ 2 * i ], "%2hhx", &(result[i]) );
245  auto const n = sscanf( &hex[ 2 * i ], "%2" SCNx8, &(result[i]) );
246  if( n != 1 ) {
247  throw std::runtime_error( "Invalid MD5 hex string." );
248  }
249  }
250 
251  return result;
252 }
253 
254 // ================================================================================================
255 // Internal Functions
256 // ================================================================================================
257 
258 void MD5::reset_()
259 {
260  count_[0] = 0;
261  count_[1] = 0;
262 
263  // load magic initialization constants.
264  state_[0] = 0x67452301;
265  state_[1] = 0xefcdab89;
266  state_[2] = 0x98badcfe;
267  state_[3] = 0x10325476;
268 }
269 
270 void MD5::update_( unsigned char const* input, size_type length )
271 {
272  // compute number of bytes mod 64
273  MD5::size_type index = count_[0] / 8 % MD5::BlockSize;
274 
275  // Update number of bits
276  if ((count_[0] += (length << 3)) < (length << 3)) {
277  count_[1]++;
278  }
279  count_[1] += (length >> 29);
280 
281  // number of bytes we need to fill in buffer
282  MD5::size_type firstpart = 64 - index;
283  MD5::size_type i;
284 
285  // transform as many times as possible.
286  if (length >= firstpart) {
287  // fill buffer first, transform
288  memcpy( &buffer_[index], input, firstpart );
289  transform_(buffer_);
290 
291  // transform chunks of MD5::BlockSize (64 bytes)
292  for (i = firstpart; i + MD5::BlockSize <= length; i += MD5::BlockSize) {
293  transform_( &input[i] );
294  }
295  index = 0;
296  } else {
297  i = 0;
298  }
299 
300  // buffer remaining input
301  memcpy( &buffer_[index], &input[i], length-i );
302 }
303 
304 inline uint32_t MD5::F_(uint32_t x, uint32_t y, uint32_t z)
305 {
306  return ( x & y ) | ( ~x & z );
307 }
308 
309 inline uint32_t MD5::G_(uint32_t x, uint32_t y, uint32_t z)
310 {
311  return ( x & z ) | ( y & ~z );
312 }
313 
314 inline uint32_t MD5::H_(uint32_t x, uint32_t y, uint32_t z)
315 {
316  return x ^ y ^ z;
317 }
318 
319 inline uint32_t MD5::I_(uint32_t x, uint32_t y, uint32_t z)
320 {
321  return y ^ ( x | ~z );
322 }
323 
324 inline uint32_t MD5::rotate_left_(uint32_t x, int n)
325 {
326  return (x << n) | (x >> (32-n));
327 }
328 
329 inline void MD5::FF_(
330  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
331 ) {
332  a = rotate_left_(a+ F_(b,c,d) + x + ac, s) + b;
333 }
334 
335 inline void MD5::GG_(
336  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
337 ) {
338  a = rotate_left_(a + G_(b,c,d) + x + ac, s) + b;
339 }
340 
341 inline void MD5::HH_(
342  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
343 ) {
344  a = rotate_left_(a + H_(b,c,d) + x + ac, s) + b;
345 }
346 
347 inline void MD5::II_(
348  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
349 ) {
350  a = rotate_left_(a + I_(b,c,d) + x + ac, s) + b;
351 }
352 
353 void MD5::transform_( const uint8_t block[MD5::BlockSize] )
354 {
355  uint32_t a = state_[0], b = state_[1], c = state_[2], d = state_[3], x[16];
356  decode_( x, block, MD5::BlockSize );
357 
358  // Constants for MD5Transform routine.
359  static uint32_t S11 = 7;
360  static uint32_t S12 = 12;
361  static uint32_t S13 = 17;
362  static uint32_t S14 = 22;
363  static uint32_t S21 = 5;
364  static uint32_t S22 = 9;
365  static uint32_t S23 = 14;
366  static uint32_t S24 = 20;
367  static uint32_t S31 = 4;
368  static uint32_t S32 = 11;
369  static uint32_t S33 = 16;
370  static uint32_t S34 = 23;
371  static uint32_t S41 = 6;
372  static uint32_t S42 = 10;
373  static uint32_t S43 = 15;
374  static uint32_t S44 = 21;
375 
376  /* Round 1 */
377  FF_ (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
378  FF_ (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
379  FF_ (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
380  FF_ (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
381  FF_ (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
382  FF_ (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
383  FF_ (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
384  FF_ (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
385  FF_ (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
386  FF_ (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
387  FF_ (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
388  FF_ (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
389  FF_ (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
390  FF_ (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
391  FF_ (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
392  FF_ (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
393 
394  /* Round 2 */
395  GG_ (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
396  GG_ (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
397  GG_ (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
398  GG_ (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
399  GG_ (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
400  GG_ (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
401  GG_ (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
402  GG_ (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
403  GG_ (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
404  GG_ (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
405  GG_ (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
406  GG_ (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
407  GG_ (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
408  GG_ (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
409  GG_ (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
410  GG_ (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
411 
412  /* Round 3 */
413  HH_ (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
414  HH_ (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
415  HH_ (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
416  HH_ (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
417  HH_ (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
418  HH_ (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
419  HH_ (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
420  HH_ (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
421  HH_ (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
422  HH_ (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
423  HH_ (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
424  HH_ (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
425  HH_ (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
426  HH_ (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
427  HH_ (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
428  HH_ (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
429 
430  /* Round 4 */
431  II_ (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
432  II_ (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
433  II_ (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
434  II_ (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
435  II_ (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
436  II_ (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
437  II_ (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
438  II_ (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
439  II_ (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
440  II_ (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
441  II_ (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
442  II_ (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
443  II_ (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
444  II_ (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
445  II_ (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
446  II_ (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
447 
448  state_[0] += a;
449  state_[1] += b;
450  state_[2] += c;
451  state_[3] += d;
452 
453  // Zeroize sensitive information.
454  memset(x, 0, sizeof x);
455 }
456 
457 void MD5::decode_( uint32_t output[], const uint8_t input[], size_type len )
458 {
459  for (unsigned int i = 0, j = 0; j < len; i++, j += 4) {
460  output[i]
461  = ((uint32_t)input[j])
462  | (((uint32_t)input[j+1]) << 8)
463  | (((uint32_t)input[j+2]) << 16)
464  | (((uint32_t)input[j+3]) << 24)
465  ;
466  }
467 }
468 
469 void MD5::encode_( uint8_t output[], const uint32_t input[], size_type len )
470 {
471  for (size_type i = 0, j = 0; j < len; i++, j += 4) {
472  output[j] = input[i] & 0xff;
473  output[j+1] = (input[i] >> 8) & 0xff;
474  output[j+2] = (input[i] >> 16) & 0xff;
475  output[j+3] = (input[i] >> 24) & 0xff;
476  }
477 }
478 
479 } // namespace utils
480 } // namespace genesis
Calculate MD5 hashes for strings and files.
Definition: md5.hpp:82
static std::string from_file_hex(std::string const &filename)
Calculate the checksum for the content of a file, given its path.
Definition: md5.cpp:164
uint32_t size_type
Definition: md5.hpp:90
MD5()
Initialize the object for use.
Definition: md5.cpp:84
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: md5.cpp:127
static DigestType from_stream_digest(std::istream &is)
Calculate the hash digest for the content of a stream.
Definition: md5.cpp:201
static std::string from_stream_hex(std::istream &is)
Calculate the checksum for the content of a stream.
Definition: md5.cpp:194
void update(std::string const &s)
Add the contents of a string to the hash digest.
Definition: md5.cpp:98
static DigestType from_file_digest(std::string const &filename)
Calculate the hash digest for the content of a file, given its path.
Definition: md5.cpp:172
std::array< uint8_t, 16 > DigestType
Store an MD5 digest.
Definition: md5.hpp:101
static const size_t BlockSize
Definition: md5.hpp:92
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: md5.cpp:93
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: md5.cpp:133
static DigestType hex_to_digest(std::string const &hex)
Definition: md5.cpp:220
static DigestType from_string_digest(std::string const &input)
Calculate the hash digest for the content of a string.
Definition: md5.cpp:187
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
static std::string from_string_hex(std::string const &input)
Calculate the checksum for the content of a string.
Definition: md5.cpp:180
static std::string digest_to_hex(DigestType const &digest)
Definition: md5.cpp:208