A library for working with phylogenetic and population genetic data.
v0.27.0
md5.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 /*
32  =================================================================
33  MD5 License
34  =================================================================
35 
36  The implementation is based on http://www.zedwood.com/article/cpp-md5-function,
37  which itself was converted to C++ class by Frank Thilo (thilo@unix-ag.org)
38  for bzflag (http://www.bzflag.org), based on:
39 
40  md5.h and md5.c
41  reference implemantion of RFC 1321
42 
43  Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
44  rights reserved.
45 
46  License to copy and use this software is granted provided that it
47  is identified as the "RSA Data Security, Inc. MD5 Message-Digest
48  Algorithm" in all material mentioning or referencing this software
49  or this function.
50 
51  License is also granted to make and use derivative works provided
52  that such works are identified as "derived from the RSA Data
53  Security, Inc. MD5 Message-Digest Algorithm" in all material
54  mentioning or referencing the derived work.
55 
56  RSA Data Security, Inc. makes no representations concerning either
57  the merchantability of this software or the suitability of this
58  software for any particular purpose. It is provided "as is"
59  without express or implied warranty of any kind.
60 
61  These notices must be retained in any copies of any part of this
62  documentation and/or software.
63 */
64 
66 
68 
69 // Apparently, for some compilers, the folllowing definition has to be set in order for <cinttypes>
70 // to include the scanf format types, see https://stackoverflow.com/a/30851225/4184258
71 #define __STDC_FORMAT_MACROS
72 
73 #include <algorithm>
74 #include <cinttypes>
75 #include <cstdio>
76 #include <cstring>
77 #include <fstream>
78 #include <iomanip>
79 #include <iostream>
80 #include <sstream>
81 #include <stdexcept>
82 
83 // In the case that the above inclusion of <cinttypes> still did not give us the scanf format types
84 // that we need, we define it ourselves, see https://helpmanual.io/man3/SCNx8-avr/
85 // but also emit a message to show that this is the case, as a hint for debugging.
86 #ifndef SCNx8
87  // hexadecimal scanf format for uint8_t
88  #define SCNx8 "hhx"
89  #pragma message ( \
90  "<inttypes.h> did not provide a definition of `SCNx8`, " \
91  "which we hence define here as `hhx`" \
92  )
93 #endif
94 
95 namespace genesis {
96 namespace utils {
97 
98 // ================================================================================================
99 // Constructors and Rule of Five
100 // ================================================================================================
101 
103 {
104  reset_();
105 }
106 
107 // ================================================================================================
108 // Full Hashing
109 // ================================================================================================
110 
111 std::string MD5::read_hex( std::shared_ptr<BaseInputSource> source )
112 {
113  MD5 checksum;
114  checksum.update( source );
115  return checksum.final_hex();
116 }
117 
118 MD5::DigestType MD5::read_digest( std::shared_ptr<BaseInputSource> source )
119 {
120  MD5 checksum;
121  checksum.update( source );
122  return checksum.final_digest();
123 }
124 
125 std::string MD5::digest_to_hex( MD5::DigestType const& digest )
126 {
127  /* Hex std::string */
128  std::ostringstream result;
129  for (size_t i = 0; i < digest.size(); ++i) {
130  result << std::hex << std::setfill('0') << std::setw(2);
131  result << static_cast<int>( digest[i] );
132  }
133 
134  return result.str();
135 }
136 
137 MD5::DigestType MD5::hex_to_digest( std::string const& hex )
138 {
139  // Safety first!
140  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
141  return std::isxdigit( c );
142  });
143  if( hex.size() != 32 || ! all_hex ) {
144  throw std::runtime_error( "Invalid MD5 hex string." );
145  }
146 
147  // The following test was introduced to check the scanf format "%2hhx",
148  // which just is an "unsigned char", which is not a fixed size.
149  // We now use the SCNxN typedefs that offer fixed width replacements, see
150  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
151 
152  // Make sure that the scan works!
153  // static_assert(
154  // sizeof( unsigned char ) == 1,
155  // "Cannot compile MD5::hex_to_digest() with sizeof( unsigned char ) != 1"
156  // );
157 
158  // Convert.
159  MD5::DigestType result;
160  for (size_t i = 0; i < result.size(); ++i) {
161  // auto const n = sscanf( &hex[ 2 * i ], "%2hhx", &(result[i]) );
162  auto const n = sscanf( &hex[ 2 * i ], "%2" SCNx8, &(result[i]) );
163  if( n != 1 ) {
164  throw std::runtime_error( "Invalid MD5 hex string." );
165  }
166  }
167 
168  return result;
169 }
170 
171 // ================================================================================================
172 // Iterative Hashing
173 // ================================================================================================
174 
176 {
177  reset_();
178 }
179 
180 void MD5::update( std::shared_ptr<BaseInputSource> source )
181 {
182  auto ib = InputBuffer( source );
183  char sbuf[MD5::BlockSize];
184 
185  while (true) {
186 
187  // Read a block and use it for an update.
188  auto cnt = ib.read( sbuf, MD5::BlockSize );
189  update( sbuf, cnt );
190 
191  // If we didn't get a full block, the input is done.
192  if( cnt != MD5::BlockSize ) {
193  return;
194  }
195  }
196 }
197 
198 void MD5::update( std::string const& s )
199 {
200  update( s.c_str(), s.size() );
201 }
202 
203 void MD5::update(std::istream& is)
204 {
205  char sbuf[MD5::BlockSize];
206  while (true) {
207 
208  // Read a block and use it for an update.
209  is.read( sbuf, MD5::BlockSize );
210  size_t cnt = is.gcount();
211  update( sbuf, cnt );
212 
213  // If we didn't get a full block, the stream is done.
214  if( cnt != MD5::BlockSize ) {
215  return;
216  }
217  }
218 }
219 
220 void MD5::update( char const* input, MD5::size_type length )
221 {
222  // Ugly conversion, but still better than the silent one used in the original code.
223  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
224  update_( in_uchar, length);
225 }
226 
227 std::string MD5::final_hex()
228 {
229  // Calculate digest, also reset for next use.
230  return digest_to_hex( final_digest() );
231 }
232 
234 {
235  static unsigned char padding[64] = {
236  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
237  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
238  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
239  };
240 
241  // Save number of bits
242  unsigned char bits[8];
243  encode_(bits, count_, 8);
244 
245  // pad out to 56 mod 64.
246  size_type index = count_[0] / 8 % 64;
247  size_type padLen = (index < 56) ? (56 - index) : (120 - index);
248  update_(padding, padLen);
249 
250  // Append length (before padding)
251  update_(bits, 8);
252 
253  // Store state in digest
254  encode_( digest_.data(), state_, 16 );
255 
256  // Zeroize sensitive information.
257  memset(buffer_, 0, sizeof buffer_);
258  memset(count_, 0, sizeof count_);
259 
260  reset_();
261  return digest_;
262 }
263 
264 // ================================================================================================
265 // Internal Functions
266 // ================================================================================================
267 
268 void MD5::reset_()
269 {
270  count_[0] = 0;
271  count_[1] = 0;
272 
273  // load magic initialization constants.
274  state_[0] = 0x67452301;
275  state_[1] = 0xefcdab89;
276  state_[2] = 0x98badcfe;
277  state_[3] = 0x10325476;
278 }
279 
280 void MD5::update_( unsigned char const* input, size_type length )
281 {
282  // compute number of bytes mod 64
283  MD5::size_type index = count_[0] / 8 % MD5::BlockSize;
284 
285  // Update number of bits
286  if ((count_[0] += (length << 3)) < (length << 3)) {
287  count_[1]++;
288  }
289  count_[1] += (length >> 29);
290 
291  // number of bytes we need to fill in buffer
292  MD5::size_type firstpart = 64 - index;
293  MD5::size_type i;
294 
295  // transform as many times as possible.
296  if (length >= firstpart) {
297  // fill buffer first, transform
298  memcpy( &buffer_[index], input, firstpart );
299  transform_(buffer_);
300 
301  // transform chunks of MD5::BlockSize (64 bytes)
302  for (i = firstpart; i + MD5::BlockSize <= length; i += MD5::BlockSize) {
303  transform_( &input[i] );
304  }
305  index = 0;
306  } else {
307  i = 0;
308  }
309 
310  // buffer remaining input
311  memcpy( &buffer_[index], &input[i], length-i );
312 }
313 
314 inline uint32_t MD5::F_(uint32_t x, uint32_t y, uint32_t z)
315 {
316  return ( x & y ) | ( ~x & z );
317 }
318 
319 inline uint32_t MD5::G_(uint32_t x, uint32_t y, uint32_t z)
320 {
321  return ( x & z ) | ( y & ~z );
322 }
323 
324 inline uint32_t MD5::H_(uint32_t x, uint32_t y, uint32_t z)
325 {
326  return x ^ y ^ z;
327 }
328 
329 inline uint32_t MD5::I_(uint32_t x, uint32_t y, uint32_t z)
330 {
331  return y ^ ( x | ~z );
332 }
333 
334 inline uint32_t MD5::rotate_left_(uint32_t x, int n)
335 {
336  return (x << n) | (x >> (32-n));
337 }
338 
339 inline void MD5::FF_(
340  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
341 ) {
342  a = rotate_left_(a+ F_(b,c,d) + x + ac, s) + b;
343 }
344 
345 inline void MD5::GG_(
346  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
347 ) {
348  a = rotate_left_(a + G_(b,c,d) + x + ac, s) + b;
349 }
350 
351 inline void MD5::HH_(
352  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
353 ) {
354  a = rotate_left_(a + H_(b,c,d) + x + ac, s) + b;
355 }
356 
357 inline void MD5::II_(
358  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
359 ) {
360  a = rotate_left_(a + I_(b,c,d) + x + ac, s) + b;
361 }
362 
363 void MD5::transform_( const uint8_t block[MD5::BlockSize] )
364 {
365  uint32_t a = state_[0], b = state_[1], c = state_[2], d = state_[3], x[16];
366  decode_( x, block, MD5::BlockSize );
367 
368  // Constants for MD5Transform routine.
369  static uint32_t S11 = 7;
370  static uint32_t S12 = 12;
371  static uint32_t S13 = 17;
372  static uint32_t S14 = 22;
373  static uint32_t S21 = 5;
374  static uint32_t S22 = 9;
375  static uint32_t S23 = 14;
376  static uint32_t S24 = 20;
377  static uint32_t S31 = 4;
378  static uint32_t S32 = 11;
379  static uint32_t S33 = 16;
380  static uint32_t S34 = 23;
381  static uint32_t S41 = 6;
382  static uint32_t S42 = 10;
383  static uint32_t S43 = 15;
384  static uint32_t S44 = 21;
385 
386  /* Round 1 */
387  FF_ (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
388  FF_ (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
389  FF_ (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
390  FF_ (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
391  FF_ (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
392  FF_ (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
393  FF_ (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
394  FF_ (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
395  FF_ (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
396  FF_ (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
397  FF_ (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
398  FF_ (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
399  FF_ (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
400  FF_ (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
401  FF_ (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
402  FF_ (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
403 
404  /* Round 2 */
405  GG_ (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
406  GG_ (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
407  GG_ (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
408  GG_ (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
409  GG_ (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
410  GG_ (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
411  GG_ (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
412  GG_ (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
413  GG_ (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
414  GG_ (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
415  GG_ (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
416  GG_ (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
417  GG_ (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
418  GG_ (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
419  GG_ (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
420  GG_ (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
421 
422  /* Round 3 */
423  HH_ (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
424  HH_ (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
425  HH_ (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
426  HH_ (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
427  HH_ (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
428  HH_ (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
429  HH_ (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
430  HH_ (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
431  HH_ (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
432  HH_ (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
433  HH_ (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
434  HH_ (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
435  HH_ (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
436  HH_ (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
437  HH_ (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
438  HH_ (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
439 
440  /* Round 4 */
441  II_ (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
442  II_ (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
443  II_ (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
444  II_ (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
445  II_ (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
446  II_ (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
447  II_ (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
448  II_ (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
449  II_ (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
450  II_ (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
451  II_ (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
452  II_ (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
453  II_ (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
454  II_ (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
455  II_ (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
456  II_ (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
457 
458  state_[0] += a;
459  state_[1] += b;
460  state_[2] += c;
461  state_[3] += d;
462 
463  // Zeroize sensitive information.
464  memset(x, 0, sizeof x);
465 }
466 
467 void MD5::decode_( uint32_t output[], const uint8_t input[], size_type len )
468 {
469  for (unsigned int i = 0, j = 0; j < len; i++, j += 4) {
470  output[i]
471  = ((uint32_t)input[j])
472  | (((uint32_t)input[j+1]) << 8)
473  | (((uint32_t)input[j+2]) << 16)
474  | (((uint32_t)input[j+3]) << 24)
475  ;
476  }
477 }
478 
479 void MD5::encode_( uint8_t output[], const uint32_t input[], size_type len )
480 {
481  for (size_type i = 0, j = 0; j < len; i++, j += 4) {
482  output[j] = input[i] & 0xff;
483  output[j+1] = (input[i] >> 8) & 0xff;
484  output[j+2] = (input[i] >> 16) & 0xff;
485  output[j+3] = (input[i] >> 24) & 0xff;
486  }
487 }
488 
489 } // namespace utils
490 } // namespace genesis
genesis::utils::MD5
Calculate MD5 hashes for strings and files.
Definition: md5.hpp:88
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::utils::MD5::BlockSize
static const size_t BlockSize
Definition: md5.hpp:98
genesis::utils::MD5::update
void update(std::shared_ptr< BaseInputSource > source)
Definition: md5.cpp:180
genesis::utils::MD5::read_hex
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: md5.cpp:111
genesis::utils::MD5::hex_to_digest
static DigestType hex_to_digest(std::string const &hex)
Definition: md5.cpp:137
genesis::utils::MD5::clear
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: md5.cpp:175
genesis::utils::MD5::DigestType
std::array< uint8_t, 16 > DigestType
Store an MD5 digest.
Definition: md5.hpp:107
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
md5.hpp
genesis::utils::MD5::digest_to_hex
static std::string digest_to_hex(DigestType const &digest)
Definition: md5.cpp:125
genesis::utils::MD5::final_digest
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: md5.cpp:233
genesis::utils::MD5::MD5
MD5()
Initialize the object for use.
Definition: md5.cpp:102
genesis::utils::MD5::final_hex
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: md5.cpp:227
genesis::utils::InputBuffer
Definition: input_buffer.hpp:49
genesis::utils::MD5::read_digest
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: md5.cpp:118
input_buffer.hpp
SCNx8
#define SCNx8
Definition: md5.cpp:88
genesis::utils::MD5::size_type
uint32_t size_type
Definition: md5.hpp:96