A library for working with phylogenetic and population genetic data.
v0.32.0
md5.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2023 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 /*
32  =================================================================
33  MD5 License
34  =================================================================
35 
36  The implementation is based on http://www.zedwood.com/article/cpp-md5-function,
37  which itself was converted to C++ class by Frank Thilo (thilo@unix-ag.org)
38  for bzflag (http://www.bzflag.org), based on:
39 
40  md5.h and md5.c
41  reference implemantion of RFC 1321
42 
43  Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
44  rights reserved.
45 
46  License to copy and use this software is granted provided that it
47  is identified as the "RSA Data Security, Inc. MD5 Message-Digest
48  Algorithm" in all material mentioning or referencing this software
49  or this function.
50 
51  License is also granted to make and use derivative works provided
52  that such works are identified as "derived from the RSA Data
53  Security, Inc. MD5 Message-Digest Algorithm" in all material
54  mentioning or referencing the derived work.
55 
56  RSA Data Security, Inc. makes no representations concerning either
57  the merchantability of this software or the suitability of this
58  software for any particular purpose. It is provided "as is"
59  without express or implied warranty of any kind.
60 
61  These notices must be retained in any copies of any part of this
62  documentation and/or software.
63 */
64 
66 
68 
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdlib>
72 #include <cstdio>
73 #include <cstring>
74 #include <fstream>
75 #include <iomanip>
76 #include <iostream>
77 #include <sstream>
78 #include <stdexcept>
79 
80 namespace genesis {
81 namespace utils {
82 
83 // ================================================================================================
84 // Constructors and Rule of Five
85 // ================================================================================================
86 
88 {
89  reset_();
90 }
91 
92 // ================================================================================================
93 // Full Hashing
94 // ================================================================================================
95 
96 std::string MD5::read_hex( std::shared_ptr<BaseInputSource> source )
97 {
98  MD5 checksum;
99  checksum.update( source );
100  return checksum.final_hex();
101 }
102 
103 MD5::DigestType MD5::read_digest( std::shared_ptr<BaseInputSource> source )
104 {
105  MD5 checksum;
106  checksum.update( source );
107  return checksum.final_digest();
108 }
109 
110 std::string MD5::digest_to_hex( MD5::DigestType const& digest )
111 {
112  /* Hex std::string */
113  std::ostringstream result;
114  for (size_t i = 0; i < digest.size(); ++i) {
115  result << std::hex << std::setfill('0') << std::setw(2);
116  result << static_cast<int>( digest[i] );
117  }
118 
119  return result.str();
120 }
121 
122 MD5::DigestType MD5::hex_to_digest( std::string const& hex )
123 {
124  // Safety first!
125  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
126  return std::isxdigit( c );
127  });
128  if( hex.size() != 32 || ! all_hex ) {
129  throw std::runtime_error( "Invalid MD5 hex string." );
130  }
131 
132  // Convert.
133  MD5::DigestType result;
134  for (size_t i = 0; i < result.size(); ++i) {
135 
136  // Read the symbols into the digest. We tried this before using sscanf(), but that got quite
137  // messy, as the int widths of the hex format macros are not consistent across compilers...
138  // So now we copy the individual fragements to a string. Bit expensive, but a digest is short.
139  std::string const sub = hex.substr( 2*i, 2 );
140  char* endptr;
141  auto const res = strtoul( sub.c_str(), &endptr, 16 );
142  if( *endptr != 0 ) {
143  throw std::runtime_error( "Invalid MD5 hex string: \"" + hex + "\"" );
144  }
145  result[i] = static_cast<uint32_t>(res);
146  }
147 
148  return result;
149 }
150 
151 // ================================================================================================
152 // Iterative Hashing
153 // ================================================================================================
154 
156 {
157  reset_();
158 }
159 
160 void MD5::update( std::shared_ptr<BaseInputSource> source )
161 {
162  auto ib = InputBuffer( source );
163  char sbuf[MD5::BlockSize];
164 
165  while (true) {
166 
167  // Read a block and use it for an update.
168  auto cnt = ib.read( sbuf, MD5::BlockSize );
169  update( sbuf, cnt );
170 
171  // If we didn't get a full block, the input is done.
172  if( cnt != MD5::BlockSize ) {
173  return;
174  }
175  }
176 }
177 
178 void MD5::update( std::string const& s )
179 {
180  update( s.c_str(), s.size() );
181 }
182 
183 void MD5::update(std::istream& is)
184 {
185  char sbuf[MD5::BlockSize];
186  while (true) {
187 
188  // Read a block and use it for an update.
189  is.read( sbuf, MD5::BlockSize );
190  size_t cnt = is.gcount();
191  update( sbuf, cnt );
192 
193  // If we didn't get a full block, the stream is done.
194  if( cnt != MD5::BlockSize ) {
195  return;
196  }
197  }
198 }
199 
200 void MD5::update( char const* input, MD5::size_type length )
201 {
202  // Ugly conversion, but still better than the silent one used in the original code.
203  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
204  update_( in_uchar, length );
205 }
206 
207 std::string MD5::final_hex()
208 {
209  // Calculate digest, also reset for next use.
210  return digest_to_hex( final_digest() );
211 }
212 
214 {
215  static unsigned char padding[64] = {
216  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
217  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
218  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
219  };
220 
221  // Save number of bits
222  unsigned char bits[8];
223  encode_(bits, count_, 8);
224 
225  // pad out to 56 mod 64.
226  size_type index = count_[0] / 8 % 64;
227  size_type padLen = (index < 56) ? (56 - index) : (120 - index);
228  update_(padding, padLen);
229 
230  // Append length (before padding)
231  update_(bits, 8);
232 
233  // Store state in digest
234  encode_( digest_.data(), state_, 16 );
235 
236  // Zeroize sensitive information.
237  memset(buffer_, 0, sizeof buffer_);
238  memset(count_, 0, sizeof count_);
239 
240  reset_();
241  return digest_;
242 }
243 
244 // ================================================================================================
245 // Internal Functions
246 // ================================================================================================
247 
248 void MD5::reset_()
249 {
250  count_[0] = 0;
251  count_[1] = 0;
252 
253  // load magic initialization constants.
254  state_[0] = 0x67452301;
255  state_[1] = 0xefcdab89;
256  state_[2] = 0x98badcfe;
257  state_[3] = 0x10325476;
258 }
259 
260 void MD5::update_( unsigned char const* input, size_type length )
261 {
262  // compute number of bytes mod 64
263  MD5::size_type index = count_[0] / 8 % MD5::BlockSize;
264 
265  // Update number of bits
266  if( (count_[0] += (length << 3)) < (length << 3) ) {
267  count_[1]++;
268  }
269  count_[1] += (length >> 29);
270 
271  // number of bytes we need to fill in buffer
272  assert( index < 64 );
273  MD5::size_type firstpart = 64 - index;
274  MD5::size_type i;
275 
276  // transform as many times as possible.
277  if( length >= firstpart ) {
278  // fill buffer first, transform
279  memcpy( &buffer_[index], input, firstpart );
280  transform_(buffer_);
281 
282  // transform chunks of MD5::BlockSize (64 bytes)
283  for( i = firstpart; i + MD5::BlockSize <= length; i += MD5::BlockSize ) {
284  transform_( &input[i] );
285  }
286  index = 0;
287  } else {
288  i = 0;
289  }
290 
291  // buffer remaining input
292  assert( index < MD5::BlockSize );
293  assert( i <= length );
294  memcpy( &buffer_[index], &input[i], length-i );
295 }
296 
297 inline uint32_t MD5::F_(uint32_t x, uint32_t y, uint32_t z)
298 {
299  return ( x & y ) | ( ~x & z );
300 }
301 
302 inline uint32_t MD5::G_(uint32_t x, uint32_t y, uint32_t z)
303 {
304  return ( x & z ) | ( y & ~z );
305 }
306 
307 inline uint32_t MD5::H_(uint32_t x, uint32_t y, uint32_t z)
308 {
309  return x ^ y ^ z;
310 }
311 
312 inline uint32_t MD5::I_(uint32_t x, uint32_t y, uint32_t z)
313 {
314  return y ^ ( x | ~z );
315 }
316 
317 inline uint32_t MD5::rotate_left_(uint32_t x, int n)
318 {
319  return (x << n) | (x >> (32-n));
320 }
321 
322 inline void MD5::FF_(
323  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
324 ) {
325  a = rotate_left_(a+ F_(b,c,d) + x + ac, s) + b;
326 }
327 
328 inline void MD5::GG_(
329  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
330 ) {
331  a = rotate_left_(a + G_(b,c,d) + x + ac, s) + b;
332 }
333 
334 inline void MD5::HH_(
335  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
336 ) {
337  a = rotate_left_(a + H_(b,c,d) + x + ac, s) + b;
338 }
339 
340 inline void MD5::II_(
341  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
342 ) {
343  a = rotate_left_(a + I_(b,c,d) + x + ac, s) + b;
344 }
345 
346 void MD5::transform_( const uint8_t block[MD5::BlockSize] )
347 {
348  uint32_t a = state_[0], b = state_[1], c = state_[2], d = state_[3], x[16];
349  decode_( x, block, MD5::BlockSize );
350 
351  // Constants for MD5Transform routine.
352  static uint32_t S11 = 7;
353  static uint32_t S12 = 12;
354  static uint32_t S13 = 17;
355  static uint32_t S14 = 22;
356  static uint32_t S21 = 5;
357  static uint32_t S22 = 9;
358  static uint32_t S23 = 14;
359  static uint32_t S24 = 20;
360  static uint32_t S31 = 4;
361  static uint32_t S32 = 11;
362  static uint32_t S33 = 16;
363  static uint32_t S34 = 23;
364  static uint32_t S41 = 6;
365  static uint32_t S42 = 10;
366  static uint32_t S43 = 15;
367  static uint32_t S44 = 21;
368 
369  /* Round 1 */
370  FF_ (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
371  FF_ (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
372  FF_ (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
373  FF_ (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
374  FF_ (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
375  FF_ (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
376  FF_ (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
377  FF_ (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
378  FF_ (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
379  FF_ (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
380  FF_ (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
381  FF_ (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
382  FF_ (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
383  FF_ (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
384  FF_ (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
385  FF_ (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
386 
387  /* Round 2 */
388  GG_ (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
389  GG_ (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
390  GG_ (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
391  GG_ (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
392  GG_ (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
393  GG_ (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
394  GG_ (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
395  GG_ (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
396  GG_ (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
397  GG_ (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
398  GG_ (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
399  GG_ (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
400  GG_ (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
401  GG_ (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
402  GG_ (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
403  GG_ (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
404 
405  /* Round 3 */
406  HH_ (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
407  HH_ (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
408  HH_ (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
409  HH_ (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
410  HH_ (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
411  HH_ (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
412  HH_ (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
413  HH_ (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
414  HH_ (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
415  HH_ (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
416  HH_ (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
417  HH_ (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
418  HH_ (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
419  HH_ (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
420  HH_ (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
421  HH_ (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
422 
423  /* Round 4 */
424  II_ (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
425  II_ (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
426  II_ (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
427  II_ (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
428  II_ (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
429  II_ (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
430  II_ (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
431  II_ (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
432  II_ (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
433  II_ (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
434  II_ (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
435  II_ (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
436  II_ (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
437  II_ (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
438  II_ (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
439  II_ (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
440 
441  state_[0] += a;
442  state_[1] += b;
443  state_[2] += c;
444  state_[3] += d;
445 
446  // Zeroize sensitive information.
447  memset(x, 0, sizeof x);
448 }
449 
450 void MD5::decode_( uint32_t output[], const uint8_t input[], size_type len )
451 {
452  for (unsigned int i = 0, j = 0; j < len; i++, j += 4) {
453  output[i]
454  = ((uint32_t)input[j])
455  | (((uint32_t)input[j+1]) << 8)
456  | (((uint32_t)input[j+2]) << 16)
457  | (((uint32_t)input[j+3]) << 24)
458  ;
459  }
460 }
461 
462 void MD5::encode_( uint8_t output[], const uint32_t input[], size_type len )
463 {
464  for (size_type i = 0, j = 0; j < len; i++, j += 4) {
465  output[j] = input[i] & 0xff;
466  output[j+1] = (input[i] >> 8) & 0xff;
467  output[j+2] = (input[i] >> 16) & 0xff;
468  output[j+3] = (input[i] >> 24) & 0xff;
469  }
470 }
471 
472 } // namespace utils
473 } // namespace genesis
genesis::utils::MD5
Calculate MD5 hashes for strings and files.
Definition: md5.hpp:88
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::utils::MD5::BlockSize
static const size_t BlockSize
Definition: md5.hpp:98
genesis::utils::MD5::update
void update(std::shared_ptr< BaseInputSource > source)
Definition: md5.cpp:160
genesis::utils::MD5::read_hex
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: md5.cpp:96
genesis::utils::MD5::hex_to_digest
static DigestType hex_to_digest(std::string const &hex)
Definition: md5.cpp:122
genesis::utils::MD5::clear
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: md5.cpp:155
genesis::utils::MD5::DigestType
std::array< uint8_t, 16 > DigestType
Store an MD5 digest.
Definition: md5.hpp:107
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
md5.hpp
genesis::utils::MD5::digest_to_hex
static std::string digest_to_hex(DigestType const &digest)
Definition: md5.cpp:110
genesis::utils::MD5::final_digest
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: md5.cpp:213
genesis::utils::MD5::MD5
MD5()
Initialize the object for use.
Definition: md5.cpp:87
genesis::utils::MD5::final_hex
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: md5.cpp:207
genesis::utils::InputBuffer
Definition: input_buffer.hpp:49
genesis::utils::MD5::read_digest
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: md5.cpp:103
input_buffer.hpp
genesis::utils::MD5::size_type
uint32_t size_type
Definition: md5.hpp:96