Genesis - A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
md5.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_TOOLS_MD5_H_
2 #define GENESIS_UTILS_TOOLS_MD5_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <array>
35 #include <cstdint>
36 #include <iosfwd>
37 #include <string>
38 
39 namespace genesis {
40 namespace utils {
41 
42 // ================================================================================================
43 // MD5
44 // ================================================================================================
45 
/**
 * @brief Calculate MD5 hashes for strings and files.
 *
 * Incremental use: feed data through the update() overloads, then obtain the result via
 * final_hex() or final_digest(). The static `from_*()` functions calculate the hash of a
 * whole file, string, or stream in a single call.
 */
class MD5
{
public:

    // -------------------------------------------------------------------------
    //     Typedefs and Constants
    // -------------------------------------------------------------------------

    /**
     * @brief Type used for input lengths. Being 32 bit wide, a single call to
     * update( char const*, size_type ) cannot be given more than 2^32 - 1 bytes.
     */
    using size_type = uint32_t;

    /**
     * @brief Size in bytes of the internal block buffer (see `buffer_`).
     */
    static const size_t BlockSize = 64;

    /**
     * @brief Store an MD5 digest.
     *
     * A digest consists of 16 bytes.
     */
    using DigestType = std::array< uint8_t, 16 >;

    // -------------------------------------------------------------------------
    //     Constructors and Rule of Five
    // -------------------------------------------------------------------------

    /**
     * @brief Initialize the object for use.
     */
    MD5();
    ~MD5() = default;

    MD5( MD5 const& ) = default;
    MD5( MD5&& )      = default;

    MD5& operator= ( MD5 const& ) = default;
    MD5& operator= ( MD5&& )      = default;

    // -------------------------------------------------------------------------
    //     Member Functions
    // -------------------------------------------------------------------------

    /**
     * @brief Reset to initial state, that is, delete any intermediate input from update() calls.
     */
    void clear();

    /**
     * @brief Add the contents of a string to the hash digest.
     */
    void update( std::string const& s );

    /**
     * @brief Overload of update() taking an input stream.
     *
     * NOTE(review): presumably consumes the stream until its end; confirm in md5.cpp.
     */
    void update( std::istream& is );

    /**
     * @brief Overload of update() taking a raw character buffer.
     *
     * NOTE(review): presumably adds `length` bytes starting at `input`; confirm in md5.cpp.
     */
    void update( char const* input, size_type length );

    /**
     * @brief Finish the calculation, prepare the object for next use, and return the hash.
     */
    std::string final_hex();

    /**
     * @brief Finish the calculation, prepare the object for next use, and return the digest.
     */
    DigestType final_digest();

    /**
     * @brief Calculate the checksum for the content of a file, given its path.
     */
    static std::string from_file_hex( std::string const& filename );

    /**
     * @brief Calculate the hash digest for the content of a file, given its path.
     */
    static DigestType from_file_digest( std::string const& filename );

    /**
     * @brief Calculate the checksum for the content of a string.
     */
    static std::string from_string_hex( std::string const& input );

    /**
     * @brief Calculate the hash digest for the content of a string.
     */
    static DigestType from_string_digest( std::string const& input );

    /**
     * @brief Calculate the checksum for the content of a stream.
     */
    static std::string from_stream_hex( std::istream& is );

    /**
     * @brief Calculate the hash digest for the content of a stream.
     */
    static DigestType from_stream_digest( std::istream& is );

    // Conversions between the two result representations used by this class.
    // NOTE(review): presumably inverse operations of each other; the expected format of the
    // hex string and the handling of malformed input are defined in md5.cpp - confirm there.
    static std::string digest_to_hex( DigestType const& digest );
    static DigestType hex_to_digest( std::string const& hex );

    // -------------------------------------------------------------------------
    //     Internal Functions
    // -------------------------------------------------------------------------

private:

    void reset_();

    // MD5 block update operation. Continues an MD5 message-digest
    // operation, processing another message block
    void update_( unsigned char const* input, size_type length );

    // F, G, H and I are basic MD5 functions.
    static inline uint32_t F_(uint32_t x, uint32_t y, uint32_t z);
    static inline uint32_t G_(uint32_t x, uint32_t y, uint32_t z);
    static inline uint32_t H_(uint32_t x, uint32_t y, uint32_t z);
    static inline uint32_t I_(uint32_t x, uint32_t y, uint32_t z);

    // rotates x left n bits.
    static inline uint32_t rotate_left_(uint32_t x, int n);

    // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
    // Rotation is separate from addition to prevent recomputation.
    static inline void FF_(
        uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
    );
    static inline void GG_(
        uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
    );
    static inline void HH_(
        uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
    );
    static inline void II_(
        uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
    );

    // apply MD5 algo on a block
    void transform_( const uint8_t block[MD5::BlockSize] );

    // decodes input (unsigned char) into output (uint32_t). Assumes len is a multiple of 4.
    static void decode_( uint32_t output[], const uint8_t input[], size_type len );

    // encodes input (uint32_t) into output (unsigned char). Assumes len is a multiple of 4.
    static void encode_( uint8_t output[], const uint32_t input[], size_type len );

    // -------------------------------------------------------------------------
    //     Data Members
    // -------------------------------------------------------------------------

private:

    uint8_t  buffer_[MD5::BlockSize]; // bytes that didn't fit in last 64 byte chunk
    uint32_t count_[2];               // 64bit counter for number of bits (lo, hi)
    uint32_t state_[4];               // digest so far

    DigestType digest_;

};
236 
237 } // namespace utils
238 } // namespace genesis
239 
240 // ================================================================================================
241 // Standard Hash Function
242 // ================================================================================================
243 
244 namespace std
245 {
252  template<>
253  struct hash<genesis::utils::MD5::DigestType>
254  {
256  using result_type = std::size_t;
257 
258  // We use two intermediate hashes to allow better optimization.
260  result_type hash1 = 0;
261  result_type hash2 = 0;
262  hash1 ^= ( static_cast<result_type>( s[0] ) << 0 );
263  hash1 ^= ( static_cast<result_type>( s[1] ) << 8 );
264  hash1 ^= ( static_cast<result_type>( s[2] ) << 16 );
265  hash1 ^= ( static_cast<result_type>( s[3] ) << 24 );
266  hash1 ^= ( static_cast<result_type>( s[4] ) << 32 );
267  hash1 ^= ( static_cast<result_type>( s[5] ) << 40 );
268  hash1 ^= ( static_cast<result_type>( s[6] ) << 48 );
269  hash1 ^= ( static_cast<result_type>( s[7] ) << 56 );
270  hash2 ^= ( static_cast<result_type>( s[8] ) << 0 );
271  hash2 ^= ( static_cast<result_type>( s[9] ) << 8 );
272  hash2 ^= ( static_cast<result_type>( s[10] ) << 16 );
273  hash2 ^= ( static_cast<result_type>( s[11] ) << 24 );
274  hash2 ^= ( static_cast<result_type>( s[12] ) << 32 );
275  hash2 ^= ( static_cast<result_type>( s[13] ) << 40 );
276  hash2 ^= ( static_cast<result_type>( s[14] ) << 48 );
277  hash2 ^= ( static_cast<result_type>( s[15] ) << 56 );
278  return hash1 ^ hash2;
279  }
280  };
281 }
282 
283 #endif // include guard
Calculate MD5 hashes for strings and files.
Definition: md5.hpp:82
static std::string from_file_hex(std::string const &filename)
Calculate the checksum for the content of a file, given its path.
Definition: md5.cpp:164
uint32_t size_type
Definition: md5.hpp:90
MD5()
Initialize the object for use.
Definition: md5.cpp:84
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: md5.cpp:127
static DigestType from_stream_digest(std::istream &is)
Calculate the hash digest for the content of a stream.
Definition: md5.cpp:201
static std::string from_stream_hex(std::istream &is)
Calculate the checksum for the content of a stream.
Definition: md5.cpp:194
genesis::utils::MD5::DigestType argument_type
Definition: md5.hpp:255
void update(std::string const &s)
Add the contents of a string to the hash digest.
Definition: md5.cpp:98
static DigestType from_file_digest(std::string const &filename)
Calculate the hash digest for the content of a file, given its path.
Definition: md5.cpp:172
std::array< uint8_t, 16 > DigestType
Store an MD5 digest.
Definition: md5.hpp:101
result_type operator()(argument_type const &s) const
Definition: md5.hpp:259
static const size_t BlockSize
Definition: md5.hpp:92
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: md5.cpp:93
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: md5.cpp:133
static DigestType hex_to_digest(std::string const &hex)
Definition: md5.cpp:220
static DigestType from_string_digest(std::string const &input)
Calculate the hash digest for the content of a string.
Definition: md5.cpp:187
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
static std::string from_string_hex(std::string const &input)
Calculate the checksum for the content of a string.
Definition: md5.cpp:180
static std::string digest_to_hex(DigestType const &digest)
Definition: md5.cpp:208
MD5 & operator=(MD5 const &)=default