A library for working with phylogenetic and population genetic data.
v0.32.0
md5.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_TOOLS_HASH_MD5_H_
2 #define GENESIS_UTILS_TOOLS_HASH_MD5_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <array>
37 #include <cstdint>
38 #include <iosfwd>
39 #include <string>
40 
41 namespace genesis {
42 namespace utils {
43 
44 // ================================================================================================
45 // MD5
46 // ================================================================================================
47 
88 class MD5
89 {
90 public:
91 
92  // -------------------------------------------------------------------------
93  // Typedefs and Constants
94  // -------------------------------------------------------------------------
95 
96  using size_type = uint32_t;
97 
98  static const size_t BlockSize = 64;
99 
107  using DigestType = std::array< uint8_t, 16 >;
108 
109  // -------------------------------------------------------------------------
110  // Constructors and Rule of Five
111  // -------------------------------------------------------------------------
112 
116  MD5();
117  ~MD5() = default;
118 
119  MD5( MD5 const& ) = default;
120  MD5( MD5&& ) = default;
121 
122  MD5& operator= ( MD5 const& ) = default;
123  MD5& operator= ( MD5&& ) = default;
124 
125  // -------------------------------------------------------------------------
126  // Full Hashing
127  // -------------------------------------------------------------------------
128 
132  static std::string read_hex( std::shared_ptr<BaseInputSource> source );
133 
137  static DigestType read_digest( std::shared_ptr<BaseInputSource> source );
138 
139  static std::string digest_to_hex( DigestType const& digest );
140  static DigestType hex_to_digest( std::string const& hex );
141 
142  // -------------------------------------------------------------------------
143  // Iterative Hashing
144  // -------------------------------------------------------------------------
145 
149  void clear();
150 
151  void update( std::shared_ptr<BaseInputSource> source );
152  void update( std::string const& s );
153  void update( std::istream& is );
154  void update( char const* input, size_type length );
155 
159  std::string final_hex();
160 
165 
166  // -------------------------------------------------------------------------
167  // Internal Functions
168  // -------------------------------------------------------------------------
169 
170 private:
171 
172  void reset_();
173 
174  // MD5 block update operation. Continues an MD5 message-digest
175  // operation, processing another message block
176  void update_( unsigned char const* input, size_type length );
177 
178  // F, G, H and I are basic MD5 functions.
179  static inline uint32_t F_(uint32_t x, uint32_t y, uint32_t z);
180  static inline uint32_t G_(uint32_t x, uint32_t y, uint32_t z);
181  static inline uint32_t H_(uint32_t x, uint32_t y, uint32_t z);
182  static inline uint32_t I_(uint32_t x, uint32_t y, uint32_t z);
183 
184  // rotates x left n bits.
185  static inline uint32_t rotate_left_(uint32_t x, int n);
186 
187  // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
188  // Rotation is separate from addition to prevent recomputation.
189  static inline void FF_(
190  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
191  );
192  static inline void GG_(
193  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
194  );
195  static inline void HH_(
196  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
197  );
198  static inline void II_(
199  uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t s, uint32_t ac
200  );
201 
202  // apply MD5 algo on a block
203  void transform_( const uint8_t block[MD5::BlockSize] );
204 
205  // decodes input (unsigned char) into output (uint32_t). Assumes len is a multiple of 4.
206  static void decode_( uint32_t output[], const uint8_t input[], size_type len );
207 
208  // encodes input (uint32_t) into output (unsigned char). Assumes len is a multiple of 4.
209  static void encode_( uint8_t output[], const uint32_t input[], size_type len );
210 
211  // -------------------------------------------------------------------------
212  // Data Members
213  // -------------------------------------------------------------------------
214 
215 private:
216 
217  uint8_t buffer_[MD5::BlockSize]; // bytes that didn't fit in last 64 byte chunk
218  uint32_t count_[2]; // 64bit counter for number of bits (lo, hi)
219  uint32_t state_[4]; // digest so far
220 
221  DigestType digest_;
222 
223 };
224 
225 } // namespace utils
226 } // namespace genesis
227 
228 // ================================================================================================
229 // Standard Hash Function
230 // ================================================================================================
231 
232 namespace std
233 {
240  template<>
241  struct hash<genesis::utils::MD5::DigestType>
242  {
244  using result_type = std::size_t;
245 
246  // We use two intermediate hashes to allow better optimization.
248  result_type hash1 = 0;
249  result_type hash2 = 0;
250  hash1 ^= ( static_cast<result_type>( s[0] ) << 0 );
251  hash1 ^= ( static_cast<result_type>( s[1] ) << 8 );
252  hash1 ^= ( static_cast<result_type>( s[2] ) << 16 );
253  hash1 ^= ( static_cast<result_type>( s[3] ) << 24 );
254  hash1 ^= ( static_cast<result_type>( s[4] ) << 32 );
255  hash1 ^= ( static_cast<result_type>( s[5] ) << 40 );
256  hash1 ^= ( static_cast<result_type>( s[6] ) << 48 );
257  hash1 ^= ( static_cast<result_type>( s[7] ) << 56 );
258  hash2 ^= ( static_cast<result_type>( s[8] ) << 0 );
259  hash2 ^= ( static_cast<result_type>( s[9] ) << 8 );
260  hash2 ^= ( static_cast<result_type>( s[10] ) << 16 );
261  hash2 ^= ( static_cast<result_type>( s[11] ) << 24 );
262  hash2 ^= ( static_cast<result_type>( s[12] ) << 32 );
263  hash2 ^= ( static_cast<result_type>( s[13] ) << 40 );
264  hash2 ^= ( static_cast<result_type>( s[14] ) << 48 );
265  hash2 ^= ( static_cast<result_type>( s[15] ) << 56 );
266  return hash1 ^ hash2;
267  }
268  };
269 }
270 
271 #endif // include guard
genesis::utils::MD5
Calculate MD5 hashes for strings and files.
Definition: md5.hpp:88
genesis::utils::MD5::~MD5
~MD5()=default
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::utils::MD5::BlockSize
static const size_t BlockSize
Definition: md5.hpp:98
genesis::utils::MD5::update
void update(std::shared_ptr< BaseInputSource > source)
Definition: md5.cpp:160
genesis::utils::MD5::read_hex
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: md5.cpp:96
input_source.hpp
genesis::utils::MD5::hex_to_digest
static DigestType hex_to_digest(std::string const &hex)
Definition: md5.cpp:122
std::hash< genesis::utils::MD5::DigestType >::operator()
result_type operator()(argument_type const &s) const
Definition: md5.hpp:247
genesis::utils::MD5::clear
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: md5.cpp:155
genesis::utils::MD5::DigestType
std::array< uint8_t, 16 > DigestType
Store an MD5 digest.
Definition: md5.hpp:107
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
std::hash< genesis::utils::MD5::DigestType >::argument_type
genesis::utils::MD5::DigestType argument_type
Definition: md5.hpp:243
std::hash< genesis::utils::MD5::DigestType >::result_type
std::size_t result_type
Definition: md5.hpp:244
genesis::utils::MD5::digest_to_hex
static std::string digest_to_hex(DigestType const &digest)
Definition: md5.cpp:110
genesis::utils::MD5::final_digest
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: md5.cpp:213
genesis::utils::MD5::MD5
MD5()
Initialize the object for use.
Definition: md5.cpp:87
genesis::utils::MD5::final_hex
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: md5.cpp:207
genesis::utils::MD5::operator=
MD5 & operator=(MD5 const &)=default
genesis::utils::MD5::read_digest
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: md5.cpp:103
genesis::utils::MD5::size_type
uint32_t size_type
Definition: md5.hpp:96