A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sha256.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 /*
32  =================================================================
33  SHA256 License
34  =================================================================
35 
36  Updated to C++, zedwood.com 2012
37  Based on Olivier Gay's version
38  See Modified BSD License below:
39 
40  FIPS 180-2 SHA-224/256/384/512 implementation
41  Issue date: 04/30/2005
42  http://www.ouah.org/ogay/sha2/
43 
44  Copyright (C) 2005, 2007 Olivier Gay <olivier.gay@a3.epfl.ch>
45  All rights reserved.
46 
47  Redistribution and use in source and binary forms, with or without
48  modification, are permitted provided that the following conditions
49  are met:
50  1. Redistributions of source code must retain the above copyright
51  notice, this list of conditions and the following disclaimer.
52  2. Redistributions in binary form must reproduce the above copyright
53  notice, this list of conditions and the following disclaimer in the
54  documentation and/or other materials provided with the distribution.
55  3. Neither the name of the project nor the names of its contributors
56  may be used to endorse or promote products derived from this software
57  without specific prior written permission.
58 
59  THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
60  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
63  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  SUCH DAMAGE.
70 */
71 
73 
#include <algorithm>
#include <cctype>
#include <cinttypes>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
83 
84 namespace genesis {
85 namespace utils {
86 
87 // ================================================================================================
88 // Constructors and Rule of Five
89 // ================================================================================================
90 
91 const unsigned int SHA256::sha256_k[64] = {
92  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
93  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
94  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
95  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
96  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
97  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
98  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
99  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
100  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
101  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
102  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
103  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
104  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
105  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
106  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
107  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
108 };
109 
114 {
115  reset_();
116 }
117 
118 // ================================================================================================
119 // Member Functions
120 // ================================================================================================
121 
125 void SHA256::update( std::string const& s )
126 {
127  update( s.c_str(), s.size() );
128 }
129 
130 void SHA256::update(std::istream& is)
131 {
132  char sbuf[SHA256::BlockSize];
133  while (true) {
134 
135  // Read a block and use it for an update.
136  is.read( sbuf, SHA256::BlockSize );
137  size_t cnt = is.gcount();
138  update( sbuf, cnt );
139 
140  // If we didn't get a full block, the stream is done.
141  if( cnt != SHA256::BlockSize ) {
142  return;
143  }
144  }
145 }
146 
147 void SHA256::update( char const* input, size_t length )
148 {
149  // Ugly conversion, but still better than the silent one used in the original code.
150  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
151  update_( in_uchar, length);
152 }
153 
157 std::string SHA256::final_hex()
158 {
159  // Calculate digest, also reset for next use.
160  return digest_to_hex( final_digest() );
161 }
162 
167 {
168  unsigned int block_nb;
169  unsigned int pm_len;
170  unsigned int len_b;
171  block_nb = (1 + ((SHA256::BlockSize - 9) < (len_ % SHA256::BlockSize)));
172  len_b = (tot_len_ + len_) << 3;
173  pm_len = block_nb << 6;
174  memset(block_ + len_, 0, pm_len - len_);
175  block_[len_] = 0x80;
176  SHA2_UNPACK32(len_b, block_ + pm_len - 4);
177  transform_(block_, block_nb);
178 
179  // Turn into byte array. Might be useful later.
180  // unsigned char digest[SHA256::DigestSize];
181  // memset(digest,0,SHA256::DigestSize);
182  // for (size_t i = 0 ; i < 8; i++) {
183  // SHA2_UNPACK32(digest_[i], &digest[i << 2]);
184  // }
185 
186  auto const result = digest_;
187  reset_();
188  return result;
189 }
190 
194 std::string SHA256::from_file_hex( std::string const& filename )
195 {
196  std::ifstream stream( filename.c_str(), std::ios::binary );
197  SHA256 checksum;
198  checksum.update(stream);
199  return checksum.final_hex();
200 }
201 
205 SHA256::DigestType SHA256::from_file_digest( std::string const& filename )
206 {
207  std::ifstream stream( filename.c_str(), std::ios::binary );
208  SHA256 checksum;
209  checksum.update(stream);
210  return checksum.final_digest();
211 }
212 
216 std::string SHA256::from_string_hex( std::string const& input )
217 {
218  SHA256 checksum;
219  checksum.update( input );
220  return checksum.final_hex();
221 }
222 
227 {
228  SHA256 checksum;
229  checksum.update( input );
230  return checksum.final_digest();
231 }
232 
233 std::string SHA256::digest_to_hex( SHA256::DigestType const& digest )
234 {
235  // Simple version. Equally as fast as the printf one below.
236  std::ostringstream result;
237  for (size_t i = 0; i < digest.size(); ++i) {
238  result << std::hex << std::setfill('0') << std::setw(8);
239  result << static_cast<int>( digest[i] );
240  }
241  return result.str();
242 
243  // Print bytes to string.
244  // char buf[ 2 * DigestSize + 1 ];
245  // buf[ 2 * DigestSize ] = '\0';
246  // for( size_t i = 0; i < 8; i++ ) {
247  // sprintf( buf + i * 8, "%08x", digest[i] );
248  // }
249  // return std::string(buf);
250 }
251 
252 SHA256::DigestType SHA256::hex_to_digest( std::string const& hex )
253 {
254  // Safety first!
255  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
256  return std::isxdigit( c );
257  });
258  if( hex.size() != 64 || ! all_hex ) {
259  throw std::runtime_error( "Invalid SHA256 hex string." );
260  }
261 
262  // The following test was introduced to check the scanf format "%8x",
263  // which just is an "unsigned int", which is not a fixed size.
264  // We now use the SCNxN typedefs that offer fixed with replacements, see
265  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
266 
267  // Make sure that the scan works! Need to have 32 bit type.
268  // static_assert(
269  // sizeof( unsigned int ) == 4,
270  // "Cannot compile SHA256::hex_to_digest() with sizeof( unsigned int ) != 4"
271  // );
272 
273  // Convert.
274  SHA256::DigestType result;
275  for (size_t i = 0; i < result.size(); ++i) {
276  // auto const n = sscanf( &hex[ 8 * i ], "%8x", &(result[i]) );
277  auto const n = sscanf( &hex[ 8 * i ], "%8" SCNx32, &(result[i]) );
278  if( n != 1 ) {
279  throw std::runtime_error( "Invalid SHA256 hex string." );
280  }
281  }
282 
283  return result;
284 }
285 
286 // ================================================================================================
287 // Internal Functions
288 // ================================================================================================
289 
290 void SHA256::reset_()
291 {
292  digest_[0] = 0x6a09e667;
293  digest_[1] = 0xbb67ae85;
294  digest_[2] = 0x3c6ef372;
295  digest_[3] = 0xa54ff53a;
296  digest_[4] = 0x510e527f;
297  digest_[5] = 0x9b05688c;
298  digest_[6] = 0x1f83d9ab;
299  digest_[7] = 0x5be0cd19;
300  len_ = 0;
301  tot_len_ = 0;
302 }
303 
304 void SHA256::update_( unsigned char const* message, size_t len )
305 {
306  unsigned int block_nb;
307  unsigned int new_len, rem_len, tmp_len;
308  const unsigned char *shifted_message;
309  tmp_len = SHA256::BlockSize - len_;
310  rem_len = len < tmp_len ? len : tmp_len;
311  memcpy( &block_[len_], message, rem_len );
312  if (len_ + len < SHA256::BlockSize) {
313  len_ += len;
314  return;
315  }
316  new_len = len - rem_len;
317  block_nb = new_len / SHA256::BlockSize;
318  shifted_message = message + rem_len;
319  transform_( block_, 1);
320  transform_(shifted_message, block_nb);
321  rem_len = new_len % SHA256::BlockSize;
322  memcpy( block_, &shifted_message[block_nb << 6], rem_len);
323  len_ = rem_len;
324  tot_len_ += (block_nb + 1) << 6;
325 }
326 
327 void SHA256::transform_( unsigned char const* message, unsigned int block_nb )
328 {
329  uint32_t w[64];
330  uint32_t wv[8];
331  uint32_t t1, t2;
332  const unsigned char *sub_block;
333  int i;
334  int j;
335  for (i = 0; i < (int) block_nb; i++) {
336  sub_block = message + (i << 6);
337  for (j = 0; j < 16; j++) {
338  w[j] = SHA2_PACK32(&sub_block[j << 2]);
339  }
340  for (j = 16; j < 64; j++) {
341  w[j] = SHA256_F4(w[j - 2]) + w[j - 7] + SHA256_F3(w[j - 15]) + w[j - 16];
342  }
343  for (j = 0; j < 8; j++) {
344  wv[j] = digest_[j];
345  }
346  for (j = 0; j < 64; j++) {
347  t1 = wv[7] + SHA256_F2(wv[4]) + SHA2_CH(wv[4], wv[5], wv[6])
348  + sha256_k[j] + w[j];
349  t2 = SHA256_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]);
350  wv[7] = wv[6];
351  wv[6] = wv[5];
352  wv[5] = wv[4];
353  wv[4] = wv[3] + t1;
354  wv[3] = wv[2];
355  wv[2] = wv[1];
356  wv[1] = wv[0];
357  wv[0] = t1 + t2;
358  }
359  for (j = 0; j < 8; j++) {
360  digest_[j] += wv[j];
361  }
362  }
363 }
364 
365 } // namespace utils
366 } // namespace genesis
std::array< uint32_t, 8 > DigestType
Store a SHA256 digest.
Definition: sha256.hpp:114
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha256.cpp:157
Calculate SHA256 hashes for strings and files.
Definition: sha256.hpp:96
static DigestType hex_to_digest(std::string const &hex)
Definition: sha256.cpp:252
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha256.cpp:166
static DigestType from_file_digest(std::string const &filename)
Calculate the hash digest for the content of a file, given its path.
Definition: sha256.cpp:205
SHA256()
Initialize the object for use.
Definition: sha256.cpp:113
void update(std::string const &s)
Add the contents of a string to the hash digest.
Definition: sha256.cpp:125
static std::string digest_to_hex(DigestType const &digest)
Definition: sha256.cpp:233
static std::string from_file_hex(std::string const &filename)
Calculate the checksum for the content of a file, given its path.
Definition: sha256.cpp:194
static DigestType from_string_digest(std::string const &input)
Calculate the hash digest for the content of a string.
Definition: sha256.cpp:226
static const size_t BlockSize
Definition: sha256.hpp:104
static std::string from_string_hex(std::string const &input)
Calculate the checksum for the content of a string.
Definition: sha256.cpp:216
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.