A toolkit for working with phylogenetic data.
v0.24.0
sha256.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 /*
32  =================================================================
33  SHA256 License
34  =================================================================
35 
36  Updated to C++, zedwood.com 2012
37  Based on Olivier Gay's version
38  See Modified BSD License below:
39 
40  FIPS 180-2 SHA-224/256/384/512 implementation
41  Issue date: 04/30/2005
42  http://www.ouah.org/ogay/sha2/
43 
44  Copyright (C) 2005, 2007 Olivier Gay <olivier.gay@a3.epfl.ch>
45  All rights reserved.
46 
47  Redistribution and use in source and binary forms, with or without
48  modification, are permitted provided that the following conditions
49  are met:
50  1. Redistributions of source code must retain the above copyright
51  notice, this list of conditions and the following disclaimer.
52  2. Redistributions in binary form must reproduce the above copyright
53  notice, this list of conditions and the following disclaimer in the
54  documentation and/or other materials provided with the distribution.
55  3. Neither the name of the project nor the names of its contributors
56  may be used to endorse or promote products derived from this software
57  without specific prior written permission.
58 
59  THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
60  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
63  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  SUCH DAMAGE.
70 */
71 
73 
75 
76 #include <algorithm>
77 #include <cinttypes>
78 #include <cstdio>
79 #include <cstring>
80 #include <fstream>
81 #include <iomanip>
82 #include <iostream>
83 #include <sstream>
84 #include <stdexcept>
85 
86 namespace genesis {
87 namespace utils {
88 
89 // ================================================================================================
90 // Constructors and Rule of Five
91 // ================================================================================================
92 
// Table of the 64 per-round constants of the SHA-256 compression function.
// transform_() adds sha256_k[j] to the working state in round j of each block.
// The values are the standard round constants from the FIPS 180 specification
// that this implementation follows (see the license header above).
93 const unsigned int SHA256::sha256_k[64] = {
94  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
95  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
96  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
97  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
98  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
99  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
100  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
101  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
102  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
103  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
104  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
105  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
106  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
107  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
108  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
109  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
110 };
111 
113 {
114  reset_();
115 }
116 
117 // ================================================================================================
118 // Full Hashing
119 // ================================================================================================
120 
121 std::string SHA256::read_hex( std::shared_ptr<BaseInputSource> source )
122 {
123  SHA256 checksum;
124  checksum.update( source );
125  return checksum.final_hex();
126 }
127 
128 SHA256::DigestType SHA256::read_digest( std::shared_ptr<BaseInputSource> source )
129 {
130  SHA256 checksum;
131  checksum.update( source );
132  return checksum.final_digest();
133 }
134 
135 std::string SHA256::digest_to_hex( SHA256::DigestType const& digest )
136 {
137  // Simple version. Equally as fast as the printf one below.
138  std::ostringstream result;
139  for (size_t i = 0; i < digest.size(); ++i) {
140  result << std::hex << std::setfill('0') << std::setw(8);
141  result << static_cast<int>( digest[i] );
142  }
143  return result.str();
144 
145  // Print bytes to string.
146  // char buf[ 2 * DigestSize + 1 ];
147  // buf[ 2 * DigestSize ] = '\0';
148  // for( size_t i = 0; i < 8; i++ ) {
149  // sprintf( buf + i * 8, "%08x", digest[i] );
150  // }
151  // return std::string(buf);
152 }
153 
154 SHA256::DigestType SHA256::hex_to_digest( std::string const& hex )
155 {
156  // Safety first!
157  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
158  return std::isxdigit( c );
159  });
160  if( hex.size() != 64 || ! all_hex ) {
161  throw std::runtime_error( "Invalid SHA256 hex string." );
162  }
163 
164  // The following test was introduced to check the scanf format "%8x",
165  // which just is an "unsigned int", which is not a fixed size.
166  // We now use the SCNxN typedefs that offer fixed width replacements, see
167  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
168 
169  // Make sure that the scan works! Need to have 32 bit type.
170  // static_assert(
171  // sizeof( unsigned int ) == 4,
172  // "Cannot compile SHA256::hex_to_digest() with sizeof( unsigned int ) != 4"
173  // );
174 
175  // Convert.
176  SHA256::DigestType result;
177  for (size_t i = 0; i < result.size(); ++i) {
178  // auto const n = sscanf( &hex[ 8 * i ], "%8x", &(result[i]) );
179  auto const n = sscanf( &hex[ 8 * i ], "%8" SCNx32, &(result[i]) );
180  if( n != 1 ) {
181  throw std::runtime_error( "Invalid SHA256 hex string." );
182  }
183  }
184 
185  return result;
186 }
187 
188 // ================================================================================================
189 // Iterative Hashing
190 // ================================================================================================
191 
193 {
194  reset_();
195 }
196 
197 void SHA256::update( std::shared_ptr<BaseInputSource> source )
198 {
199  auto ib = InputBuffer( source );
200  char sbuf[SHA256::BlockSize];
201 
202  while (true) {
203 
204  // Read a block and use it for an update.
205  auto count = ib.read( sbuf, SHA256::BlockSize );
206  update( sbuf, count );
207 
208  // If we didn't get a full block, the stream is done.
209  if( count != SHA256::BlockSize ) {
210  return;
211  }
212  }
213 }
214 
215 void SHA256::update( std::string const& s )
216 {
217  update( s.c_str(), s.size() );
218 }
219 
220 void SHA256::update( std::istream& is )
221 {
222  char sbuf[SHA256::BlockSize];
223  while (true) {
224 
225  // Read a block and use it for an update.
226  is.read( sbuf, SHA256::BlockSize );
227  size_t cnt = is.gcount();
228  update( sbuf, cnt );
229 
230  // If we didn't get a full block, the stream is done.
231  if( cnt != SHA256::BlockSize ) {
232  return;
233  }
234  }
235 }
236 
237 void SHA256::update( char const* input, size_t length )
238 {
239  // Ugly conversion, but still better than the silent one used in the original code.
240  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
241  update_( in_uchar, length);
242 }
243 
244 std::string SHA256::final_hex()
245 {
246  // Calculate digest, also reset for next use.
247  return digest_to_hex( final_digest() );
248 }
249 
251 {
252  unsigned int block_nb;
253  unsigned int pm_len;
254  unsigned int len_b;
255  block_nb = (1 + ((SHA256::BlockSize - 9) < (len_ % SHA256::BlockSize)));
256  len_b = (tot_len_ + len_) << 3;
257  pm_len = block_nb << 6;
258  memset(block_ + len_, 0, pm_len - len_);
259  block_[len_] = 0x80;
260  SHA2_UNPACK32(len_b, block_ + pm_len - 4);
261  transform_(block_, block_nb);
262 
263  // Turn into byte array. Might be useful later.
264  // unsigned char digest[SHA256::DigestSize];
265  // memset(digest,0,SHA256::DigestSize);
266  // for (size_t i = 0 ; i < 8; i++) {
267  // SHA2_UNPACK32(digest_[i], &digest[i << 2]);
268  // }
269 
270  auto const result = digest_;
271  reset_();
272  return result;
273 }
274 
275 // ================================================================================================
276 // Internal Functions
277 // ================================================================================================
278 
279 void SHA256::reset_()
280 {
281  digest_[0] = 0x6a09e667;
282  digest_[1] = 0xbb67ae85;
283  digest_[2] = 0x3c6ef372;
284  digest_[3] = 0xa54ff53a;
285  digest_[4] = 0x510e527f;
286  digest_[5] = 0x9b05688c;
287  digest_[6] = 0x1f83d9ab;
288  digest_[7] = 0x5be0cd19;
289  len_ = 0;
290  tot_len_ = 0;
291 }
292 
293 void SHA256::update_( unsigned char const* message, size_t len )
294 {
295  unsigned int block_nb;
296  unsigned int new_len, rem_len, tmp_len;
297  const unsigned char *shifted_message;
298  tmp_len = SHA256::BlockSize - len_;
299  rem_len = len < tmp_len ? len : tmp_len;
300  memcpy( &block_[len_], message, rem_len );
301  if (len_ + len < SHA256::BlockSize) {
302  len_ += len;
303  return;
304  }
305  new_len = len - rem_len;
306  block_nb = new_len / SHA256::BlockSize;
307  shifted_message = message + rem_len;
308  transform_( block_, 1);
309  transform_(shifted_message, block_nb);
310  rem_len = new_len % SHA256::BlockSize;
311  memcpy( block_, &shifted_message[block_nb << 6], rem_len);
312  len_ = rem_len;
313  tot_len_ += (block_nb + 1) << 6;
314 }
315 
316 void SHA256::transform_( unsigned char const* message, unsigned int block_nb )
317 {
318  uint32_t w[64];
319  uint32_t wv[8];
320  uint32_t t1, t2;
321  const unsigned char *sub_block;
322  int i;
323  int j;
324  for (i = 0; i < (int) block_nb; i++) {
325  sub_block = message + (i << 6);
326  for (j = 0; j < 16; j++) {
327  w[j] = SHA2_PACK32(&sub_block[j << 2]);
328  }
329  for (j = 16; j < 64; j++) {
330  w[j] = SHA256_F4(w[j - 2]) + w[j - 7] + SHA256_F3(w[j - 15]) + w[j - 16];
331  }
332  for (j = 0; j < 8; j++) {
333  wv[j] = digest_[j];
334  }
335  for (j = 0; j < 64; j++) {
336  t1 = wv[7] + SHA256_F2(wv[4]) + SHA2_CH(wv[4], wv[5], wv[6])
337  + sha256_k[j] + w[j];
338  t2 = SHA256_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]);
339  wv[7] = wv[6];
340  wv[6] = wv[5];
341  wv[5] = wv[4];
342  wv[4] = wv[3] + t1;
343  wv[3] = wv[2];
344  wv[2] = wv[1];
345  wv[1] = wv[0];
346  wv[0] = t1 + t2;
347  }
348  for (j = 0; j < 8; j++) {
349  digest_[j] += wv[j];
350  }
351  }
352 }
353 
354 } // namespace utils
355 } // namespace genesis
std::array< uint32_t, 8 > DigestType
Store a SHA256 digest.
Definition: sha256.hpp:120
void update(std::shared_ptr< BaseInputSource > source)
Definition: sha256.cpp:197
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha256.cpp:244
Calculate SHA256 hashes for strings and files.
Definition: sha256.hpp:102
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: sha256.cpp:121
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: sha256.cpp:192
static DigestType hex_to_digest(std::string const &hex)
Definition: sha256.cpp:154
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha256.cpp:250
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
SHA256()
Initialize the object for use.
Definition: sha256.cpp:112
static std::string digest_to_hex(DigestType const &digest)
Definition: sha256.cpp:135
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
static const size_t BlockSize
Definition: sha256.hpp:110
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: sha256.cpp:128