A library for working with phylogenetic and population genetic data.
v0.27.0
sha256.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 /*
32  =================================================================
33  SHA256 License
34  =================================================================
35 
36  Updated to C++, zedwood.com 2012
37  Based on Olivier Gay's version
38  See Modified BSD License below:
39 
40  FIPS 180-2 SHA-224/256/384/512 implementation
41  Issue date: 04/30/2005
42  http://www.ouah.org/ogay/sha2/
43 
44  Copyright (C) 2005, 2007 Olivier Gay <olivier.gay@a3.epfl.ch>
45  All rights reserved.
46 
47  Redistribution and use in source and binary forms, with or without
48  modification, are permitted provided that the following conditions
49  are met:
50  1. Redistributions of source code must retain the above copyright
51  notice, this list of conditions and the following disclaimer.
52  2. Redistributions in binary form must reproduce the above copyright
53  notice, this list of conditions and the following disclaimer in the
54  documentation and/or other materials provided with the distribution.
55  3. Neither the name of the project nor the names of its contributors
56  may be used to endorse or promote products derived from this software
57  without specific prior written permission.
58 
59  THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
60  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
63  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  SUCH DAMAGE.
70 */
71 
73 
75 
76 // Apparently, for some compilers, the following definition has to be set in order for <cinttypes>
77 // to include the scanf format types, see https://stackoverflow.com/a/30851225/4184258
78 #define __STDC_FORMAT_MACROS
79 
#include <algorithm>
#include <cctype>
#include <cinttypes>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
89 
// If the inclusion of <cinttypes> above still did not give us the scanf format macros that we
// need, we define the one we use ourselves, see https://helpmanual.io/man3/SCNx32-avr/
// We also emit a compiler message in that case, as a hint for debugging.
#ifndef SCNx32
    // Hexadecimal scanf conversion for uint32_t. This has to be a string literal (double quotes),
    // because it is glued onto the format via adjacent string literal concatenation, as in
    // `"%8" SCNx32`. A character constant in single quotes does not compile there.
    // NOTE(review): `lx` reads into an `unsigned long`, which only matches uint32_t on platforms
    // where `unsigned long` is 32 bits wide. Confirm for any target that ends up in this fallback.
    #define SCNx32 "lx"
    #pragma message ( \
        "<inttypes.h> did not provide a definition of `SCNx32`, " \
        "which we hence define here as `lx`" \
    )
#endif
101 
102 namespace genesis {
103 namespace utils {
104 
105 // ================================================================================================
106 // Constructors and Rule of Five
107 // ================================================================================================
108 
109 const unsigned int SHA256::sha256_k[64] = {
110  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
111  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
112  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
113  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
114  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
115  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
116  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
117  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
118  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
119  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
120  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
121  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
122  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
123  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
124  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
125  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
126 };
127 
129 {
130  reset_();
131 }
132 
133 // ================================================================================================
134 // Full Hashing
135 // ================================================================================================
136 
137 std::string SHA256::read_hex( std::shared_ptr<BaseInputSource> source )
138 {
139  SHA256 checksum;
140  checksum.update( source );
141  return checksum.final_hex();
142 }
143 
144 SHA256::DigestType SHA256::read_digest( std::shared_ptr<BaseInputSource> source )
145 {
146  SHA256 checksum;
147  checksum.update( source );
148  return checksum.final_digest();
149 }
150 
151 std::string SHA256::digest_to_hex( SHA256::DigestType const& digest )
152 {
153  // Simple version. Equally as fast as the printf one below.
154  std::ostringstream result;
155  for (size_t i = 0; i < digest.size(); ++i) {
156  result << std::hex << std::setfill('0') << std::setw(8);
157  result << static_cast<int>( digest[i] );
158  }
159  return result.str();
160 
161  // Print bytes to string.
162  // char buf[ 2 * DigestSize + 1 ];
163  // buf[ 2 * DigestSize ] = '\0';
164  // for( size_t i = 0; i < 8; i++ ) {
165  // sprintf( buf + i * 8, "%08x", digest[i] );
166  // }
167  // return std::string(buf);
168 }
169 
170 SHA256::DigestType SHA256::hex_to_digest( std::string const& hex )
171 {
172  // Safety first!
173  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
174  return std::isxdigit( c );
175  });
176  if( hex.size() != 64 || ! all_hex ) {
177  throw std::runtime_error( "Invalid SHA256 hex string." );
178  }
179 
180  // The following test was introduced to check the scanf format "%8x",
181  // which just is an "unsigned int", which is not a fixed size.
182  // We now use the SCNxN typedefs that offer fixed width replacements, see
183  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
184 
185  // Make sure that the scan works! Need to have 32 bit type.
186  // static_assert(
187  // sizeof( unsigned int ) == 4,
188  // "Cannot compile SHA256::hex_to_digest() with sizeof( unsigned int ) != 4"
189  // );
190 
191  // Convert.
192  SHA256::DigestType result;
193  for (size_t i = 0; i < result.size(); ++i) {
194  // auto const n = sscanf( &hex[ 8 * i ], "%8x", &(result[i]) );
195  auto const n = sscanf( &hex[ 8 * i ], "%8" SCNx32, &(result[i]) );
196  if( n != 1 ) {
197  throw std::runtime_error( "Invalid SHA256 hex string." );
198  }
199  }
200 
201  return result;
202 }
203 
204 // ================================================================================================
205 // Iterative Hashing
206 // ================================================================================================
207 
209 {
210  reset_();
211 }
212 
213 void SHA256::update( std::shared_ptr<BaseInputSource> source )
214 {
215  auto ib = InputBuffer( source );
216  char sbuf[SHA256::BlockSize];
217 
218  while (true) {
219 
220  // Read a block and use it for an update.
221  auto count = ib.read( sbuf, SHA256::BlockSize );
222  update( sbuf, count );
223 
224  // If we didn't get a full block, the stream is done.
225  if( count != SHA256::BlockSize ) {
226  return;
227  }
228  }
229 }
230 
231 void SHA256::update( std::string const& s )
232 {
233  update( s.c_str(), s.size() );
234 }
235 
236 void SHA256::update( std::istream& is )
237 {
238  char sbuf[SHA256::BlockSize];
239  while (true) {
240 
241  // Read a block and use it for an update.
242  is.read( sbuf, SHA256::BlockSize );
243  size_t cnt = is.gcount();
244  update( sbuf, cnt );
245 
246  // If we didn't get a full block, the stream is done.
247  if( cnt != SHA256::BlockSize ) {
248  return;
249  }
250  }
251 }
252 
253 void SHA256::update( char const* input, size_t length )
254 {
255  // Ugly conversion, but still better than the silent one used in the original code.
256  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
257  update_( in_uchar, length);
258 }
259 
260 std::string SHA256::final_hex()
261 {
262  // Calculate digest, also reset for next use.
263  return digest_to_hex( final_digest() );
264 }
265 
267 {
268  unsigned int block_nb;
269  unsigned int pm_len;
270  unsigned int len_b;
271  block_nb = (1 + ((SHA256::BlockSize - 9) < (len_ % SHA256::BlockSize)));
272  len_b = (tot_len_ + len_) << 3;
273  pm_len = block_nb << 6;
274  memset(block_ + len_, 0, pm_len - len_);
275  block_[len_] = 0x80;
276  SHA2_UNPACK32(len_b, block_ + pm_len - 4);
277  transform_(block_, block_nb);
278 
279  // Turn into byte array. Might be useful later.
280  // unsigned char digest[SHA256::DigestSize];
281  // memset(digest,0,SHA256::DigestSize);
282  // for (size_t i = 0 ; i < 8; i++) {
283  // SHA2_UNPACK32(digest_[i], &digest[i << 2]);
284  // }
285 
286  auto const result = digest_;
287  reset_();
288  return result;
289 }
290 
291 // ================================================================================================
292 // Internal Functions
293 // ================================================================================================
294 
295 void SHA256::reset_()
296 {
297  digest_[0] = 0x6a09e667;
298  digest_[1] = 0xbb67ae85;
299  digest_[2] = 0x3c6ef372;
300  digest_[3] = 0xa54ff53a;
301  digest_[4] = 0x510e527f;
302  digest_[5] = 0x9b05688c;
303  digest_[6] = 0x1f83d9ab;
304  digest_[7] = 0x5be0cd19;
305  len_ = 0;
306  tot_len_ = 0;
307 }
308 
309 void SHA256::update_( unsigned char const* message, size_t len )
310 {
311  unsigned int block_nb;
312  unsigned int new_len, rem_len, tmp_len;
313  const unsigned char *shifted_message;
314  tmp_len = SHA256::BlockSize - len_;
315  rem_len = len < tmp_len ? len : tmp_len;
316  memcpy( &block_[len_], message, rem_len );
317  if (len_ + len < SHA256::BlockSize) {
318  len_ += len;
319  return;
320  }
321  new_len = len - rem_len;
322  block_nb = new_len / SHA256::BlockSize;
323  shifted_message = message + rem_len;
324  transform_( block_, 1);
325  transform_(shifted_message, block_nb);
326  rem_len = new_len % SHA256::BlockSize;
327  memcpy( block_, &shifted_message[block_nb << 6], rem_len);
328  len_ = rem_len;
329  tot_len_ += (block_nb + 1) << 6;
330 }
331 
332 void SHA256::transform_( unsigned char const* message, unsigned int block_nb )
333 {
334  uint32_t w[64];
335  uint32_t wv[8];
336  uint32_t t1, t2;
337  const unsigned char *sub_block;
338  int i;
339  int j;
340  for (i = 0; i < (int) block_nb; i++) {
341  sub_block = message + (i << 6);
342  for (j = 0; j < 16; j++) {
343  w[j] = SHA2_PACK32(&sub_block[j << 2]);
344  }
345  for (j = 16; j < 64; j++) {
346  w[j] = SHA256_F4(w[j - 2]) + w[j - 7] + SHA256_F3(w[j - 15]) + w[j - 16];
347  }
348  for (j = 0; j < 8; j++) {
349  wv[j] = digest_[j];
350  }
351  for (j = 0; j < 64; j++) {
352  t1 = wv[7] + SHA256_F2(wv[4]) + SHA2_CH(wv[4], wv[5], wv[6])
353  + sha256_k[j] + w[j];
354  t2 = SHA256_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]);
355  wv[7] = wv[6];
356  wv[6] = wv[5];
357  wv[5] = wv[4];
358  wv[4] = wv[3] + t1;
359  wv[3] = wv[2];
360  wv[2] = wv[1];
361  wv[1] = wv[0];
362  wv[0] = t1 + t2;
363  }
364  for (j = 0; j < 8; j++) {
365  digest_[j] += wv[j];
366  }
367  }
368 }
369 
370 } // namespace utils
371 } // namespace genesis
genesis::utils::SHA256::DigestType
std::array< uint32_t, 8 > DigestType
Store a SHA256 digest.
Definition: sha256.hpp:120
genesis::utils::SHA256::final_hex
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha256.cpp:260
SCNx32
#define SCNx32
Definition: sha256.cpp:95
genesis::utils::SHA256::read_digest
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: sha256.cpp:144
genesis::utils::SHA256::clear
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: sha256.cpp:208
genesis::utils::SHA256::hex_to_digest
static DigestType hex_to_digest(std::string const &hex)
Definition: sha256.cpp:170
genesis::utils::SHA256::update
void update(std::shared_ptr< BaseInputSource > source)
Definition: sha256.cpp:213
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::utils::SHA256::final_digest
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha256.cpp:266
genesis::utils::SHA256::read_hex
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: sha256.cpp:137
genesis::utils::SHA256::SHA256
SHA256()
Initialize the object for use.
Definition: sha256.cpp:128
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::SHA256::digest_to_hex
static std::string digest_to_hex(DigestType const &digest)
Definition: sha256.cpp:151
genesis::utils::SHA256::BlockSize
static const size_t BlockSize
Definition: sha256.hpp:110
genesis::utils::SHA256
Calculate SHA256 hashes for strings and files.
Definition: sha256.hpp:102
genesis::utils::InputBuffer
Definition: input_buffer.hpp:49
input_buffer.hpp
sha256.hpp