A library for working with phylogenetic and population genetic data.
v0.32.0
sha256.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2023 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
31 /*
32  =================================================================
33  SHA256 License
34  =================================================================
35 
36  Updated to C++, zedwood.com 2012
37  Based on Olivier Gay's version
38  See Modified BSD License below:
39 
40  FIPS 180-2 SHA-224/256/384/512 implementation
41  Issue date: 04/30/2005
42  http://www.ouah.org/ogay/sha2/
43 
44  Copyright (C) 2005, 2007 Olivier Gay <olivier.gay@a3.epfl.ch>
45  All rights reserved.
46 
47  Redistribution and use in source and binary forms, with or without
48  modification, are permitted provided that the following conditions
49  are met:
50  1. Redistributions of source code must retain the above copyright
51  notice, this list of conditions and the following disclaimer.
52  2. Redistributions in binary form must reproduce the above copyright
53  notice, this list of conditions and the following disclaimer in the
54  documentation and/or other materials provided with the distribution.
55  3. Neither the name of the project nor the names of its contributors
56  may be used to endorse or promote products derived from this software
57  without specific prior written permission.
58 
59  THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
60  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
63  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  SUCH DAMAGE.
70 */
71 
73 
75 
76 #include <algorithm>
77 #include <cstdlib>
78 #include <cstdio>
79 #include <cstring>
80 #include <fstream>
81 #include <iomanip>
82 #include <iostream>
83 #include <sstream>
84 #include <stdexcept>
85 
86 namespace genesis {
87 namespace utils {
88 
89 // ================================================================================================
90 // Constructors and Rule of Five
91 // ================================================================================================
92 
93 const unsigned int SHA256::sha256_k[64] = {
94  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
95  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
96  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
97  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
98  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
99  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
100  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
101  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
102  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
103  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
104  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
105  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
106  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
107  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
108  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
109  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
110 };
111 
113 {
114  reset_();
115 }
116 
117 // ================================================================================================
118 // Full Hashing
119 // ================================================================================================
120 
121 std::string SHA256::read_hex( std::shared_ptr<BaseInputSource> source )
122 {
123  SHA256 checksum;
124  checksum.update( source );
125  return checksum.final_hex();
126 }
127 
128 SHA256::DigestType SHA256::read_digest( std::shared_ptr<BaseInputSource> source )
129 {
130  SHA256 checksum;
131  checksum.update( source );
132  return checksum.final_digest();
133 }
134 
135 std::string SHA256::digest_to_hex( SHA256::DigestType const& digest )
136 {
137  // Simple version. Equally as fast as the printf one below.
138  std::ostringstream result;
139  for (size_t i = 0; i < digest.size(); ++i) {
140  result << std::hex << std::setfill('0') << std::setw(8);
141  result << static_cast<int>( digest[i] );
142  }
143  return result.str();
144 
145  // Print bytes to string.
146  // char buf[ 2 * DigestSize + 1 ];
147  // buf[ 2 * DigestSize ] = '\0';
148  // for( size_t i = 0; i < 8; i++ ) {
149  // sprintf( buf + i * 8, "%08x", digest[i] );
150  // }
151  // return std::string(buf);
152 }
153 
154 SHA256::DigestType SHA256::hex_to_digest( std::string const& hex )
155 {
156  // Safety first!
157  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
158  return std::isxdigit( c );
159  });
160  if( hex.size() != 64 || ! all_hex ) {
161  throw std::runtime_error( "Invalid SHA256 hex string." );
162  }
163 
164  // Convert.
165  SHA256::DigestType result;
166  for (size_t i = 0; i < result.size(); ++i) {
167 
168  // Read the symbols into the digest. We tried this before using sscanf(), but that got quite
169  // messy, as the int widths of the hex format macros are not consistent across compilers...
170  // So now we copy the individual fragements to a string. Bit expensive, but a digest is short.
171  std::string const sub = hex.substr( 8*i, 8 );
172  char* endptr;
173  auto const res = strtoul( sub.c_str(), &endptr, 16 );
174  if( *endptr != 0 ) {
175  throw std::runtime_error( "Invalid SHA256 hex string: \"" + hex + "\"" );
176  }
177  result[i] = static_cast<uint32_t>(res);
178  }
179 
180  return result;
181 }
182 
183 // ================================================================================================
184 // Iterative Hashing
185 // ================================================================================================
186 
188 {
189  reset_();
190 }
191 
192 void SHA256::update( std::shared_ptr<BaseInputSource> source )
193 {
194  auto ib = InputBuffer( source );
195  char sbuf[SHA256::BlockSize];
196 
197  while (true) {
198 
199  // Read a block and use it for an update.
200  auto count = ib.read( sbuf, SHA256::BlockSize );
201  update( sbuf, count );
202 
203  // If we didn't get a full block, the stream is done.
204  if( count != SHA256::BlockSize ) {
205  return;
206  }
207  }
208 }
209 
210 void SHA256::update( std::string const& s )
211 {
212  update( s.c_str(), s.size() );
213 }
214 
215 void SHA256::update( std::istream& is )
216 {
217  char sbuf[SHA256::BlockSize];
218  while (true) {
219 
220  // Read a block and use it for an update.
221  is.read( sbuf, SHA256::BlockSize );
222  size_t cnt = is.gcount();
223  update( sbuf, cnt );
224 
225  // If we didn't get a full block, the stream is done.
226  if( cnt != SHA256::BlockSize ) {
227  return;
228  }
229  }
230 }
231 
232 void SHA256::update( char const* input, size_t length )
233 {
234  // Ugly conversion, but still better than the silent one used in the original code.
235  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
236  update_( in_uchar, length);
237 }
238 
239 std::string SHA256::final_hex()
240 {
241  // Calculate digest, also reset for next use.
242  return digest_to_hex( final_digest() );
243 }
244 
246 {
247  unsigned int block_nb;
248  unsigned int pm_len;
249  unsigned int len_b;
250  block_nb = (1 + ((SHA256::BlockSize - 9) < (len_ % SHA256::BlockSize)));
251  len_b = (tot_len_ + len_) << 3;
252  pm_len = block_nb << 6;
253  memset(block_ + len_, 0, pm_len - len_);
254  block_[len_] = 0x80;
255  SHA2_UNPACK32(len_b, block_ + pm_len - 4);
256  transform_(block_, block_nb);
257 
258  // Turn into byte array. Might be useful later.
259  // unsigned char digest[SHA256::DigestSize];
260  // memset(digest,0,SHA256::DigestSize);
261  // for (size_t i = 0 ; i < 8; i++) {
262  // SHA2_UNPACK32(digest_[i], &digest[i << 2]);
263  // }
264 
265  auto const result = digest_;
266  reset_();
267  return result;
268 }
269 
270 // ================================================================================================
271 // Internal Functions
272 // ================================================================================================
273 
274 void SHA256::reset_()
275 {
276  digest_[0] = 0x6a09e667;
277  digest_[1] = 0xbb67ae85;
278  digest_[2] = 0x3c6ef372;
279  digest_[3] = 0xa54ff53a;
280  digest_[4] = 0x510e527f;
281  digest_[5] = 0x9b05688c;
282  digest_[6] = 0x1f83d9ab;
283  digest_[7] = 0x5be0cd19;
284  len_ = 0;
285  tot_len_ = 0;
286 }
287 
288 void SHA256::update_( unsigned char const* message, size_t len )
289 {
290  unsigned int block_nb;
291  unsigned int new_len, rem_len, tmp_len;
292  const unsigned char *shifted_message;
293  tmp_len = SHA256::BlockSize - len_;
294  rem_len = len < tmp_len ? len : tmp_len;
295  memcpy( &block_[len_], message, rem_len );
296  if (len_ + len < SHA256::BlockSize) {
297  len_ += len;
298  return;
299  }
300  new_len = len - rem_len;
301  block_nb = new_len / SHA256::BlockSize;
302  shifted_message = message + rem_len;
303  transform_( block_, 1);
304  transform_(shifted_message, block_nb);
305  rem_len = new_len % SHA256::BlockSize;
306  memcpy( block_, &shifted_message[block_nb << 6], rem_len);
307  len_ = rem_len;
308  tot_len_ += (block_nb + 1) << 6;
309 }
310 
311 void SHA256::transform_( unsigned char const* message, unsigned int block_nb )
312 {
313  uint32_t w[64];
314  uint32_t wv[8];
315  uint32_t t1, t2;
316  const unsigned char *sub_block;
317  int i;
318  int j;
319  for (i = 0; i < (int) block_nb; i++) {
320  sub_block = message + (i << 6);
321  for (j = 0; j < 16; j++) {
322  w[j] = SHA2_PACK32(&sub_block[j << 2]);
323  }
324  for (j = 16; j < 64; j++) {
325  w[j] = SHA256_F4(w[j - 2]) + w[j - 7] + SHA256_F3(w[j - 15]) + w[j - 16];
326  }
327  for (j = 0; j < 8; j++) {
328  wv[j] = digest_[j];
329  }
330  for (j = 0; j < 64; j++) {
331  t1 = wv[7] + SHA256_F2(wv[4]) + SHA2_CH(wv[4], wv[5], wv[6])
332  + sha256_k[j] + w[j];
333  t2 = SHA256_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]);
334  wv[7] = wv[6];
335  wv[6] = wv[5];
336  wv[5] = wv[4];
337  wv[4] = wv[3] + t1;
338  wv[3] = wv[2];
339  wv[2] = wv[1];
340  wv[1] = wv[0];
341  wv[0] = t1 + t2;
342  }
343  for (j = 0; j < 8; j++) {
344  digest_[j] += wv[j];
345  }
346  }
347 }
348 
349 } // namespace utils
350 } // namespace genesis
genesis::utils::SHA256::DigestType
std::array< uint32_t, 8 > DigestType
Store a SHA256 digest.
Definition: sha256.hpp:120
genesis::utils::SHA256::final_hex
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha256.cpp:239
genesis::utils::SHA256::read_digest
static DigestType read_digest(std::shared_ptr< BaseInputSource > source)
Calculate the hash digest for the content of an input source.
Definition: sha256.cpp:128
genesis::utils::SHA256::clear
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: sha256.cpp:187
genesis::utils::SHA256::hex_to_digest
static DigestType hex_to_digest(std::string const &hex)
Definition: sha256.cpp:154
genesis::utils::SHA256::update
void update(std::shared_ptr< BaseInputSource > source)
Definition: sha256.cpp:192
genesis::tree::length
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
Definition: tree/common_tree/functions.cpp:160
genesis::utils::SHA256::final_digest
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha256.cpp:245
genesis::utils::SHA256::read_hex
static std::string read_hex(std::shared_ptr< BaseInputSource > source)
Calculate the checksum for the content of an input source.
Definition: sha256.cpp:121
genesis::utils::SHA256::SHA256
SHA256()
Initialize the object for use.
Definition: sha256.cpp:112
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::SHA256::digest_to_hex
static std::string digest_to_hex(DigestType const &digest)
Definition: sha256.cpp:135
genesis::utils::SHA256::BlockSize
static const size_t BlockSize
Definition: sha256.hpp:110
genesis::utils::SHA256
Calculate SHA256 hashes for strings and files.
Definition: sha256.hpp:102
genesis::utils::InputBuffer
Definition: input_buffer.hpp:49
input_buffer.hpp
sha256.hpp