A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sha256.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
31 /*
32  =================================================================
33  SHA256 License
34  =================================================================
35 
36  Updated to C++, zedwood.com 2012
37  Based on Olivier Gay's version
38  See Modified BSD License below:
39 
40  FIPS 180-2 SHA-224/256/384/512 implementation
41  Issue date: 04/30/2005
42  http://www.ouah.org/ogay/sha2/
43 
44  Copyright (C) 2005, 2007 Olivier Gay <olivier.gay@a3.epfl.ch>
45  All rights reserved.
46 
47  Redistribution and use in source and binary forms, with or without
48  modification, are permitted provided that the following conditions
49  are met:
50  1. Redistributions of source code must retain the above copyright
51  notice, this list of conditions and the following disclaimer.
52  2. Redistributions in binary form must reproduce the above copyright
53  notice, this list of conditions and the following disclaimer in the
54  documentation and/or other materials provided with the distribution.
55  3. Neither the name of the project nor the names of its contributors
56  may be used to endorse or promote products derived from this software
57  without specific prior written permission.
58 
59  THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
60  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62  ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
63  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69  SUCH DAMAGE.
70 */
71 
73 
74 #include <algorithm>
75 #include <cinttypes>
76 #include <cstdio>
77 #include <cstring>
78 #include <fstream>
79 #include <iomanip>
80 #include <iostream>
81 #include <sstream>
82 #include <stdexcept>
83 
84 namespace genesis {
85 namespace utils {
86 
87 // ================================================================================================
88 // Constructors and Rule of Five
89 // ================================================================================================
90 
91 const unsigned int SHA256::sha256_k[64] = {
92  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
93  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
94  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
95  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
96  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
97  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
98  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
99  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
100  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
101  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
102  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
103  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
104  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
105  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
106  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
107  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
108 };
109 
111 {
112  reset_();
113 }
114 
115 // ================================================================================================
116 // Member Functions
117 // ================================================================================================
118 
120 {
121  reset_();
122 }
123 
124 void SHA256::update( std::string const& s )
125 {
126  update( s.c_str(), s.size() );
127 }
128 
129 void SHA256::update(std::istream& is)
130 {
131  char sbuf[SHA256::BlockSize];
132  while (true) {
133 
134  // Read a block and use it for an update.
135  is.read( sbuf, SHA256::BlockSize );
136  size_t cnt = is.gcount();
137  update( sbuf, cnt );
138 
139  // If we didn't get a full block, the stream is done.
140  if( cnt != SHA256::BlockSize ) {
141  return;
142  }
143  }
144 }
145 
146 void SHA256::update( char const* input, size_t length )
147 {
148  // Ugly conversion, but still better than the silent one used in the original code.
149  auto const* in_uchar = reinterpret_cast<unsigned char const*>( input );
150  update_( in_uchar, length);
151 }
152 
153 std::string SHA256::final_hex()
154 {
155  // Calculate digest, also reset for next use.
156  return digest_to_hex( final_digest() );
157 }
158 
160 {
161  unsigned int block_nb;
162  unsigned int pm_len;
163  unsigned int len_b;
164  block_nb = (1 + ((SHA256::BlockSize - 9) < (len_ % SHA256::BlockSize)));
165  len_b = (tot_len_ + len_) << 3;
166  pm_len = block_nb << 6;
167  memset(block_ + len_, 0, pm_len - len_);
168  block_[len_] = 0x80;
169  SHA2_UNPACK32(len_b, block_ + pm_len - 4);
170  transform_(block_, block_nb);
171 
172  // Turn into byte array. Might be useful later.
173  // unsigned char digest[SHA256::DigestSize];
174  // memset(digest,0,SHA256::DigestSize);
175  // for (size_t i = 0 ; i < 8; i++) {
176  // SHA2_UNPACK32(digest_[i], &digest[i << 2]);
177  // }
178 
179  auto const result = digest_;
180  reset_();
181  return result;
182 }
183 
184 std::string SHA256::from_file_hex( std::string const& filename )
185 {
186  std::ifstream stream( filename.c_str(), std::ios::binary );
187  SHA256 checksum;
188  checksum.update(stream);
189  return checksum.final_hex();
190 }
191 
192 SHA256::DigestType SHA256::from_file_digest( std::string const& filename )
193 {
194  std::ifstream stream( filename.c_str(), std::ios::binary );
195  SHA256 checksum;
196  checksum.update(stream);
197  return checksum.final_digest();
198 }
199 
200 std::string SHA256::from_string_hex( std::string const& input )
201 {
202  SHA256 checksum;
203  checksum.update( input );
204  return checksum.final_hex();
205 }
206 
208 {
209  SHA256 checksum;
210  checksum.update( input );
211  return checksum.final_digest();
212 }
213 
214 std::string SHA256::from_stream_hex( std::istream& is )
215 {
216  SHA256 checksum;
217  checksum.update(is);
218  return checksum.final_hex();
219 }
220 
222 {
223  SHA256 checksum;
224  checksum.update(is);
225  return checksum.final_digest();
226 }
227 
228 std::string SHA256::digest_to_hex( SHA256::DigestType const& digest )
229 {
230  // Simple version. Equally as fast as the printf one below.
231  std::ostringstream result;
232  for (size_t i = 0; i < digest.size(); ++i) {
233  result << std::hex << std::setfill('0') << std::setw(8);
234  result << static_cast<int>( digest[i] );
235  }
236  return result.str();
237 
238  // Print bytes to string.
239  // char buf[ 2 * DigestSize + 1 ];
240  // buf[ 2 * DigestSize ] = '\0';
241  // for( size_t i = 0; i < 8; i++ ) {
242  // sprintf( buf + i * 8, "%08x", digest[i] );
243  // }
244  // return std::string(buf);
245 }
246 
247 SHA256::DigestType SHA256::hex_to_digest( std::string const& hex )
248 {
249  // Safety first!
250  bool const all_hex = std::all_of( hex.begin(), hex.end(), []( char c ){
251  return std::isxdigit( c );
252  });
253  if( hex.size() != 64 || ! all_hex ) {
254  throw std::runtime_error( "Invalid SHA256 hex string." );
255  }
256 
257  // The following test was introduced to check the scanf format "%8x",
258  // which just is an "unsigned int", which is not a fixed size.
259  // We now use the SCNxN typedefs that offer fixed with replacements, see
260  // http://pubs.opengroup.org/onlinepubs/009604599/basedefs/inttypes.h.html
261 
262  // Make sure that the scan works! Need to have 32 bit type.
263  // static_assert(
264  // sizeof( unsigned int ) == 4,
265  // "Cannot compile SHA256::hex_to_digest() with sizeof( unsigned int ) != 4"
266  // );
267 
268  // Convert.
269  SHA256::DigestType result;
270  for (size_t i = 0; i < result.size(); ++i) {
271  // auto const n = sscanf( &hex[ 8 * i ], "%8x", &(result[i]) );
272  auto const n = sscanf( &hex[ 8 * i ], "%8" SCNx32, &(result[i]) );
273  if( n != 1 ) {
274  throw std::runtime_error( "Invalid SHA256 hex string." );
275  }
276  }
277 
278  return result;
279 }
280 
281 // ================================================================================================
282 // Internal Functions
283 // ================================================================================================
284 
285 void SHA256::reset_()
286 {
287  digest_[0] = 0x6a09e667;
288  digest_[1] = 0xbb67ae85;
289  digest_[2] = 0x3c6ef372;
290  digest_[3] = 0xa54ff53a;
291  digest_[4] = 0x510e527f;
292  digest_[5] = 0x9b05688c;
293  digest_[6] = 0x1f83d9ab;
294  digest_[7] = 0x5be0cd19;
295  len_ = 0;
296  tot_len_ = 0;
297 }
298 
299 void SHA256::update_( unsigned char const* message, size_t len )
300 {
301  unsigned int block_nb;
302  unsigned int new_len, rem_len, tmp_len;
303  const unsigned char *shifted_message;
304  tmp_len = SHA256::BlockSize - len_;
305  rem_len = len < tmp_len ? len : tmp_len;
306  memcpy( &block_[len_], message, rem_len );
307  if (len_ + len < SHA256::BlockSize) {
308  len_ += len;
309  return;
310  }
311  new_len = len - rem_len;
312  block_nb = new_len / SHA256::BlockSize;
313  shifted_message = message + rem_len;
314  transform_( block_, 1);
315  transform_(shifted_message, block_nb);
316  rem_len = new_len % SHA256::BlockSize;
317  memcpy( block_, &shifted_message[block_nb << 6], rem_len);
318  len_ = rem_len;
319  tot_len_ += (block_nb + 1) << 6;
320 }
321 
322 void SHA256::transform_( unsigned char const* message, unsigned int block_nb )
323 {
324  uint32_t w[64];
325  uint32_t wv[8];
326  uint32_t t1, t2;
327  const unsigned char *sub_block;
328  int i;
329  int j;
330  for (i = 0; i < (int) block_nb; i++) {
331  sub_block = message + (i << 6);
332  for (j = 0; j < 16; j++) {
333  w[j] = SHA2_PACK32(&sub_block[j << 2]);
334  }
335  for (j = 16; j < 64; j++) {
336  w[j] = SHA256_F4(w[j - 2]) + w[j - 7] + SHA256_F3(w[j - 15]) + w[j - 16];
337  }
338  for (j = 0; j < 8; j++) {
339  wv[j] = digest_[j];
340  }
341  for (j = 0; j < 64; j++) {
342  t1 = wv[7] + SHA256_F2(wv[4]) + SHA2_CH(wv[4], wv[5], wv[6])
343  + sha256_k[j] + w[j];
344  t2 = SHA256_F1(wv[0]) + SHA2_MAJ(wv[0], wv[1], wv[2]);
345  wv[7] = wv[6];
346  wv[6] = wv[5];
347  wv[5] = wv[4];
348  wv[4] = wv[3] + t1;
349  wv[3] = wv[2];
350  wv[2] = wv[1];
351  wv[1] = wv[0];
352  wv[0] = t1 + t2;
353  }
354  for (j = 0; j < 8; j++) {
355  digest_[j] += wv[j];
356  }
357  }
358 }
359 
360 } // namespace utils
361 } // namespace genesis
std::array< uint32_t, 8 > DigestType
Store a SHA256 digest.
Definition: sha256.hpp:114
std::string final_hex()
Finish the calculation, prepare the object for next use, and return the hash.
Definition: sha256.cpp:153
Calculate SHA256 hashes for strings and files.
Definition: sha256.hpp:96
void clear()
Reset to initial state, that is, delete any intermediate input from update() calls.
Definition: sha256.cpp:119
static DigestType hex_to_digest(std::string const &hex)
Definition: sha256.cpp:247
DigestType final_digest()
Finish the calculation, prepare the object for next use, and return the digest.
Definition: sha256.cpp:159
static DigestType from_file_digest(std::string const &filename)
Calculate the hash digest for the content of a file, given its path.
Definition: sha256.cpp:192
static std::string from_stream_hex(std::istream &is)
Calculate the checksum for the content of a stream.
Definition: sha256.cpp:214
static DigestType from_stream_digest(std::istream &is)
Calculate the hash digest for the content of a stream.
Definition: sha256.cpp:221
SHA256()
Initialize the object for use.
Definition: sha256.cpp:110
void update(std::string const &s)
Add the contents of a string to the hash digest.
Definition: sha256.cpp:124
static std::string digest_to_hex(DigestType const &digest)
Definition: sha256.cpp:228
static std::string from_file_hex(std::string const &filename)
Calculate the checksum for the content of a file, given its path.
Definition: sha256.cpp:184
static DigestType from_string_digest(std::string const &input)
Calculate the hash digest for the content of a string.
Definition: sha256.cpp:207
static const size_t BlockSize
Definition: sha256.hpp:104
static std::string from_string_hex(std::string const &input)
Calculate the checksum for the content of a string.
Definition: sha256.cpp:200
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.