A toolkit for working with phylogenetic data.
v0.24.0
gzip.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
35 
36 #include <cassert>
37 #include <fstream>
38 #include <stdexcept>
39 
40 #ifdef GENESIS_ZLIB
41 
42 # include "zlib.h"
43 
44 # if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
45 # include <fcntl.h>
46 # include <io.h>
47 # endif
48 
49 #endif // GENESIS_ZLIB
50 
51 namespace genesis {
52 namespace utils {
53 
54 // ================================================================================================
55 // General gzip/zlib Functions
56 // ================================================================================================
57 
58 bool is_gzip_compressed_file( std::string const& file_name )
59 {
60  // Check if the file extension hints at gzip.
61  auto const ext = file_extension( file_name );
62  bool const ext_gz = ( ext == "gz" || ext == "gzip" );
63 
64  // Open the file in binary mode.
65  std::ifstream infile;
66  infile.open( file_name, std::ifstream::in | std::ifstream::binary );
67  if( !infile.good() ) {
68  return false;
69  // throw std::runtime_error( "Cannot read from file '" + file_name + "'." );
70  }
71 
72  // Get the first two characters. If this fails, the file is too short, so it is not a gzip file.
73  unsigned char buffer[2];
74  infile.read( reinterpret_cast<char*>( &buffer ), 2 );
75  if( !infile.good() ) {
76  return false;
77  }
78  infile.close();
79 
80  // Check if the file starts with the magic number of gz files.
81  bool const magic = ( buffer[0] == 0x1f ) && ( buffer[1] == 0x8b );
82 
83  // If extension and magic number agree, we have a clear result.
84  // Otherwise, issue a warning, and return the magic bit, because this is what we trust more.
85  if( ext_gz && magic ) {
86  return true;
87  } else if( ! ext_gz && ! magic ) {
88  return false;
89  } else if( ext_gz && ! magic ) {
90  LOG_WARN << "File name '" << file_name << "' ends in '.gz', but the file does not seem "
91  << "to contain gzip content.";
92  } else if( ! ext_gz && magic ) {
93  LOG_WARN << "File name '" << file_name << "' does not end in '.gz', but the file seems "
94  << "to contain gzip content.";
95  } else {
96  // We have checked every compbination already.
97  assert( false );
98  }
99  return magic;
100 }
101 
102 // ================================================================================================
103 // Gzip Exception Class
104 // ================================================================================================
105 
106 } // namespace utils
107 namespace except {
108 
109 #ifdef GENESIS_ZLIB
110 
111 GzipError::GzipError( std::string const& z_stream_message, int error_code )
112  : except::Exception("")
113 {
114  // Need to have this method in the cpp file, so that we do not expose the zlib header
115  // to the header file, which would include all its symbols to whichever class uses our headers...
116 
117  message_ = "zlib: ";
118  switch( error_code )
119  {
120  case Z_STREAM_ERROR:
121  message_ += "Invalid compression level. [Z_STREAM_ERROR: ";
122  break;
123  case Z_DATA_ERROR:
124  message_ += "Invalid or incomplete deflate data. [Z_DATA_ERROR: ";
125  break;
126  case Z_MEM_ERROR:
127  message_ += "Out of memory. [Z_MEM_ERROR: ";
128  break;
129  case Z_VERSION_ERROR:
130  message_ += "Version mismatch! [Z_VERSION_ERROR: ";
131  break;
132  case Z_BUF_ERROR:
133  message_ += "Buffer error. [Z_BUF_ERROR: ";
134  break;
135  case Z_ERRNO:
136  message_ += "Error while reading zlib/gzip input. [Z_ERRNO: ";
137  break;
138  default:
139  message_ += "Unknown error. [" + std::to_string(error_code) + ": ";
140  break;
141  }
142  message_ += z_stream_message + "]";
143 }
144 
145 #else // GENESIS_ZLIB
146 
147 GzipError::GzipError( std::string const& z_stream_message, int error_code )
148  : message_("zlib: Genesis was not compiled with zlib support.")
149 {}
150 
151 #endif // GENESIS_ZLIB
152 
153 } // namespace except
154 } // namespace genesis
std::string file_extension(std::string const &filename)
Return the extension name of a file.
Definition: fs.cpp:707
#define LOG_WARN
Log a warning. See genesis::utils::LoggingLevel.
Definition: logging.hpp:96
Base class for genesis exceptions.
Definition: exception.hpp:53
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
GzipError(std::string const &z_stream_message, int error_code)
Definition: gzip.cpp:111
Provides functions for accessing the file system.
Provides easy and fast logging functionality.
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
bool is_gzip_compressed_file(std::string const &file_name)
Return whether a given file is gzip-compressed.
Definition: gzip.cpp:58