A library for working with phylogenetic and population genetic data.
v0.32.0
quality.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_QUALITY_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_QUALITY_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2023 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #include <array>
35 #include <iosfwd>
36 #include <memory>
37 #include <stdexcept>
38 #include <string>
39 #include <vector>
40 
41 namespace genesis {
42 
43 // =================================================================================================
44 // Forward declarations
45 // =================================================================================================
46 
namespace utils {
    // Forward declaration only. It lets this header name
    // std::shared_ptr< utils::BaseInputSource > in a function signature
    // without pulling in the full class definition.
    class BaseInputSource;
}
50 
51 namespace sequence {
52 
53 // =================================================================================================
54 // Quality Encoding and Decoding
55 // =================================================================================================
56 
/**
 * @brief List of quality encodings for which we support decoding.
 *
 * NOTE(review): the relative order of the three Illumina enumerators should be
 * confirmed against the upstream header.
 */
enum class QualityEncoding
{
    kSanger,
    kSolexa,
    kIllumina13,
    kIllumina15,
    kIllumina18
};
80 
87 std::string quality_encoding_name( QualityEncoding encoding, bool with_offset = false );
88 
98 QualityEncoding guess_quality_encoding_from_name( std::string const& name );
99 
110 unsigned char quality_decode_to_phred_score(
111  char quality_code,
113 );
114 
120 std::vector<unsigned char> quality_decode_to_phred_score(
121  std::string const& quality_codes,
123 );
124 
/**
 * @brief Encode a phred score into a quality char, using the Sanger convention.
 *
 * The score is shifted by the ASCII offset 33, so that the valid range [0, 93]
 * maps onto the chars '!' to '~'.
 *
 * @param phred_score Score to encode.
 * @param clamp       If `true` (default), scores above 93 are encoded as 93.
 *                    If `false`, such scores cause an exception instead.
 * @throws std::invalid_argument if @p phred_score exceeds 93 and @p clamp is `false`.
 */
inline char quality_encode_from_phred_score( unsigned char phred_score, bool clamp = true )
{
    // Out-of-range scores are expected to be rare, so the common path costs one branch;
    // the clamp-or-throw decision is only made once we know we are out of range.
    if( phred_score > 93 ) {
        if( ! clamp ) {
            throw std::invalid_argument(
                "Cannot encode phred score outside of [0, 93] to Sanger format."
            );
        }
        phred_score = 93;
    }
    return static_cast<char>( phred_score + 33 );
}

/**
 * @brief Encode a list of phred scores into a quality string, using the Sanger convention.
 *
 * Each score is encoded via the single-score overload of this function.
 *
 * @param phred_scores Scores to encode; the result has one char per score, in order.
 * @param clamp        Forwarded to the single-score overload for every element.
 * @throws std::invalid_argument if any score exceeds 93 and @p clamp is `false`.
 */
inline std::string quality_encode_from_phred_score(
    std::vector<unsigned char> const& phred_scores,
    bool clamp = true
) {
    std::string qualities;
    qualities.reserve( phred_scores.size() );
    for( auto const score : phred_scores ) {
        qualities.push_back( quality_encode_from_phred_score( score, clamp ));
    }
    return qualities;
}
172 
173 // =================================================================================================
174 // Guess Quality Encoding Type
175 // =================================================================================================
176 
188 
201 QualityEncoding guess_quality_encoding( std::array<size_t, 128> const& char_counts );
202 
215  std::shared_ptr< utils::BaseInputSource > source,
216  size_t max_lines = 0,
217  size_t max_chars = 0
218 );
219 
220 // =================================================================================================
221 // Quality Computations
222 // =================================================================================================
223 
// Conversions between error probabilities, phred scores, and solexa scores.
// Each conversion comes as a single-value overload and a vector overload.
// The conversions themselves are implemented in quality.cpp.

// Error probability --> phred score.
unsigned char error_probability_to_phred_score( double error_probability );
std::vector<unsigned char> error_probability_to_phred_score( std::vector<double> error_probability );

// Phred score --> error probability.
double phred_score_to_error_probability( unsigned char phred_score );
std::vector<double> phred_score_to_error_probability( std::vector<unsigned char> phred_score );

// Error probability --> solexa score. Solexa scores are signed, unlike phred scores.
signed char error_probability_to_solexa_score( double error_probability );
std::vector<signed char> error_probability_to_solexa_score( std::vector<double> error_probability );

// Solexa score --> error probability.
double solexa_score_to_error_probability( signed char solexa_score );
std::vector<double> solexa_score_to_error_probability( std::vector<signed char> solexa_score );

// Phred score --> solexa score.
signed char phred_score_to_solexa_score( unsigned char phred_score );
std::vector<signed char> phred_score_to_solexa_score( std::vector<unsigned char> phred_score );

// Solexa score --> phred score.
unsigned char solexa_score_to_phred_score( signed char solexa_score );
std::vector<unsigned char> solexa_score_to_phred_score( std::vector<signed char> solexa_score );
241 
242 } // namespace sequence
243 } // namespace genesis
244 
245 #endif // include guard
genesis::sequence::phred_score_to_error_probability
double phred_score_to_error_probability(unsigned char phred_score)
Definition: quality.cpp:581
genesis::sequence::quality_decode_to_phred_score
unsigned char quality_decode_to_phred_score(char quality_code, QualityEncoding encoding)
Decode a single quality score char (for example coming from a fastq file) to a phred score.
Definition: quality.cpp:221
genesis::sequence::guess_quality_encoding_from_name
QualityEncoding guess_quality_encoding_from_name(std::string const &name)
Guess the QualityEncoding type, given its description name.
Definition: quality.cpp:197
genesis::sequence::quality_encoding_name
std::string quality_encoding_name(QualityEncoding encoding, bool with_offset)
Return a readable name for each of the encoding types.
Definition: quality.cpp:177
genesis::sequence::QualityEncoding::kSanger
@ kSanger
genesis::sequence::QualityEncoding::kIllumina15
@ kIllumina15
genesis::sequence::QualityEncoding::kSolexa
@ kSolexa
genesis::sequence::QualityEncoding::kIllumina13
@ kIllumina13
genesis::sequence::compatible_quality_encodings
bool compatible_quality_encodings(QualityEncoding lhs, QualityEncoding rhs)
Return whether two quality encodings are compatible with each other.
Definition: quality.cpp:401
genesis::sequence::QualityEncoding::kIllumina18
@ kIllumina18
genesis::sequence::QualityEncoding
QualityEncoding
List of quality encodings for which we support decoding.
Definition: quality.hpp:72
genesis::sequence::error_probability_to_solexa_score
signed char error_probability_to_solexa_score(double error_probability)
Definition: quality.cpp:587
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::guess_quality_encoding
QualityEncoding guess_quality_encoding(std::array< size_t, 128 > const &char_counts)
Guess the quality score encoding, based on counts of how often each char appeared in the quality string.
Definition: quality.cpp:433
genesis::sequence::guess_fastq_quality_encoding
QualityEncoding guess_fastq_quality_encoding(std::shared_ptr< utils::BaseInputSource > source, size_t max_lines, size_t max_chars)
Guess the quality score encoding for a fastq input, based on counts of how often each char appeared in the quality string.
Definition: quality.cpp:498
genesis::sequence::phred_score_to_solexa_score
signed char phred_score_to_solexa_score(unsigned char phred_score)
Definition: quality.cpp:624
genesis::sequence::quality_encode_from_phred_score
char quality_encode_from_phred_score(unsigned char phred_score, bool clamp=true)
Encode a phred score into a quality char, using the Sanger convention.
Definition: quality.hpp:141
genesis::sequence::solexa_score_to_phred_score
unsigned char solexa_score_to_phred_score(signed char solexa_score)
Definition: quality.cpp:636
genesis::sequence::solexa_score_to_error_probability
double solexa_score_to_error_probability(signed char solexa_score)
Definition: quality.cpp:615
genesis::sequence::error_probability_to_phred_score
unsigned char error_probability_to_phred_score(double error_probability)
Definition: quality.cpp:565