A library for working with phylogenetic and population genetic data.
v0.27.0
quality.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_QUALITY_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_QUALITY_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2021 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #include <array>
35 #include <iosfwd>
36 #include <memory>
37 #include <stdexcept>
38 #include <string>
39 #include <vector>
40 
41 namespace genesis {
42 
43 // =================================================================================================
44 // Forward declarations
45 // =================================================================================================
46 
47 namespace utils {
48  class BaseInputSource;
49 }
50 
51 namespace sequence {
52 
53 // =================================================================================================
54 // Quality Encoding and Decoding
55 // =================================================================================================
56 
/**
 * @brief List of quality encodings for which we support decoding.
 *
 * The documentation index of this file references five enumerators: `kSanger`, `kSolexa`,
 * `kIllumina13`, `kIllumina15`, and `kIllumina18`.
 *
 * NOTE(review): the relative order of the three Illumina enumerators is assumed to be ascending
 * by version number - confirm against the upstream header, as the order fixes the underlying
 * integer values.
 */
enum class QualityEncoding
{
    /// Sanger convention; this is the format written by quality_encode_from_phred_score().
    kSanger,

    /// Solexa convention.
    kSolexa,

    /// Illumina 1.3 convention (presumably; named after the pipeline version - confirm).
    kIllumina13,

    /// Illumina 1.5 convention (presumably; named after the pipeline version - confirm).
    kIllumina15,

    /// Illumina 1.8 convention (presumably; named after the pipeline version - confirm).
    kIllumina18
};
80 
86 std::string quality_encoding_name( QualityEncoding encoding );
87 
97 QualityEncoding guess_quality_encoding_from_name( std::string const& name );
98 
109 unsigned char quality_decode_to_phred_score(
110  char quality_code,
112 );
113 
119 std::vector<unsigned char> quality_decode_to_phred_score(
120  std::string const& quality_codes,
122 );
123 
/**
 * @brief Encode a phred score into a quality char, using the Sanger convention.
 *
 * The char is computed as `phred_score + 33`, so that only scores in the range `[0, 93]`
 * can be encoded.
 *
 * @param phred_score Phred score to encode.
 * @param clamp       If `true` (default), a score above 93 is encoded as if it were 93.
 *                    If `false`, such a score causes an exception instead.
 * @return The encoded quality char.
 * @throws std::invalid_argument if @p phred_score is above 93 and @p clamp is `false`.
 */
inline char quality_encode_from_phred_score( unsigned char phred_score, bool clamp = true )
{
    // Test the valid range with a single branch first, as an out-of-range score should be the
    // rare case; only then decide between clamping and throwing.
    if( phred_score > 93 ) {
        if( ! clamp ) {
            throw std::invalid_argument(
                "Cannot encode phred score outside of [0, 93] to Sanger format."
            );
        }
        phred_score = 93;
    }
    return static_cast<char>( phred_score + 33 );
}
155 
162  std::vector<unsigned char> const& phred_scores,
163  bool clamp = true
164 ) {
165  auto qualities = std::string( phred_scores.size(), ' ' );
166  for( size_t i = 0; i < phred_scores.size(); ++i ) {
167  qualities[i] = quality_encode_from_phred_score( phred_scores[i], clamp );
168  }
169  return qualities;
170 }
171 
172 // =================================================================================================
173 // Guess Quality Encoding Type
174 // =================================================================================================
175 
186 QualityEncoding guess_quality_encoding( std::array<size_t, 128> const& char_counts );
187 
195 QualityEncoding guess_fastq_quality_encoding( std::shared_ptr< utils::BaseInputSource > source );
196 
197 // =================================================================================================
198 // Quality Computations
199 // =================================================================================================
200 
/**
 * @brief Convert an error probability into a phred score.
 *
 * Only declared in this header; the definition is in quality.cpp.
 * NOTE(review): presumably the usual relation `Q = -10 * log10( p )`; rounding and the handling
 * of out-of-range probabilities are not visible here - confirm in quality.cpp.
 */
unsigned char error_probability_to_phred_score( double error_probability );

/**
 * @brief Convert a list of error probabilities into phred scores.
 *
 * Presumably applies the single-value overload to each element - confirm in quality.cpp.
 * The vector is taken by value, so that a copy is made unless the argument is moved in.
 */
std::vector<unsigned char> error_probability_to_phred_score( std::vector<double> error_probability );

/**
 * @brief Convert a phred score into an error probability.
 *
 * Only declared in this header; the definition is in quality.cpp.
 * NOTE(review): presumably the usual relation `p = 10^( -Q / 10 )` - confirm in quality.cpp.
 */
double phred_score_to_error_probability( unsigned char phred_score );

/**
 * @brief Convert a list of phred scores into error probabilities.
 *
 * Presumably applies the single-value overload to each element - confirm in quality.cpp.
 * The vector is taken by value, so that a copy is made unless the argument is moved in.
 */
std::vector<double> phred_score_to_error_probability( std::vector<unsigned char> phred_score );
206 
/**
 * @brief Convert an error probability into a Solexa score.
 *
 * Only declared in this header; the definition is in quality.cpp.
 * NOTE(review): the formula, rounding, and the handling of out-of-range probabilities are not
 * visible here - confirm in quality.cpp. The signed return type allows for negative scores.
 */
signed char error_probability_to_solexa_score( double error_probability );

/**
 * @brief Convert a list of error probabilities into Solexa scores.
 *
 * Presumably applies the single-value overload to each element - confirm in quality.cpp.
 * The vector is taken by value, so that a copy is made unless the argument is moved in.
 */
std::vector<signed char> error_probability_to_solexa_score( std::vector<double> error_probability );

/**
 * @brief Convert a Solexa score into an error probability.
 *
 * Only declared in this header; the definition is in quality.cpp.
 */
double solexa_score_to_error_probability( signed char solexa_score );

/**
 * @brief Convert a list of Solexa scores into error probabilities.
 *
 * Presumably applies the single-value overload to each element - confirm in quality.cpp.
 * The vector is taken by value, so that a copy is made unless the argument is moved in.
 */
std::vector<double> solexa_score_to_error_probability( std::vector<signed char> solexa_score );
212 
/**
 * @brief Convert a phred score into a Solexa score.
 *
 * Only declared in this header; the definition is in quality.cpp.
 * NOTE(review): the conversion formula and its rounding are not visible here - confirm in
 * quality.cpp.
 */
signed char phred_score_to_solexa_score( unsigned char phred_score );

/**
 * @brief Convert a list of phred scores into Solexa scores.
 *
 * Presumably applies the single-value overload to each element - confirm in quality.cpp.
 * The vector is taken by value, so that a copy is made unless the argument is moved in.
 */
std::vector<signed char> phred_score_to_solexa_score( std::vector<unsigned char> phred_score );

/**
 * @brief Convert a Solexa score into a phred score.
 *
 * Only declared in this header; the definition is in quality.cpp.
 * NOTE(review): the conversion formula and its rounding are not visible here - confirm in
 * quality.cpp.
 */
unsigned char solexa_score_to_phred_score( signed char solexa_score );

/**
 * @brief Convert a list of Solexa scores into phred scores.
 *
 * Presumably applies the single-value overload to each element - confirm in quality.cpp.
 * The vector is taken by value, so that a copy is made unless the argument is moved in.
 */
std::vector<unsigned char> solexa_score_to_phred_score( std::vector<signed char> solexa_score );
218 
219 } // namespace sequence
220 } // namespace genesis
221 
222 #endif // include guard
genesis::sequence::phred_score_to_error_probability
double phred_score_to_error_probability(unsigned char phred_score)
Definition: quality.cpp:518
genesis::sequence::quality_decode_to_phred_score
unsigned char quality_decode_to_phred_score(char quality_code, QualityEncoding encoding)
Decode a single quality score char (for example coming from a fastq file) to a phred score.
Definition: quality.cpp:218
genesis::sequence::guess_quality_encoding_from_name
QualityEncoding guess_quality_encoding_from_name(std::string const &name)
Guess the QualityEncoding type, given its description name.
Definition: quality.cpp:194
genesis::sequence::QualityEncoding::kSanger
@ kSanger
genesis::sequence::QualityEncoding::kIllumina15
@ kIllumina15
genesis::sequence::QualityEncoding::kSolexa
@ kSolexa
genesis::sequence::QualityEncoding::kIllumina13
@ kIllumina13
genesis::sequence::QualityEncoding::kIllumina18
@ kIllumina18
genesis::sequence::quality_encoding_name
std::string quality_encoding_name(QualityEncoding encoding)
Return a readable name for each of the encoding types.
Definition: quality.cpp:175
genesis::sequence::QualityEncoding
QualityEncoding
List of quality encodings for which we support decoding.
Definition: quality.hpp:72
genesis::sequence::guess_fastq_quality_encoding
QualityEncoding guess_fastq_quality_encoding(std::shared_ptr< utils::BaseInputSource > source)
Guess the quality score encoding for a fastq input, based on counts of how often each char appeared in the quality string.
Definition: quality.cpp:457
genesis::sequence::error_probability_to_solexa_score
signed char error_probability_to_solexa_score(double error_probability)
Definition: quality.cpp:524
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::guess_quality_encoding
QualityEncoding guess_quality_encoding(std::array< size_t, 128 > const &char_counts)
Guess the quality score encoding, based on counts of how often each char appeared in the quality string.
Definition: quality.cpp:398
genesis::sequence::phred_score_to_solexa_score
signed char phred_score_to_solexa_score(unsigned char phred_score)
Definition: quality.cpp:561
genesis::sequence::quality_encode_from_phred_score
char quality_encode_from_phred_score(unsigned char phred_score, bool clamp=true)
Encode a phred score into a quality char, using the Sanger convention.
Definition: quality.hpp:140
genesis::sequence::solexa_score_to_phred_score
unsigned char solexa_score_to_phred_score(signed char solexa_score)
Definition: quality.cpp:573
genesis::sequence::solexa_score_to_error_probability
double solexa_score_to_error_probability(signed char solexa_score)
Definition: quality.cpp:552
genesis::sequence::error_probability_to_phred_score
unsigned char error_probability_to_phred_score(double error_probability)
Definition: quality.cpp:502