A toolkit for working with phylogenetic data.
v0.24.0
quality.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_QUALITY_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_QUALITY_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <array>
35 #include <iosfwd>
36 #include <memory>
37 #include <stdexcept>
38 #include <string>
39 #include <vector>
40 
41 namespace genesis {
42 
43 // =================================================================================================
44 // Forward declarations
45 // =================================================================================================
46 
47 namespace utils {
48  class BaseInputSource;
49 }
50 
51 namespace sequence {
52 
53 // =================================================================================================
54 // Quality Encoding and Decoding
55 // =================================================================================================
56 
/**
 * @brief List of quality encodings for which we support decoding.
 *
 * NOTE(review): the listing numbers embedded in this extract jump from 74 to 78, so
 * further enumerators may have been dropped from this view - confirm against the
 * original header before relying on this list being complete.
 */
71 enum class QualityEncoding
72 {
73  kSanger,
74  kSolexa,
78 };
79 
/**
 * @brief Return a readable name for each of the encoding types.
 *
 * @param encoding Encoding whose name is requested.
 * @return The name as a std::string.
 */
85 std::string quality_encoding_name( QualityEncoding encoding );
86 
/**
 * @brief Decode a single quality score char (for example coming from a fastq file)
 * to a phred score.
 *
 * @param quality_code Quality char to decode.
 * @param encoding     Encoding that @p quality_code is expressed in; defaults to
 *                     QualityEncoding::kSanger.
 * @return The decoded score as an unsigned char.
 *
 * NOTE(review): only the declaration is visible here; how chars that are invalid for
 * the given encoding are handled has to be checked in the implementation file.
 */
97 unsigned char quality_decode_to_phred_score(
98  char quality_code,
99  QualityEncoding encoding = QualityEncoding::kSanger
100 );
101 
/**
 * @brief String overload of quality_decode_to_phred_score().
 *
 * @param quality_codes String of quality chars.
 * @param encoding      Encoding that @p quality_codes is expressed in; defaults to
 *                      QualityEncoding::kSanger.
 * @return A vector of unsigned char scores.
 *
 * NOTE(review): presumably yields one decoded phred score per char of
 * @p quality_codes, in order - the implementation is not visible here; confirm.
 */
107 std::vector<unsigned char> quality_decode_to_phred_score(
108  std::string const& quality_codes,
109  QualityEncoding encoding = QualityEncoding::kSanger
110 );
111 
/**
 * @brief Encode a single phred score into a quality char, using the Sanger convention.
 *
 * The encoded char is the phred score offset by 33. Only scores in `[0, 93]` can be
 * represented that way; larger scores are either clamped or rejected.
 *
 * @param phred_score Phred score to encode.
 * @param clamp       If `true` (default), scores above 93 are encoded as if they were 93.
 *                    If `false`, such scores cause an exception instead.
 * @return The quality char, i.e., `phred_score + 33` after the optional clamping.
 * @throws std::invalid_argument if @p phred_score is above 93 and @p clamp is `false`.
 */
inline char quality_encode_from_phred_score( unsigned char phred_score, bool clamp = true )
{
    // Only one branch on the common path, as an out-of-range score should be the rare case;
    // whether to clamp or throw is decided only once we know the score is too large.
    if( phred_score > 93 ) {
        if( ! clamp ) {
            throw std::invalid_argument(
                "Cannot encode phred score outside of [0, 93] to Sanger format."
            );
        }

        // The score is already known to exceed the maximum, so clamping is a plain
        // assignment. This also avoids std::min, for which <algorithm> is not included here.
        phred_score = 93;
    }
    return static_cast<char>( phred_score + 33 );
}
142 
149  std::vector<unsigned char> const& phred_scores,
150  bool clamp = true
151 ) {
152  auto qualities = std::string( phred_scores.size(), ' ' );
153  for( size_t i = 0; i < phred_scores.size(); ++i ) {
154  qualities[i] = quality_encode_from_phred_score( phred_scores[i], clamp );
155  }
156  return qualities;
157 }
158 
159 // =================================================================================================
160 // Guess Quality Encoding Type
161 // =================================================================================================
162 
/**
 * @brief Guess the quality score encoding, based on counts of how often each char
 * appeared in the quality string(s).
 *
 * @param char_counts Array of 128 occurrence counts.
 *                    NOTE(review): presumably indexed by the ASCII code of the char -
 *                    confirm against the implementation.
 * @return The guessed QualityEncoding.
 *
 * NOTE(review): what happens when the counts are ambiguous or empty is not visible
 * from this declaration.
 */
173 QualityEncoding guess_fastq_quality_encoding( std::array<size_t, 128> const& char_counts );
174 
/**
 * @brief Overload of guess_fastq_quality_encoding() that takes an input source.
 *
 * @param source Shared pointer to a utils::BaseInputSource.
 * @return The guessed QualityEncoding.
 *
 * NOTE(review): presumably reads fastq data from @p source and guesses the encoding
 * from the quality chars found there - the implementation is not visible here; confirm,
 * including how much of the input is consumed.
 */
182 QualityEncoding guess_fastq_quality_encoding( std::shared_ptr< utils::BaseInputSource > source );
183 
184 // =================================================================================================
185 // Quality Computations
186 // =================================================================================================
187 
// Conversions between error probabilities (double), phred scores (unsigned char) and
// solexa scores (signed char). Only the declarations are visible in this header.
// NOTE(review): the purposes stated below are read off the function names and
// signatures; formulas, rounding and handling of out-of-range input have to be
// confirmed in the implementation file.

/** @brief Presumably converts an error probability to a phred score - TODO confirm. */
188 unsigned char error_probability_to_phred_score( double error_probability );
189 
/** @brief Presumably converts a phred score to an error probability - TODO confirm. */
190 double phred_score_to_error_probability( unsigned char phred_score );
191 
/** @brief Presumably converts an error probability to a solexa score - TODO confirm. */
192 signed char error_probability_to_solexa_score( double error_probability );
193 
/** @brief Presumably converts a solexa score to an error probability - TODO confirm. */
194 double solexa_score_to_error_probability( signed char solexa_score );
195 
/** @brief Presumably converts a phred score to a solexa score - TODO confirm. */
196 signed char phred_score_to_solexa_score( unsigned char phred_score );
197 
/** @brief Presumably converts a solexa score to a phred score - TODO confirm. */
198 unsigned char solexa_score_to_phred_score( signed char solexa_score );
199 
200 } // namespace sequence
201 } // namespace genesis
202 
203 #endif // include guard
unsigned char solexa_score_to_phred_score(signed char solexa_score)
Definition: quality.cpp:440
unsigned char quality_decode_to_phred_score(char quality_code, QualityEncoding encoding)
Decode a single quality score char (for example coming from a fastq file) to a phred score...
Definition: quality.cpp:86
std::string quality_encode_from_phred_score(std::vector< unsigned char > const &phred_scores, bool clamp=true)
Encode phred scores into quality chars, using the Sanger convention.
Definition: quality.hpp:148
unsigned char error_probability_to_phred_score(double error_probability)
Definition: quality.cpp:370
signed char error_probability_to_solexa_score(double error_probability)
Definition: quality.cpp:391
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::string quality_encoding_name(QualityEncoding encoding)
Return a readable name for each of the encoding types.
Definition: quality.cpp:67
double solexa_score_to_error_probability(signed char solexa_score)
Definition: quality.cpp:419
QualityEncoding guess_fastq_quality_encoding(std::array< size_t, 128 > const &char_counts)
Guess the quality score encoding, based on counts of how often each char appeared in the quality stri...
Definition: quality.cpp:266
double phred_score_to_error_probability(unsigned char phred_score)
Definition: quality.cpp:386
QualityEncoding
List of quality encodings for which we support decoding.
Definition: quality.hpp:71
signed char phred_score_to_solexa_score(unsigned char phred_score)
Definition: quality.cpp:428