48 #include <immintrin.h>
81 1.0, 0.794328234724281, 0.630957344480193, 0.501187233627272,
82 0.398107170553497, 0.316227766016838, 0.251188643150958, 0.199526231496888,
83 0.158489319246111, 0.125892541179417,
84 0.1, 0.0794328234724281, 0.0630957344480193, 0.0501187233627272,
85 0.0398107170553497, 0.0316227766016838, 0.0251188643150958, 0.0199526231496888,
86 0.0158489319246111, 0.0125892541179417,
87 0.01, 0.00794328234724281, 0.00630957344480193, 0.00501187233627272,
88 0.00398107170553497, 0.00316227766016838, 0.00251188643150958, 0.00199526231496888,
89 0.00158489319246111, 0.00125892541179417,
90 0.001, 0.000794328234724281, 0.000630957344480193, 0.000501187233627273,
91 0.000398107170553497, 0.000316227766016838, 0.000251188643150958, 0.000199526231496888,
92 0.000158489319246111, 0.000125892541179417,
93 0.0001, 7.94328234724282e-05, 6.30957344480193e-05, 5.01187233627273e-05,
94 3.98107170553497e-05, 3.16227766016838e-05, 2.51188643150958e-05, 1.99526231496888e-05,
95 1.58489319246111e-05, 1.25892541179417e-05,
96 1e-05, 7.94328234724282e-06, 6.30957344480193e-06, 5.01187233627272e-06,
97 3.98107170553497e-06, 3.16227766016838e-06, 2.51188643150958e-06, 1.99526231496888e-06,
98 1.58489319246111e-06, 1.25892541179417e-06,
99 1e-06, 7.94328234724282e-07, 6.30957344480193e-07, 5.01187233627272e-07,
100 3.98107170553497e-07, 3.16227766016838e-07, 2.51188643150958e-07, 1.99526231496888e-07,
101 1.58489319246111e-07, 1.25892541179417e-07,
102 1e-07, 7.94328234724282e-08, 6.30957344480193e-08, 5.01187233627272e-08,
103 3.98107170553497e-08, 3.16227766016838e-08, 2.51188643150958e-08, 1.99526231496888e-08,
104 1.58489319246111e-08, 1.25892541179417e-08,
105 1e-08, 7.94328234724282e-09, 6.30957344480194e-09, 5.01187233627271e-09,
106 3.98107170553497e-09, 3.16227766016838e-09, 2.51188643150958e-09, 1.99526231496888e-09,
107 1.58489319246111e-09, 1.25892541179417e-09,
108 1e-09, 7.94328234724282e-10, 6.30957344480194e-10, 5.01187233627271e-10,
109 3.98107170553497e-10, 3.16227766016838e-10, 2.51188643150958e-10, 1.99526231496888e-10,
110 1.58489319246111e-10, 1.25892541179417e-10,
111 1e-10, 7.94328234724282e-11, 6.30957344480194e-11, 5.01187233627271e-11,
112 3.98107170553497e-11, 3.16227766016838e-11, 2.51188643150958e-11, 1.99526231496888e-11,
113 1.58489319246111e-11, 1.25892541179417e-11,
114 1e-11, 7.94328234724282e-12, 6.30957344480194e-12, 5.01187233627271e-12,
115 3.98107170553497e-12, 3.16227766016838e-12, 2.51188643150958e-12, 1.99526231496888e-12,
116 1.58489319246111e-12, 1.25892541179417e-12,
117 1e-12, 7.94328234724282e-13, 6.30957344480194e-13, 5.01187233627271e-13,
118 3.98107170553497e-13, 3.16227766016838e-13, 2.51188643150958e-13, 1.99526231496888e-13,
119 1.58489319246111e-13, 1.25892541179417e-13,
120 1e-13, 7.94328234724282e-14, 6.30957344480194e-14, 5.01187233627271e-14,
121 3.98107170553497e-14, 3.16227766016838e-14, 2.51188643150958e-14, 1.99526231496888e-14,
122 1.58489319246111e-14, 1.25892541179417e-14,
123 1e-14, 7.94328234724282e-15, 6.30957344480194e-15, 5.01187233627271e-15,
124 3.98107170553497e-15, 3.16227766016838e-15, 2.51188643150958e-15, 1.99526231496888e-15,
125 1.58489319246111e-15, 1.25892541179417e-15,
126 1e-15, 7.94328234724282e-16, 6.30957344480194e-16, 5.01187233627271e-16,
127 3.98107170553497e-16, 3.16227766016838e-16, 2.51188643150958e-16, 1.99526231496888e-16,
128 1.58489319246111e-16, 1.25892541179417e-16,
129 1e-16, 7.94328234724279e-17, 6.30957344480194e-17, 5.01187233627271e-17,
130 3.98107170553499e-17, 3.16227766016838e-17, 2.51188643150957e-17, 1.99526231496888e-17,
131 1.58489319246111e-17, 1.25892541179417e-17,
132 1e-17, 7.94328234724279e-18, 6.30957344480194e-18, 5.01187233627271e-18,
133 3.98107170553499e-18, 3.16227766016838e-18, 2.51188643150957e-18, 1.99526231496888e-18,
134 1.58489319246111e-18, 1.25892541179417e-18,
135 1e-18, 7.94328234724279e-19, 6.30957344480194e-19, 5.01187233627271e-19,
136 3.98107170553499e-19, 3.16227766016838e-19, 2.51188643150957e-19, 1.99526231496888e-19,
137 1.58489319246111e-19, 1.25892541179417e-19,
138 1e-19, 7.94328234724279e-20, 6.30957344480194e-20, 5.01187233627271e-20,
139 3.98107170553499e-20, 3.16227766016838e-20, 2.51188643150957e-20, 1.99526231496888e-20,
140 1.58489319246111e-20, 1.25892541179417e-20,
141 1e-20, 7.94328234724279e-21, 6.30957344480194e-21, 5.01187233627271e-21,
142 3.98107170553499e-21, 3.16227766016838e-21, 2.51188643150957e-21, 1.99526231496888e-21,
143 1.58489319246111e-21, 1.25892541179417e-21,
144 1e-21, 7.94328234724279e-22, 6.30957344480194e-22, 5.01187233627272e-22,
145 3.98107170553499e-22, 3.16227766016838e-22, 2.51188643150957e-22, 1.99526231496888e-22,
146 1.58489319246111e-22, 1.25892541179417e-22,
147 1e-22, 7.94328234724279e-23, 6.30957344480194e-23, 5.01187233627271e-23,
148 3.98107170553499e-23, 3.16227766016838e-23, 2.51188643150957e-23, 1.99526231496888e-23,
149 1.58489319246111e-23, 1.25892541179417e-23,
150 1e-23, 7.94328234724279e-24, 6.30957344480194e-24, 5.01187233627271e-24,
151 3.98107170553499e-24, 3.16227766016838e-24, 2.51188643150957e-24, 1.99526231496888e-24,
152 1.58489319246111e-24, 1.25892541179417e-24,
153 1e-24, 7.94328234724279e-25, 6.30957344480194e-25, 5.01187233627272e-25,
154 3.98107170553499e-25, 3.16227766016838e-25, 2.51188643150957e-25, 1.99526231496888e-25,
155 1.58489319246111e-25, 1.25892541179417e-25,
156 1e-25, 7.94328234724279e-26, 6.30957344480194e-26, 5.01187233627271e-26,
157 3.98107170553499e-26, 3.16227766016838e-26
169 throw std::invalid_argument(
181 return "Illumina 1.3+";
183 return "Illumina 1.5+";
185 return "Illumina 1.8+";
190 throw std::invalid_argument(
"Invalid quality encoding type." );
198 s.erase( std::remove_if(s.begin(), s.end(), [](
char c ){
199 return ! utils::is_alnum( c );
203 if( s ==
"sanger" ) {
205 }
else if( s ==
"illumina13" ) {
207 }
else if( s ==
"illumina15" ) {
209 }
else if( s ==
"illumina18" || s ==
"illumina" ) {
211 }
else if( s ==
"solexa" ) {
214 throw std::runtime_error(
"Invalid quality encoding name: \"" + name +
"\"" );
230 if( quality_code < 33 || quality_code >= 127 ) {
233 return static_cast<unsigned char>( quality_code ) - 33;
237 if( quality_code < 64 || quality_code >= 127 ) {
240 return static_cast<unsigned char>( quality_code ) - 64;
243 if( quality_code < 59 || quality_code >= 127 ) {
249 throw std::invalid_argument(
"Invalid quality encoding type." );
254 std::string
const& quality_codes,
258 auto result = std::vector<unsigned char>( quality_codes.size() );
280 throw std::invalid_argument(
"Invalid quality encoding type." );
297 auto const* data =
reinterpret_cast<__m256i const*
>( quality_codes.c_str() );
298 auto* write =
reinterpret_cast<__m256i*
>( result.data() );
302 auto const m_offset = _mm256_set1_epi8(
offset );
303 auto static const m_upper = _mm256_set1_epi8( 126 );
307 for(
size_t j = 0; j < quality_codes.size() / 32; ++j ) {
310 auto const m_data = _mm256_loadu_si256( data + j );
311 auto const m_min = _mm256_cmpgt_epi8( m_offset, m_data );
312 auto const m_max = _mm256_cmpgt_epi8( m_data, m_upper );
316 good &= _mm256_testz_si256( m_min, m_min ) && _mm256_testz_si256( m_max, m_max );
319 auto const m_result = _mm256_sub_epi8( m_data, m_offset );
320 _mm256_storeu_si256( write + j, m_result );
325 i = quality_codes.size() / 32 * 32;
327 #endif // GENESIS_AVX
333 for( ; i < quality_codes.size(); ++i ) {
334 good &= ( quality_codes[i] >=
offset ) && ( quality_codes[i] <= 126 );
335 result[i] =
static_cast<unsigned char>( quality_codes[i] ) -
offset;
341 for(
size_t i = 0; i < quality_codes.size(); ++i ) {
342 if( quality_codes[i] <
offset || quality_codes[i] >= 127 ) {
385 for(
size_t i = 0; i < quality_codes.size(); ++i ) {
402 for(
size_t i = 0; i < char_counts.size(); ++i ) {
403 if( char_counts[i] > 0 ) {
411 for(
size_t i = char_counts.size(); i > 0; --i ) {
412 if( char_counts[i-1] > 0 ) {
419 if( min < 33 || max >= 127 ) {
420 throw std::runtime_error(
421 "Invalid char counts provided to guess quality score encoding. Only printable "
422 "characters (ASCII range 33 to 127) are allowed in fastq quality encodings."
425 assert( min <= max );
460 auto char_counts = std::array<size_t, 128>();
465 reader.quality_string_plugin(
466 [&]( std::string
const& quality_string,
Sequence& sequence )
469 for(
auto q : quality_string ) {
473 auto qu =
static_cast<unsigned char>(q);
475 throw std::invalid_argument(
476 "Invalid quality score character outside of the ASCII range."
481 assert( qu < char_counts.size() );
490 while( reader.parse_sequence( input_stream, seq ) ) {
504 if( ! std::isfinite( error_probability ) || error_probability < 0.0 || error_probability > 1.0 ) {
505 throw std::invalid_argument(
506 "Cannot convert error probability outside of range [0.0, 1.0] to phred score."
513 double v = std::round( -10.0 * std::log10( error_probability ));
514 v = std::min( v, 255.0 );
515 return static_cast<unsigned char>( v );
526 if( ! std::isfinite( error_probability ) || error_probability < 0.0 || error_probability > 1.0 ) {
527 throw std::invalid_argument(
528 "Cannot convert error probability outside of range [0.0, 1.0] to solexa score."
546 double v = std::round( -10.0 * std::log10( error_probability / ( 1.0 - error_probability )));
547 v = std::min( v, 127.0 );
548 v = std::max( v, -128.0 );
549 return static_cast<signed char>( v );
554 if( solexa_score < -5 ) {
557 double const t = std::pow( 10.0,
static_cast<double>( solexa_score ) / -10.0 );
558 return t / ( 1.0 + t );
563 if( phred_score <= 1 ) {
566 double v = std::round(
567 10.0 * std::log10( std::pow( 10.0,
static_cast<double>( phred_score ) / 10.0 ) - 1.0 )
569 v = std::min( v, 127.0 );
570 return static_cast<signed char>( v );
576 return static_cast<unsigned char>( std::round(
577 10.0 * std::log10( std::pow( 10.0,
static_cast<double>( solexa_score ) / 10.0 ) + 1.0 )
583 auto res = std::vector<unsigned char>( error_probability.size(), 0 );
584 for(
size_t i = 0; i < error_probability.size(); ++i ) {
592 auto res = std::vector<double>( phred_score.size(), 0 );
593 for(
size_t i = 0; i < phred_score.size(); ++i ) {
601 auto res = std::vector<signed char>( error_probability.size(), 0 );
602 for(
size_t i = 0; i < error_probability.size(); ++i ) {
610 auto res = std::vector<double>( solexa_score.size(), 0 );
611 for(
size_t i = 0; i < solexa_score.size(); ++i ) {
619 auto res = std::vector<signed char>( phred_score.size(), 0 );
620 for(
size_t i = 0; i < phred_score.size(); ++i ) {
628 auto res = std::vector<unsigned char>( solexa_score.size(), 0 );
629 for(
size_t i = 0; i < solexa_score.size(); ++i ) {