47 #if defined(GENESIS_AVX) || defined(GENESIS_AVX2) || defined(GENESIS_AVX512)
49 #include <immintrin.h>
83 1.0, 0.794328234724281, 0.630957344480193, 0.501187233627272,
84 0.398107170553497, 0.316227766016838, 0.251188643150958, 0.199526231496888,
85 0.158489319246111, 0.125892541179417,
86 0.1, 0.0794328234724281, 0.0630957344480193, 0.0501187233627272,
87 0.0398107170553497, 0.0316227766016838, 0.0251188643150958, 0.0199526231496888,
88 0.0158489319246111, 0.0125892541179417,
89 0.01, 0.00794328234724281, 0.00630957344480193, 0.00501187233627272,
90 0.00398107170553497, 0.00316227766016838, 0.00251188643150958, 0.00199526231496888,
91 0.00158489319246111, 0.00125892541179417,
92 0.001, 0.000794328234724281, 0.000630957344480193, 0.000501187233627273,
93 0.000398107170553497, 0.000316227766016838, 0.000251188643150958, 0.000199526231496888,
94 0.000158489319246111, 0.000125892541179417,
95 0.0001, 7.94328234724282e-05, 6.30957344480193e-05, 5.01187233627273e-05,
96 3.98107170553497e-05, 3.16227766016838e-05, 2.51188643150958e-05, 1.99526231496888e-05,
97 1.58489319246111e-05, 1.25892541179417e-05,
98 1e-05, 7.94328234724282e-06, 6.30957344480193e-06, 5.01187233627272e-06,
99 3.98107170553497e-06, 3.16227766016838e-06, 2.51188643150958e-06, 1.99526231496888e-06,
100 1.58489319246111e-06, 1.25892541179417e-06,
101 1e-06, 7.94328234724282e-07, 6.30957344480193e-07, 5.01187233627272e-07,
102 3.98107170553497e-07, 3.16227766016838e-07, 2.51188643150958e-07, 1.99526231496888e-07,
103 1.58489319246111e-07, 1.25892541179417e-07,
104 1e-07, 7.94328234724282e-08, 6.30957344480193e-08, 5.01187233627272e-08,
105 3.98107170553497e-08, 3.16227766016838e-08, 2.51188643150958e-08, 1.99526231496888e-08,
106 1.58489319246111e-08, 1.25892541179417e-08,
107 1e-08, 7.94328234724282e-09, 6.30957344480194e-09, 5.01187233627271e-09,
108 3.98107170553497e-09, 3.16227766016838e-09, 2.51188643150958e-09, 1.99526231496888e-09,
109 1.58489319246111e-09, 1.25892541179417e-09,
110 1e-09, 7.94328234724282e-10, 6.30957344480194e-10, 5.01187233627271e-10,
111 3.98107170553497e-10, 3.16227766016838e-10, 2.51188643150958e-10, 1.99526231496888e-10,
112 1.58489319246111e-10, 1.25892541179417e-10,
113 1e-10, 7.94328234724282e-11, 6.30957344480194e-11, 5.01187233627271e-11,
114 3.98107170553497e-11, 3.16227766016838e-11, 2.51188643150958e-11, 1.99526231496888e-11,
115 1.58489319246111e-11, 1.25892541179417e-11,
116 1e-11, 7.94328234724282e-12, 6.30957344480194e-12, 5.01187233627271e-12,
117 3.98107170553497e-12, 3.16227766016838e-12, 2.51188643150958e-12, 1.99526231496888e-12,
118 1.58489319246111e-12, 1.25892541179417e-12,
119 1e-12, 7.94328234724282e-13, 6.30957344480194e-13, 5.01187233627271e-13,
120 3.98107170553497e-13, 3.16227766016838e-13, 2.51188643150958e-13, 1.99526231496888e-13,
121 1.58489319246111e-13, 1.25892541179417e-13,
122 1e-13, 7.94328234724282e-14, 6.30957344480194e-14, 5.01187233627271e-14,
123 3.98107170553497e-14, 3.16227766016838e-14, 2.51188643150958e-14, 1.99526231496888e-14,
124 1.58489319246111e-14, 1.25892541179417e-14,
125 1e-14, 7.94328234724282e-15, 6.30957344480194e-15, 5.01187233627271e-15,
126 3.98107170553497e-15, 3.16227766016838e-15, 2.51188643150958e-15, 1.99526231496888e-15,
127 1.58489319246111e-15, 1.25892541179417e-15,
128 1e-15, 7.94328234724282e-16, 6.30957344480194e-16, 5.01187233627271e-16,
129 3.98107170553497e-16, 3.16227766016838e-16, 2.51188643150958e-16, 1.99526231496888e-16,
130 1.58489319246111e-16, 1.25892541179417e-16,
131 1e-16, 7.94328234724279e-17, 6.30957344480194e-17, 5.01187233627271e-17,
132 3.98107170553499e-17, 3.16227766016838e-17, 2.51188643150957e-17, 1.99526231496888e-17,
133 1.58489319246111e-17, 1.25892541179417e-17,
134 1e-17, 7.94328234724279e-18, 6.30957344480194e-18, 5.01187233627271e-18,
135 3.98107170553499e-18, 3.16227766016838e-18, 2.51188643150957e-18, 1.99526231496888e-18,
136 1.58489319246111e-18, 1.25892541179417e-18,
137 1e-18, 7.94328234724279e-19, 6.30957344480194e-19, 5.01187233627271e-19,
138 3.98107170553499e-19, 3.16227766016838e-19, 2.51188643150957e-19, 1.99526231496888e-19,
139 1.58489319246111e-19, 1.25892541179417e-19,
140 1e-19, 7.94328234724279e-20, 6.30957344480194e-20, 5.01187233627271e-20,
141 3.98107170553499e-20, 3.16227766016838e-20, 2.51188643150957e-20, 1.99526231496888e-20,
142 1.58489319246111e-20, 1.25892541179417e-20,
143 1e-20, 7.94328234724279e-21, 6.30957344480194e-21, 5.01187233627271e-21,
144 3.98107170553499e-21, 3.16227766016838e-21, 2.51188643150957e-21, 1.99526231496888e-21,
145 1.58489319246111e-21, 1.25892541179417e-21,
146 1e-21, 7.94328234724279e-22, 6.30957344480194e-22, 5.01187233627272e-22,
147 3.98107170553499e-22, 3.16227766016838e-22, 2.51188643150957e-22, 1.99526231496888e-22,
148 1.58489319246111e-22, 1.25892541179417e-22,
149 1e-22, 7.94328234724279e-23, 6.30957344480194e-23, 5.01187233627271e-23,
150 3.98107170553499e-23, 3.16227766016838e-23, 2.51188643150957e-23, 1.99526231496888e-23,
151 1.58489319246111e-23, 1.25892541179417e-23,
152 1e-23, 7.94328234724279e-24, 6.30957344480194e-24, 5.01187233627271e-24,
153 3.98107170553499e-24, 3.16227766016838e-24, 2.51188643150957e-24, 1.99526231496888e-24,
154 1.58489319246111e-24, 1.25892541179417e-24,
155 1e-24, 7.94328234724279e-25, 6.30957344480194e-25, 5.01187233627272e-25,
156 3.98107170553499e-25, 3.16227766016838e-25, 2.51188643150957e-25, 1.99526231496888e-25,
157 1.58489319246111e-25, 1.25892541179417e-25,
158 1e-25, 7.94328234724279e-26, 6.30957344480194e-26, 5.01187233627271e-26,
159 3.98107170553499e-26, 3.16227766016838e-26
171 throw std::invalid_argument(
179 auto const wo = with_offset;
182 return std::string(
"Sanger" ) + std::string( wo ?
" (ASCII offset 33)" :
"" );
184 return std::string(
"Illumina 1.3+" ) + std::string( wo ?
" (ASCII offset 64)" :
"" );
186 return std::string(
"Illumina 1.5+" ) + std::string( wo ?
" (ASCII offset 64)" :
"" );
188 return std::string(
"Illumina 1.8+" ) + std::string( wo ?
" (ASCII offset 33)" :
"" );
190 return std::string(
"Solexa" ) + std::string( wo ?
" (ASCII offset 64)" :
"" );
193 throw std::invalid_argument(
"Invalid quality encoding type." );
201 s.erase( std::remove_if(s.begin(), s.end(), [](
char c ){
202 return ! utils::is_alnum( c );
206 if( s ==
"sanger" ) {
208 }
else if( s ==
"illumina13" ) {
210 }
else if( s ==
"illumina15" ) {
212 }
else if( s ==
"illumina18" || s ==
"illumina" ) {
214 }
else if( s ==
"solexa" ) {
217 throw std::runtime_error(
"Invalid quality encoding name: \"" + name +
"\"" );
233 if( quality_code < 33 || quality_code >= 127 ) {
236 return static_cast<unsigned char>( quality_code ) - 33;
240 if( quality_code < 64 || quality_code >= 127 ) {
243 return static_cast<unsigned char>( quality_code ) - 64;
246 if( quality_code < 59 || quality_code >= 127 ) {
252 throw std::invalid_argument(
"Invalid quality encoding type." );
257 std::string
const& quality_codes,
261 auto result = std::vector<unsigned char>( quality_codes.size() );
283 throw std::invalid_argument(
"Invalid quality encoding type." );
300 auto const* data =
reinterpret_cast<__m256i const*
>( quality_codes.c_str() );
301 auto* write =
reinterpret_cast<__m256i*
>( result.data() );
305 auto const m_offset = _mm256_set1_epi8(
offset );
306 auto static const m_upper = _mm256_set1_epi8( 126 );
310 for(
size_t j = 0; j < quality_codes.size() / 32; ++j ) {
313 auto const m_data = _mm256_loadu_si256( data + j );
314 auto const m_min = _mm256_cmpgt_epi8( m_offset, m_data );
315 auto const m_max = _mm256_cmpgt_epi8( m_data, m_upper );
319 good &= _mm256_testz_si256( m_min, m_min ) && _mm256_testz_si256( m_max, m_max );
322 auto const m_result = _mm256_sub_epi8( m_data, m_offset );
323 _mm256_storeu_si256( write + j, m_result );
328 i = quality_codes.size() / 32 * 32;
330 #endif // GENESIS_AVX2
336 for( ; i < quality_codes.size(); ++i ) {
337 good &= ( quality_codes[i] >=
offset ) && ( quality_codes[i] <= 126 );
338 result[i] =
static_cast<unsigned char>( quality_codes[i] ) -
offset;
344 for(
size_t i = 0; i < quality_codes.size(); ++i ) {
345 if( quality_codes[i] <
offset || quality_codes[i] >= 127 ) {
388 for(
size_t i = 0; i < quality_codes.size(); ++i ) {
411 throw std::invalid_argument(
"Invalid quality encoding type." );
428 throw std::invalid_argument(
"Invalid quality encoding type." );
437 for(
size_t i = 0; i < char_counts.size(); ++i ) {
438 if( char_counts[i] > 0 ) {
446 for(
size_t i = char_counts.size(); i > 0; --i ) {
447 if( char_counts[i-1] > 0 ) {
454 if( min == char_counts.size() ) {
456 throw std::runtime_error(
457 "Cannot guess quality encoding, as all quality code counts are zero."
460 if( min < 33 || max >= 127 ) {
461 throw std::runtime_error(
462 "Invalid char counts provided to guess quality score encoding. Only printable "
463 "characters (ASCII range 33 to 127) are allowed in fastq quality encodings."
466 assert( min <= max );
499 std::shared_ptr< utils::BaseInputSource > source,
504 auto char_counts = std::array<size_t, 128>();
505 size_t lines_cnt = 0;
506 size_t chars_cnt = 0;
511 reader.quality_string_plugin(
512 [&]( std::string
const& quality_string,
Sequence& sequence )
515 for(
auto q : quality_string ) {
519 auto qu =
static_cast<unsigned char>(q);
521 throw std::invalid_argument(
522 "Invalid quality score character outside of the ASCII range."
528 assert( qu < char_counts.size() );
533 if( max_chars > 0 && chars_cnt > max_chars ) {
543 while( reader.parse_sequence( input_stream, seq ) ) {
545 if( max_chars > 0 && chars_cnt > max_chars ) {
552 if( max_lines > 0 && lines_cnt >= max_lines ) {
567 if( ! std::isfinite( error_probability ) || error_probability < 0.0 || error_probability > 1.0 ) {
568 throw std::invalid_argument(
569 "Cannot convert error probability outside of range [0.0, 1.0] to phred score."
576 double v = std::round( -10.0 * std::log10( error_probability ));
577 v = std::min( v, 255.0 );
578 return static_cast<unsigned char>( v );
589 if( ! std::isfinite( error_probability ) || error_probability < 0.0 || error_probability > 1.0 ) {
590 throw std::invalid_argument(
591 "Cannot convert error probability outside of range [0.0, 1.0] to solexa score."
609 double v = std::round( -10.0 * std::log10( error_probability / ( 1.0 - error_probability )));
610 v = std::min( v, 127.0 );
611 v = std::max( v, -128.0 );
612 return static_cast<signed char>( v );
617 if( solexa_score < -5 ) {
620 double const t = std::pow( 10.0,
static_cast<double>( solexa_score ) / -10.0 );
621 return t / ( 1.0 + t );
626 if( phred_score <= 1 ) {
629 double v = std::round(
630 10.0 * std::log10( std::pow( 10.0,
static_cast<double>( phred_score ) / 10.0 ) - 1.0 )
632 v = std::min( v, 127.0 );
633 return static_cast<signed char>( v );
639 return static_cast<unsigned char>( std::round(
640 10.0 * std::log10( std::pow( 10.0,
static_cast<double>( solexa_score ) / 10.0 ) + 1.0 )
646 auto res = std::vector<unsigned char>( error_probability.size(), 0 );
647 for(
size_t i = 0; i < error_probability.size(); ++i ) {
655 auto res = std::vector<double>( phred_score.size(), 0 );
656 for(
size_t i = 0; i < phred_score.size(); ++i ) {
664 auto res = std::vector<signed char>( error_probability.size(), 0 );
665 for(
size_t i = 0; i < error_probability.size(); ++i ) {
673 auto res = std::vector<double>( solexa_score.size(), 0 );
674 for(
size_t i = 0; i < solexa_score.size(); ++i ) {
682 auto res = std::vector<signed char>( phred_score.size(), 0 );
683 for(
size_t i = 0; i < phred_score.size(); ++i ) {
691 auto res = std::vector<unsigned char>( solexa_score.size(), 0 );
692 for(
size_t i = 0; i < solexa_score.size(); ++i ) {