43 #include <unordered_map>
50 namespace population {
67 size_t nucleotide_count,
68 size_t allele_frequency
99 if( allele_frequency == 0 ) {
100 throw std::invalid_argument(
101 "In computing amnm_(), allele_frequency == 0 is not allowed. "
102 "This is likely caused by using DiversityPoolSettings.min_count == 0."
111 auto const k = allele_frequency;
112 auto const n = nucleotide_count;
118 for(
size_t r = 1; r <= poolsize - 1; ++r ) {
121 double const p =
static_cast<double>( r ) /
static_cast<double>( poolsize );
122 assert( std::isfinite( p ) && 0.0 < p && p < 1.0 );
126 double const log_pow_1 =
static_cast<double>( k ) * std::log( p );
127 double const log_pow_2 =
static_cast<double>( n - k ) * std::log( 1.0 - p );
128 double const binom = std::exp( log_coeff + log_pow_1 + log_pow_2 );
134 double const partial = binom /
static_cast<double>( r );
139 if( ! std::isfinite( result )) {
155 double const nt_cnt =
static_cast<double>(
nucleotide_sum( sample ));
167 h *= nt_cnt / ( nt_cnt - 1.0 );
175 size_t nucleotide_count
184 size_t min_count,
size_t poolsize,
size_t nucleotide_count
189 if( 2 * min_count > nucleotide_count ) {
202 for(
size_t m_it = min_count; m_it <= ( nucleotide_count - min_count ); ++m_it ) {
205 double const m =
static_cast<double>( m_it );
206 double const M =
static_cast<double>( nucleotide_count );
209 double const term = ( 2.0 * m * ( M - m )) / ( M * ( M - 1.0 ));
210 double const partial = term *
amnm_( poolsize, nucleotide_count, m_it );
216 if( ! std::isfinite( denom )) {
224 return denom_cache_( settings.
min_count, poolsize, nucleotide_count );
234 size_t nucleotide_count
243 size_t min_count,
size_t poolsize,
size_t nucleotide_count
248 if( 2 * min_count > nucleotide_count ) {
261 for(
size_t m_it = min_count; m_it <= ( nucleotide_count - min_count ); ++m_it ) {
264 auto const anmn =
amnm_( poolsize, nucleotide_count, m_it );
270 if( ! std::isfinite( denom )) {
278 return denom_cache_( settings.
min_count, poolsize, nucleotide_count );
290 for(
size_t i = 1; i < n; ++i ) {
291 sum += 1.0 /
static_cast<double>( i );
302 assert( std::isfinite(n) && n >= 0.0 );
303 auto const ni =
static_cast<size_t>( std::round( n ));
304 return a_n_cache_( ni );
312 for(
size_t i = 1; i < n; ++i ) {
313 sum += 1.0 / (
static_cast<double>( i ) *
static_cast<double>( i ));
319 assert( std::isfinite(n) && n >= 0.0 );
320 auto const ni =
static_cast<size_t>( std::round( n ));
321 return b_n_cache_( ni );
326 double const nd =
static_cast<double>( n );
327 return ( nd - 3.0 ) / (
a_n * ( nd - 1.0 ) - nd );
333 throw std::invalid_argument(
"Cannot compute alpha star with effective read depth n <= 1" );
341 double const nd =
static_cast<double>( n );
342 double const an =
a_n( n );
343 double const fs =
f_star( an, n );
346 auto const t1 =
squared( fs ) * ( an - ( nd / ( nd - 1.0 )));
347 auto const t2s1 = an * (( 4.0 * ( nd + 1.0 )) /
squared( nd - 1.0 ));
348 auto const t2s2 = 2.0 * (( nd + 3.0 ) / ( nd - 1.0 ));
349 auto const t2 = fs * ( t2s1 - t2s2 );
350 auto const t3 = an * (( 8.0 * ( nd + 1.0 )) / ( nd *
squared( nd - 1.0 )));
351 auto const t4 = (
squared( nd ) + nd + 60.0 ) / ( 3.0 * nd * ( nd - 1.0 ));
352 return t1 + t2 - t3 + t4;
355 return alpha_star_cache_( n );
361 throw std::invalid_argument(
"Cannot compute beta star with effective read depth n <= 1" );
369 double const nd =
static_cast<double>( n );
370 double const an =
a_n( n );
371 double const bn =
b_n( n );
372 double const fs =
f_star( an, n );
377 auto const t1 =
squared( fs ) * ( bn - ((( 2.0 * nd ) - 1.0 ) /
squared( nd - 1.0 )));
378 auto const t2s1 = bn * ( 8.0 / ( nd - 1.0 ));
379 auto const t2s2 = an * ( 4.0 / ( nd * ( nd - 1.0 )));
380 auto const t2s3n =
cubed( nd ) + 12.0 *
squared( nd ) - 35.0 * nd + 18.0;
381 auto const t2s3d = nd *
squared( nd - 1.0 );
382 auto const t2s3 = t2s3n / t2s3d;
383 auto const t2 = fs * ( t2s1 - t2s2 - t2s3 );
384 auto const t3 = bn * ( 16.0 / ( nd * ( nd - 1.0 )));
385 auto const t4 = an * ( 8.0 / (
squared( nd ) * ( nd - 1.0 )));
386 auto const t5s1 = 2.0 * ( std::pow( nd, 4 ) + 110.0 *
squared( nd ) - 255.0 * nd + 126.0 );
388 auto const t5 = t5s1 / t5s2;
389 return t1 + t2 - t3 + t4 + t5;
392 return beta_star_cache_( n );
396 size_t max_read_depth,
size_t poolsize
402 auto const max_width = poolsize;
406 double const poold =
static_cast<double>( poolsize );
409 result( 0, 0 ) = 1.0;
410 for(
size_t i = 1; i < max_read_depth + 1; ++i ) {
411 result( i, 0 ) = 0.0;
413 for(
size_t j = 1; j < max_width + 1; ++j ) {
414 result( 0, j ) = 0.0;
418 for(
size_t i = 1; i < max_read_depth + 1; ++i ) {
419 for(
size_t j = 1; j < max_width + 1; ++j ) {
420 auto const t1s1 = (( 1.0 + poold -
static_cast<double>( j )) / poold );
421 auto const t1s2 = result( i - 1, j - 1 );
422 auto const t2 = (
static_cast<double>( j ) / poold ) * result( i-1, j );
423 result( i, j ) = t1s1 * t1s2 + t2;
430 size_t max_read_depth,
size_t poolsize
444 static std::unordered_map<size_t, PijMatrix> pij_matrix_cache_;
447 static std::mutex guard_;
448 std::lock_guard<std::mutex> lock( guard_ );
452 pij_matrix_cache_.count( poolsize ) == 0 ||
453 max_read_depth >= pij_matrix_cache_.at( poolsize ).rows() ||
454 poolsize + 1 != pij_matrix_cache_.at( poolsize ).cols()
458 pij_matrix_cache_[ poolsize ] =
pij_matrix_( 3 * max_read_depth, poolsize );
461 assert( pij_matrix_cache_.count( poolsize ) > 0 );
462 assert( max_read_depth <= pij_matrix_cache_.at( poolsize ).rows() );
463 assert( poolsize <= pij_matrix_cache_.at( poolsize ).cols() );
464 return pij_matrix_cache_[ poolsize ];
482 size_t read_depth,
size_t poolsize
489 auto const minj = ( read_depth < poolsize ) ? read_depth : poolsize;
490 for(
size_t k = 1; k <= minj; ++k ) {
491 assert( read_depth < pij_matrix.rows() );
492 assert( k < pij_matrix.cols() );
493 nbase +=
static_cast<double>( k ) * pij_matrix( read_depth, k );
498 return nbase_cache_( read_depth, poolsize );
501 double n_base(
size_t read_depth,
size_t poolsize )
511 double const p =
static_cast<double>( read_depth );
512 double const n =
static_cast<double>( poolsize );
513 return n * ( 1.0 - std::pow(( n - 1.0 ) / n, p ));
524 double window_avg_denom,
525 size_t empirical_min_read_depth
540 throw std::invalid_argument(
541 "Minimum allele count needs to be set to 2 for calculating pool-corrected Tajima's D "
542 "with tajima_d_pool() according to Kofler et al. In case 2 is insufficient, "
543 "we recommend to subsample the reads to a smaller read depth."
547 throw std::invalid_argument(
548 "Minimum read depth of 0 is not valid for calculating pool-corrected Tajima's D "
549 "with tajima_d_pool()."
553 throw std::invalid_argument(
554 "Invalid minimum read depth >> pool size (as internal aproximation we use: "
555 "3 * minimum read depth < pool size) in tajima_d_pool()"
566 auto const avg_n =
n_base( empirical_min_read_depth, poolsize );
584 auto const avg_n =
n_base( poolsize, poolsize );
586 betastar = alphastar;
602 throw std::invalid_argument(
"Invalid enum value for TajimaDenominatorPolicy" );
607 ( alphastar / window_avg_denom ) * theta + betastar *
squared( theta )