|
A library for working with phylogenetic and population genetic data.
v0.27.0
|
|
Go to the documentation of this file. 1 #ifndef GENESIS_POPULATION_FUNCTIONS_DIVERSITY_H_
2 #define GENESIS_POPULATION_FUNCTIONS_DIVERSITY_H_
46 #include <type_traits>
50 namespace population {
168 size_t nucleotide_count
177 template<
class ForwardIterator>
179 ForwardIterator begin, ForwardIterator end,
180 bool with_bessel =
true
183 for(
auto& it = begin; it != end; ++it ) {
198 template<
class ForwardIterator>
201 ForwardIterator begin,
209 for(
auto& it = begin; it != end; ++it ) {
212 pi_sum += ( pi_snp / denom );
239 template<
class ForwardIterator>
241 ForwardIterator begin, ForwardIterator end,
244 auto const psb =
static_cast<double>( poolsize ) / (
static_cast<double>( poolsize ) - 1.0 );
246 for(
auto& it = begin; it != end; ++it ) {
264 PoolDiversitySettings
const& settings,
265 size_t nucleotide_count
271 template<
class ForwardIterator>
274 ForwardIterator begin,
282 for(
auto& it = begin; it != end; ++it ) {
307 double a_n(
size_t n );
329 double b_n(
size_t n );
413 double n_base(
size_t coverage,
size_t poolsize );
420 PoolDiversitySettings
const& settings,
429 template<
class ForwardIterator>
432 ForwardIterator begin,
435 double theta_watterson
444 auto const snp_cnt = std::distance( begin, end );
446 return (
theta_pi - theta_watterson ) / denom;
453 template<
class ForwardIterator>
456 ForwardIterator begin,
483 template<
class ForwardIterator>
486 ForwardIterator begin,
507 for(
auto it = min_filtered_range.begin(); it != min_filtered_range.end(); ++it ) {
515 std::is_same<decltype(stat.is_snp),
bool>::value,
516 "Expect bool type for BaseCountsStatus::is_snp"
519 std::is_same<decltype(stat.is_covered),
bool>::value,
520 "Expect bool type for BaseCountsStatus::is_covered"
522 static_assert(
static_cast<size_t>(
true ) == 1,
"Expect true == 1" );
523 static_assert(
static_cast<size_t>(
false ) == 0,
"Expect false == 0" );
528 results.
snp_count +=
static_cast<size_t>( stat.is_snp );
536 return stat.is_covered && stat.is_snp;
537 }, min_filtered_range.begin(), min_filtered_range.end() );
541 settings, covered_snps_range.begin(), covered_snps_range.end()
544 settings, covered_snps_range.begin(), covered_snps_range.end()
547 settings, covered_snps_range.begin(), covered_snps_range.end(),
552 auto const coverage =
static_cast<double>( results.
coverage_count );
562 #endif // include guard
double theta_pi_pool(PoolDiversitySettings const &settings, ForwardIterator begin, ForwardIterator end)
Compute theta pi with pool-sequencing correction according to Kofler et al., that is,...
double sum(const Histogram &h)
double a_n(size_t n)
Compute a_n, the sum of reciprocals.
double heterozygosity(BaseCounts const &sample, bool with_bessel)
Compute classic heterozygosity.
double f_star(double a_n, double n)
Compute f* according to Achaz 2008 and Kofler et al. 2011.
double tajima_d_pool_denominator(PoolDiversitySettings const &settings, size_t snp_count, double theta)
Compute the denominator for the pool-sequencing correction of Tajima's D according to Kofler et al.
double theta_watterson_relative
double theta_watterson_pool(PoolDiversitySettings const &settings, ForwardIterator begin, ForwardIterator end)
Compute theta watterson with pool-sequencing correction according to Kofler et al.
double n_base_matrix(size_t coverage, size_t poolsize)
Compute the n_base term used for Tajima's D in Kofler et al. 2011, following their approach.
double theta_watterson_pool_denominator(PoolDiversitySettings const &settings, size_t nucleotide_count)
Compute the denominator for the pool-sequencing correction of theta watterson according to Kofler et ...
PoolDiversityResults pool_diversity_measures(PoolDiversitySettings const &settings, ForwardIterator begin, ForwardIterator end)
Compute Theta Pi, Theta Watterson, and Tajima's D in their pool-sequencing corrected versions accordin...
size_t nucleotide_sum(BaseCounts const &sample)
Count of the pure nucleotide bases at this position, that is, the sum of all A, C,...
double b_n(size_t n)
Compute b_n, the sum of squared reciprocals.
double n_base(size_t coverage, size_t poolsize)
Compute the n_base term used for Tajima's D in Kofler et al. 2011, using a faster closed form express...
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
double theta_watterson_absolute
BaseCountsStatus status(BaseCounts const &sample, size_t min_coverage, size_t max_coverage, size_t min_count, bool tolerate_deletions)
Compute a simple status with useful properties from the counts of a BaseCounts.
One set of nucleotide base counts, for example for a given sample that represents a pool of sequenced...
Range< FilterIterator< PredicateFunctor, BaseIterator > > make_filter_range(PredicateFunctor unary_func, BaseIterator begin, BaseIterator end)
Construct a filtering range, given the filter predicate function as well as the underlying base itera...
size_t variant_count
Count of variants in the iterator range that surpass the PoolDiversitySettings::min_allele_count sett...
double beta_star(double n)
Compute beta* according to Achaz 2008 and Kofler et al. 2011.
double theta_pi_pool_denominator(PoolDiversitySettings const &settings, size_t nucleotide_count)
Compute the denominator for the pool-sequencing correction of theta pi according to Kofler et al.
size_t coverage_count
Out of the variant_count many variants, how many are properly covered, that is, have coverage in betw...
bool with_popoolation_bugs
double tajima_d_pool(PoolDiversitySettings const &settings, ForwardIterator begin, ForwardIterator end, double theta_pi, double theta_watterson)
Compute the pool-sequencing corrected version of Tajima's D according to Kofler et al.
double theta_pi(ForwardIterator begin, ForwardIterator end, bool with_bessel=true)
Compute classic theta pi, that is, the sum of heterozygosities.
void transform_zero_out_by_min_count(BaseCounts &sample, size_t min_count)
Transform a BaseCounts sample by setting any nucleotide count (A, C, G, T) to zero if min_count is no...
size_t snp_count
Out of the variant_count and coverage_count many variants, how many are SNPs, that is,...
Range< TransformIterator< TransformFunctor, BaseIterator > > make_transform_range(TransformFunctor unary_func, BaseIterator begin, BaseIterator end)
Construct a transforming range, given the transformation function as well as the underlying base iter...
double theta_pi_within_pool(ForwardIterator begin, ForwardIterator end, size_t poolsize)
Compute classic theta pi (within a population), that is, the sum of heterozygosities including Bessel...
double alpha_star(double n)
Compute alpha* according to Achaz 2008 and Kofler et al. 2011.
Settings used by different pool-sequencing corrected diversity statistics.
Data struct to collect all diversity statistics computed by pool_diversity_measures().