40 namespace population {
53 double freq_a,
double freq_c,
double freq_g,
double freq_t,
double nt_cnt
60 result *= nt_cnt / ( nt_cnt - 1.0 );
69 double const p1_nt_cnt =
static_cast<double>(
nucleotide_sum( p1 ));
70 double const p1_freq_a =
static_cast<double>( p1.
a_count ) / p1_nt_cnt;
71 double const p1_freq_c =
static_cast<double>( p1.
c_count ) / p1_nt_cnt;
72 double const p1_freq_g =
static_cast<double>( p1.
g_count ) / p1_nt_cnt;
73 double const p1_freq_t =
static_cast<double>( p1.
t_count ) / p1_nt_cnt;
76 double const p2_nt_cnt =
static_cast<double>(
nucleotide_sum( p2 ));
77 double const p2_freq_a =
static_cast<double>( p2.
a_count ) / p2_nt_cnt;
78 double const p2_freq_c =
static_cast<double>( p2.
c_count ) / p2_nt_cnt;
79 double const p2_freq_g =
static_cast<double>( p2.
g_count ) / p2_nt_cnt;
80 double const p2_freq_t =
static_cast<double>( p2.
t_count ) / p2_nt_cnt;
83 double const min_cnt = std::min( p1_nt_cnt, p2_nt_cnt );
84 double const avg_a = ( p1_freq_a + p2_freq_a ) / 2.0;
85 double const avg_c = ( p1_freq_c + p2_freq_c ) / 2.0;
86 double const avg_g = ( p1_freq_g + p2_freq_g ) / 2.0;
87 double const avg_t = ( p1_freq_t + p2_freq_t ) / 2.0;
90 auto const p1_pi = pi_snp_( p1_freq_a, p1_freq_c, p1_freq_g, p1_freq_t, p1_nt_cnt );
91 auto const p2_pi = pi_snp_( p2_freq_a, p2_freq_c, p2_freq_g, p2_freq_t, p2_nt_cnt );
92 auto const pp_pi = pi_snp_( avg_a, avg_c, avg_g, avg_t, min_cnt );
94 return std::tuple<double, double, double>{ p1_pi, p2_pi, pp_pi };
102 std::pair<SortedBaseCounts, SortedBaseCounts>
const& sample_counts
110 sample_counts.first[2].count != 0 || sample_counts.first[3].count != 0 ||
111 sample_counts.second[2].count != 0 || sample_counts.second[3].count != 0
113 throw std::runtime_error(
114 "In f_st_pool_karlsson(): Expecting biallelic SNPs where only "
115 "two nucleotide counts are > 0, but found more non-zero counts."
121 assert( sample_counts.first[2].count == 0 && sample_counts.first[3].count == 0 );
122 assert( sample_counts.second[2].count == 0 && sample_counts.second[3].count == 0 );
124 sample_counts.first[0].base == sample_counts.second[0].base &&
125 sample_counts.first[1].base == sample_counts.second[1].base &&
126 sample_counts.first[2].base == sample_counts.second[2].base &&
127 sample_counts.first[3].base == sample_counts.second[3].base
133 auto const a_1 =
static_cast<double>( sample_counts.first[0].count );
134 auto const b_1 =
static_cast<double>( sample_counts.first[1].count );
135 auto const n_1 = a_1 + b_1;
136 auto const a_2 =
static_cast<double>( sample_counts.second[0].count );
137 auto const b_2 =
static_cast<double>( sample_counts.second[1].count );
138 auto const n_2 = a_2 + b_2;
141 if( n_1 <= 1.0 || n_2 <= 1.0 ) {
146 assert( a_1 <= n_1 );
147 assert( a_2 <= n_2 );
150 double const h1 = ( a_1 * b_1 ) / ( n_1 * ( n_1 - 1.0 ));
151 double const h2 = ( a_2 * b_2 ) / ( n_2 * ( n_2 - 1.0 ));
154 double const sqr =
squared(( a_1 / n_1 ) - ( a_2 / n_2 ));
155 double const sub = ( h1 / n_1 + h2 / n_2 );
156 double const nk = sqr - sub;
157 double const dk = nk + h1 + h2;
167 size_t p1_poolsize,
size_t p2_poolsize,
176 auto pi_within_partial_ = [](
177 double poolsize,
double freq_a,
double freq_c,
double freq_g,
double freq_t,
double nt_cnt
179 assert( poolsize > 1.0 );
186 result *= nt_cnt / ( nt_cnt - 1.0 );
187 result *= poolsize / ( poolsize - 1.0 );
192 double const p1_nt_cnt =
static_cast<double>(
nucleotide_sum( p1_counts ));
193 double const p1_freq_a =
static_cast<double>( p1_counts.
a_count ) / p1_nt_cnt;
194 double const p1_freq_c =
static_cast<double>( p1_counts.
c_count ) / p1_nt_cnt;
195 double const p1_freq_g =
static_cast<double>( p1_counts.
g_count ) / p1_nt_cnt;
196 double const p1_freq_t =
static_cast<double>( p1_counts.
t_count ) / p1_nt_cnt;
199 double const p2_nt_cnt =
static_cast<double>(
nucleotide_sum( p2_counts ));
200 double const p2_freq_a =
static_cast<double>( p2_counts.
a_count ) / p2_nt_cnt;
201 double const p2_freq_c =
static_cast<double>( p2_counts.
c_count ) / p2_nt_cnt;
202 double const p2_freq_g =
static_cast<double>( p2_counts.
g_count ) / p2_nt_cnt;
203 double const p2_freq_t =
static_cast<double>( p2_counts.
t_count ) / p2_nt_cnt;
206 auto const pi_within = 0.5 * (
207 pi_within_partial_( p1_poolsize, p1_freq_a, p1_freq_c, p1_freq_g, p1_freq_t, p1_nt_cnt ) +
208 pi_within_partial_( p2_poolsize, p2_freq_a, p2_freq_c, p2_freq_g, p2_freq_t, p2_nt_cnt )
212 double pi_between = 1.0;
213 pi_between -= p1_freq_a * p2_freq_a;
214 pi_between -= p1_freq_c * p2_freq_c;
215 pi_between -= p1_freq_g * p2_freq_g;
216 pi_between -= p1_freq_t * p2_freq_t;
219 double const pi_total = 0.5 * ( pi_within + pi_between );
221 return std::tuple<double, double, double>{ pi_within, pi_between, pi_total };