1 #ifndef GENESIS_POPULATION_FUNCTION_FST_POOL_UNBIASED_H_
2 #define GENESIS_POPULATION_FUNCTION_FST_POOL_UNBIASED_H_
59 namespace population {
108 size_t smp1_poolsize,
109 size_t smp2_poolsize,
113 : smp1_poolsize_( smp1_poolsize )
114 , smp2_poolsize_( smp2_poolsize )
115 , avg_policy_( window_average_policy )
133 void reset_()
override
137 sample_filter_stats_smp1_.
clear();
138 sample_filter_stats_smp2_.
clear();
139 sample_filter_stats_b_.
clear();
140 pi_w_smp1_sum_ = 0.0;
141 pi_w_smp2_sum_ = 0.0;
145 void process_( SampleCounts
const& smp1, SampleCounts
const& smp2 )
override
148 auto pi_within_partial_ = [](
149 double poolsize,
double freq_a,
double freq_c,
double freq_g,
double freq_t,
double nt_cnt
151 assert( poolsize > 1.0 );
158 result *= nt_cnt / ( nt_cnt - 1.0 );
159 result *= poolsize / ( poolsize - 1.0 );
164 double smp1_nt_cnt = 0.0;
165 double smp1_freq_a = 0.0;
166 double smp1_freq_c = 0.0;
167 double smp1_freq_g = 0.0;
168 double smp1_freq_t = 0.0;
171 double smp2_nt_cnt = 0.0;
172 double smp2_freq_a = 0.0;
173 double smp2_freq_c = 0.0;
174 double smp2_freq_g = 0.0;
175 double smp2_freq_t = 0.0;
178 ++sample_filter_stats_smp1_[ smp1.status.get() ];
179 if( smp1.status.passing() ) {
181 smp1_freq_a =
static_cast<double>( smp1.a_count ) / smp1_nt_cnt;
182 smp1_freq_c =
static_cast<double>( smp1.c_count ) / smp1_nt_cnt;
183 smp1_freq_g =
static_cast<double>( smp1.g_count ) / smp1_nt_cnt;
184 smp1_freq_t =
static_cast<double>( smp1.t_count ) / smp1_nt_cnt;
186 auto const pi_within_smp1 = pi_within_partial_(
187 smp1_poolsize_, smp1_freq_a, smp1_freq_c, smp1_freq_g, smp1_freq_t, smp1_nt_cnt
189 if( std::isfinite( pi_within_smp1 )) {
190 pi_w_smp1_sum_ += pi_within_smp1;
191 assert( smp1.a_count + smp1.c_count + smp1.g_count + smp1.t_count > 0 );
193 assert( smp1.a_count + smp1.c_count + smp1.g_count + smp1.t_count <= 1 );
198 ++sample_filter_stats_smp2_[ smp2.status.get() ];
199 if( smp2.status.passing() ) {
201 smp2_freq_a =
static_cast<double>( smp2.a_count ) / smp2_nt_cnt;
202 smp2_freq_c =
static_cast<double>( smp2.c_count ) / smp2_nt_cnt;
203 smp2_freq_g =
static_cast<double>( smp2.g_count ) / smp2_nt_cnt;
204 smp2_freq_t =
static_cast<double>( smp2.t_count ) / smp2_nt_cnt;
206 auto const pi_within_smp2 = pi_within_partial_(
207 smp2_poolsize_, smp2_freq_a, smp2_freq_c, smp2_freq_g, smp2_freq_t, smp2_nt_cnt
209 if( std::isfinite( pi_within_smp2 )) {
210 pi_w_smp2_sum_ += pi_within_smp2;
211 assert( smp2.a_count + smp2.c_count + smp2.g_count + smp2.t_count > 0 );
213 assert( smp2.a_count + smp2.c_count + smp2.g_count + smp2.t_count <= 1 );
218 if( smp1.status.passing() && smp2.status.passing() ) {
219 double pi_between = 1.0;
220 pi_between -= smp1_freq_a * smp2_freq_a;
221 pi_between -= smp1_freq_c * smp2_freq_c;
222 pi_between -= smp1_freq_g * smp2_freq_g;
223 pi_between -= smp1_freq_t * smp2_freq_t;
224 if( std::isfinite( pi_between )) {
225 pi_b_sum_ += pi_between;
229 }
else if( ! smp1.status.passing() && ! smp2.status.passing() ) {
230 ++sample_filter_stats_b_[ std::min( smp1.status.get(), smp2.status.get() )];
232 ++sample_filter_stats_b_[ std::max( smp1.status.get(), smp2.status.get() )];
236 double get_result_()
const override
245 switch( estimator_ ) {
253 throw std::invalid_argument(
"Invalid FstPoolCalculatorUnbiased::Estimator" );
272 std::shared_ptr<GenomeLocusSet> provided_loci,
275 switch( estimator_ ) {
277 return get_result_pair( window, provided_loci, variant_filter_stats ).first;
280 return get_result_pair( window, provided_loci, variant_filter_stats ).second;
283 throw std::invalid_argument(
"Invalid FstPoolCalculatorUnbiased::Estimator" );
295 std::shared_ptr<GenomeLocusSet> provided_loci,
299 auto const pi_within =
get_pi_within( window, provided_loci, variant_filter_stats );
300 auto const pi_between =
get_pi_between( window, provided_loci, variant_filter_stats );
301 auto const pi_total =
get_pi_total( pi_within, pi_between );
304 double const fst_nei = 1.0 - ( pi_within / pi_total );
305 double const fst_hud = 1.0 - ( pi_within / pi_between );
306 return { fst_nei, fst_hud };
312 std::shared_ptr<GenomeLocusSet> provided_loci,
316 avg_policy_, window, provided_loci, variant_filter_stats, sample_filter_stats_smp1_
319 avg_policy_, window, provided_loci, variant_filter_stats, sample_filter_stats_smp2_
321 return 0.5 * ( pi_w_smp1 + pi_w_smp2 );
327 std::shared_ptr<GenomeLocusSet> provided_loci,
331 avg_policy_, window, provided_loci, variant_filter_stats, sample_filter_stats_b_
337 return 0.5 * ( pi_within + pi_between );
343 std::shared_ptr<GenomeLocusSet> provided_loci,
346 auto const pi_within =
get_pi_within( window, provided_loci, variant_filter_stats );
347 auto const pi_between =
get_pi_between( window, provided_loci, variant_filter_stats );
354 std::shared_ptr<GenomeLocusSet> provided_loci,
380 auto const pi_total =
get_pi_total( pi_within, pi_between );
383 double const fst_nei = 1.0 - ( pi_within / pi_total );
384 double const fst_hud = 1.0 - ( pi_within / pi_between );
385 return { fst_nei, fst_hud };
390 auto const pi_w_smp1 = pi_w_smp1_sum_.
get();
391 auto const pi_w_smp2 = pi_w_smp2_sum_.
get();
392 return 0.5 * ( pi_w_smp1 + pi_w_smp2 );
397 return pi_b_sum_.
get();
423 size_t smp1_poolsize_ = 0;
424 size_t smp2_poolsize_ = 0;
449 switch( estimator ) {
457 throw std::invalid_argument(
"Invalid FstPoolCalculatorUnbiased::Estimator" );
463 std::string
const& str
469 if( low ==
"hudson" ) {
472 throw std::invalid_argument(
"Invalid FstPoolCalculatorUnbiased::Estimator: " + str );
478 #endif // include guard