|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file. 1 #ifndef GENESIS_POPULATION_FUNCTION_DIVERSITY_POOL_PROCESSOR_H_
2 #define GENESIS_POPULATION_FUNCTION_DIVERSITY_POOL_PROCESSOR_H_
54 namespace population {
95 std::shared_ptr<utils::ThreadPool>
thread_pool =
nullptr,
98 : avg_policy_( window_average_policy )
101 , is_default_constructed_( false )
134 thread_pool_ = value;
140 return threading_threshold_;
154 threading_threshold_ = value;
167 std::vector<size_t>
const& pool_sizes
169 if( ! calculators_.empty() ) {
170 throw std::runtime_error(
171 "Cannot call DiversityPoolProcessor::add_calculators() multiple times."
174 for(
auto pool_size : pool_sizes ) {
176 results_.emplace_back();
186 return calculators_.size();
191 assert( results_.size() == calculators_.size() );
192 for(
size_t i = 0; i < results_.size(); ++i ) {
193 calculators_[i].reset();
196 filter_stats_.
clear();
202 if( is_default_constructed_ ) {
203 throw std::domain_error(
"Cannot use a default constructed FstPoolProcessor" );
209 if( variant.
samples.size() != calculators_.size() ) {
210 throw std::runtime_error(
211 "Invalid number of samples when computing Diversity Pool: Variant contains " +
213 std::to_string( calculators_.size() ) +
" pool sizes have been provided."
224 auto process_ = [&](
size_t index ) {
225 assert( index < calculators_.size() );
226 calculators_[index].process( variant.
samples[index] );
230 assert( variant.
samples.size() == calculators_.size() );
231 if( thread_pool_ && calculators_.size() >= threading_threshold_ ) {
233 0, variant.
samples.size(), process_, thread_pool_
236 for(
size_t i = 0; i < variant.
samples.size(); ++i ) {
252 std::vector<DiversityPoolCalculator::Result>
const&
get_result(
254 std::shared_ptr<GenomeLocusSet> provided_loci
256 assert( results_.size() == calculators_.size() );
257 for(
size_t i = 0; i < results_.size(); ++i ) {
259 avg_policy_, window, provided_loci, filter_stats_, calculators_[i].
get_filter_stats()
261 results_[i] = calculators_[i].get_result( window_avg_denom );
272 std::vector<DiversityPoolCalculator::Result>
const&
get_result()
const
274 assert( results_.size() == calculators_.size() );
275 for(
size_t i = 0; i < results_.size(); ++i ) {
276 results_[i] = calculators_[i].get_result( 1 );
290 return filter_stats_;
302 std::vector<DiversityPoolCalculator>::iterator
begin()
304 return calculators_.begin();
307 std::vector<DiversityPoolCalculator>::iterator
end()
309 return calculators_.end();
323 std::vector<DiversityPoolCalculator> calculators_;
330 mutable std::vector<DiversityPoolCalculator::Result> results_;
334 std::shared_ptr<utils::ThreadPool> thread_pool_;
335 size_t threading_threshold_ = 0;
339 bool is_default_constructed_ =
true;
359 std::vector<size_t>
const& pool_sizes
369 #endif // include guard
DiversityPoolProcessor & thread_pool(std::shared_ptr< utils::ThreadPool > value)
Set the thread pool used for processing, if enough samples are being processed.
DiversityPoolProcessor & operator=(DiversityPoolProcessor const &)=default
Helper class to iterate over Variants and process the samples (SampleCounts), using a set of Diversit...
std::vector< DiversityPoolCalculator::Result > const & get_result(BaseWindow< D > const &window, std::shared_ptr< GenomeLocusSet > provided_loci) const
Get a list of all resulting values for all samples.
std::shared_ptr< ThreadPool > global_thread_pool() const
Return a global thread pool to be used for parallel computations.
VariantFilterStats get_filter_stats() const
Get the sum of filter statistics of all Variants processed here.
MultiFuture< void > parallel_for(T1 begin, T2 end, F &&body, std::shared_ptr< ThreadPool > thread_pool=nullptr, size_t num_blocks=0, bool auto_wait=true)
Parallel for over a range of positions, breaking the range into blocks for which the body function is...
Provides some valuable additions to STD.
std::vector< DiversityPoolCalculator >::iterator end()
Data struct to collect all diversity statistics computed here.
std::string to_string(GenomeLocus const &locus)
DiversityPoolProcessor()=default
Default constructor.
void process(Variant const &variant)
void add_calculators(DiversityPoolSettings const &settings, std::vector< size_t > const &pool_sizes)
Create and add a set of calculators for a given list of samples.
~DiversityPoolProcessor()=default
std::vector< DiversityPoolCalculator >::iterator begin()
DiversityPoolProcessor make_diversity_pool_processor(WindowAveragePolicy window_average_policy, DiversityPoolSettings const &settings, std::vector< size_t > const &pool_sizes)
Create a DiversityPoolProcessor to compute diversity for all samples.
DiversityPoolProcessor & threading_threshold(size_t value)
Set the threshold of calculators after which the processing is done in threads.
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
double window_average_denominator(WindowAveragePolicy policy, BaseWindow< D > const &window, std::shared_ptr< GenomeLocusSet > provided_loci, VariantFilterStats const &variant_filter_stats, SampleCountsFilterStats const &sample_counts_filter_stats)
Get the denominator to use for averaging an estimator across a window.
size_t threading_threshold() const
WindowAveragePolicy
Select the method to use for computing window averages of statistic estimators.
std::vector< SampleCounts > samples
DiversityPoolProcessor(WindowAveragePolicy window_average_policy, std::shared_ptr< utils::ThreadPool > thread_pool=nullptr, size_t threading_threshold=4096)
Construct a processor.
static Options & get()
Returns a single instance of this class.
std::vector< DiversityPoolCalculator > const & calculators() const
Base class for Window and WindowView, to share common functionality.
std::vector< DiversityPoolCalculator::Result > const & get_result() const
Get a list of all resulting values for all samples.
std::shared_ptr< utils::ThreadPool > thread_pool() const
Get the thread pool used for processing, if enough samples are being processed.
Compute Theta Pi, Theta Watterson, and Tajima's D in their pool-sequencing corrected versions accordin...
Settings used by different pool-sequencing corrected diversity statistics.