|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
43 namespace population {
52 bool skip_if_below_target_depth
56 if( skip_if_below_target_depth && total_sum <= target_depth ) {
62 double const scale =
static_cast<double>(target_depth) /
static_cast<double>(total_sum);
63 size_t a_count =
static_cast<size_t>(
static_cast<double>( sample.
a_count ) *
scale );
64 size_t c_count =
static_cast<size_t>(
static_cast<double>( sample.
c_count ) *
scale );
65 size_t g_count =
static_cast<size_t>(
static_cast<double>( sample.
g_count ) *
scale );
66 size_t t_count =
static_cast<size_t>(
static_cast<double>( sample.
t_count ) *
scale );
67 size_t n_count =
static_cast<size_t>(
static_cast<double>( sample.
n_count ) *
scale );
68 size_t d_count =
static_cast<size_t>(
static_cast<double>( sample.
d_count ) *
scale );
73 size_t const new_sum = a_count + c_count + g_count + t_count + n_count + d_count;
74 assert( new_sum <= target_depth );
75 size_t const remainder = target_depth - new_sum;
76 assert( remainder < 6 );
85 auto frac = std::array<double, 6>{{
86 static_cast<double>( sample.
a_count ) /
static_cast<double>( total_sum ),
87 static_cast<double>( sample.
c_count ) /
static_cast<double>( total_sum ),
88 static_cast<double>( sample.
g_count ) /
static_cast<double>( total_sum ),
89 static_cast<double>( sample.
t_count ) /
static_cast<double>( total_sum ),
90 static_cast<double>( sample.
n_count ) /
static_cast<double>( total_sum ),
91 static_cast<double>( sample.
d_count ) /
static_cast<double>( total_sum )
101 auto const count_refs = std::array<size_t*, 6>{{
102 &a_count, &c_count, &g_count, &t_count, &n_count, &d_count
111 double const interval_len = 1.0 /
static_cast<double>( remainder );
112 for(
size_t i = 0; i < remainder; ++i ) {
119 for(
size_t k = 0; k < 6; ++k ) {
120 if( frac[sorting_order[k]] > max_f ) {
121 max_f = frac[sorting_order[k]];
124 if( frac[sorting_order[k]] >= interval_len ) {
134 frac[sorting_order[max_k]] -= interval_len;
135 ++(*count_refs[sorting_order[max_k]]);
161 for(
auto& sample : variant.
samples ) {
178 for(
auto& sample : variant.
samples ) {
191 template<
typename Distribution>
195 Distribution distribution,
196 bool skip_if_below_target_depth
200 if( skip_if_below_target_depth && total_sum <= max_depth ) {
205 auto const new_counts = distribution(
206 std::vector<size_t>{{
216 assert( new_counts.size() == 6 );
219 sample.
a_count = new_counts[0];
220 sample.
c_count = new_counts[1];
221 sample.
g_count = new_counts[2];
222 sample.
t_count = new_counts[3];
223 sample.
n_count = new_counts[4];
224 sample.
d_count = new_counts[5];
233 return resample_counts_<std::vector<size_t>(*)(std::vector<size_t>
const&, size_t)>(
243 for(
auto& sample : variant.
samples ) {
253 return resample_counts_<std::vector<size_t>(*)(std::vector<size_t>
const&, size_t)>(
263 for(
auto& sample : variant.
samples ) {
273 return resample_counts_<std::vector<size_t>(*)(std::vector<size_t>
const&, size_t)>(
283 for(
auto& sample : variant.
samples ) {
void resample_counts(SampleCounts &sample, size_t target_depth)
Resample all counts in a SampleCounts sample to a new target_depth.
Provides some valuable algorithms that are not part of the C++ 11 STL.
void rescale_counts_(SampleCounts &sample, size_t target_depth, bool skip_if_below_target_depth)
One set of nucleotide sample counts, for example for a given sample that represents a pool of sequenced individuals.
size_type a_count
Count of all A nucleotides that are present in the sample.
void scale(Histogram &h, double factor)
size_type t_count
Count of all T nucleotides that are present in the sample.
void resample_counts_(SampleCounts &sample, size_t max_depth, Distribution distribution, bool skip_if_below_target_depth)
Local helper function to avoid code duplication. Takes the distribution (with or without replacement)...
std::vector< size_t > multinomial_distribution(std::vector< size_t > const &p, size_t n)
Select a random sample following a multinomial distribution.
Provides easy and fast logging functionality.
size_type n_count
Count of all N (undetermined/any) nucleotides that are present in the sample.
size_type c_count
Count of all C nucleotides that are present in the sample.
std::vector< size_t > multivariate_hypergeometric_distribution(std::vector< size_t > const &p, size_t n)
Select a random sample following a multivariate hypergeometric distribution.
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
void subsample_counts_with_replacement(SampleCounts &sample, size_t max_depth)
Transform a SampleCounts sample by subsampling the nucleotide counts (A, C, G, T, as well as N and D)...
void subsample_counts_without_replacement(SampleCounts &sample, size_t max_depth)
Transform a SampleCounts sample by subsampling the nucleotide counts (A, C, G, T, as well as N and D)...
std::vector< SampleCounts > samples
void subscale_counts(SampleCounts &sample, size_t max_depth)
Transform a SampleCounts sample by sub-scaling the base counts (A, C, G, T, as well as N and D) to sum up to max_depth if max_depth is exceeded for the sample.
size_type d_count
Count of all deleted (*) nucleotides that are present in the sample.
void rescale_counts(SampleCounts &sample, size_t target_depth)
Transform a SampleCounts sample by re-scaling the base counts (A, C, G, T, as well as N and D) to sum up to target_depth.
constexpr size_t sample_counts_sum(SampleCounts const &sample)
Sum up all the base counts at this sample, that is, the sum of all A, C, G, T, as well as the N and D counts.
std::array< size_t, 6 > sample_counts_sorting_order(std::array< T, 6 > const &v)
Return the sorting order of six values, for instance of the four nucleotides ACGT and the N and D counts.
size_type g_count
Count of all G nucleotides that are present in the sample.