|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file. 1 #ifndef GENESIS_POPULATION_FUNCTION_FUNCTIONS_H_
2 #define GENESIS_POPULATION_FUNCTION_FUNCTIONS_H_
47 namespace population {
61 c ==
'A' || c ==
'a' ||
62 c ==
'C' || c ==
'c' ||
63 c ==
'G' || c ==
'g' ||
76 c ==
'A' || c ==
'a' ||
77 c ==
'C' || c ==
'c' ||
78 c ==
'G' || c ==
'g' ||
79 c ==
'T' || c ==
't' ||
132 auto indices = std::array<size_t, 4>{{ 0, 1, 2, 3 }};
133 if( values[indices[0]] < values[indices[1]] ) {
136 if( values[indices[2]] < values[indices[3]] ) {
139 if( values[indices[0]] < values[indices[2]] ) {
142 if( values[indices[1]] < values[indices[3]] ) {
145 if( values[indices[1]] < values[indices[2]] ) {
150 assert( values[indices[0]] >= values[indices[1]] );
151 assert( values[indices[1]] >= values[indices[2]] );
152 assert( values[indices[2]] >= values[indices[3]] );
171 int i0 = (v[0] < v[1]) + (v[0] < v[2]) + (v[0] < v[3]) + (v[0] < v[4]) + (v[0] < v[5]);
172 int i1 = (v[1] <= v[0]) + (v[1] < v[2]) + (v[1] < v[3]) + (v[1] < v[4]) + (v[1] < v[5]);
173 int i2 = (v[2] <= v[0]) + (v[2] <= v[1]) + (v[2] < v[3]) + (v[2] < v[4]) + (v[2] < v[5]);
174 int i3 = (v[3] <= v[0]) + (v[3] <= v[1]) + (v[3] <= v[2]) + (v[3] < v[4]) + (v[3] < v[5]);
175 int i4 = (v[4] <= v[0]) + (v[4] <= v[1]) + (v[4] <= v[2]) + (v[4] <= v[3]) + (v[4] < v[5]);
176 int i5 = (v[5] <= v[0]) + (v[5] <= v[1]) + (v[5] <= v[2]) + (v[5] <= v[3]) + (v[5] <= v[4]);
177 assert( i0 + i1 + i2 + i3 + i4 + i5 == 15 );
180 std::array<size_t, 6> order;
189 assert( v[order[0]] >= v[order[1]] );
190 assert( v[order[1]] >= v[order[2]] );
191 assert( v[order[2]] >= v[order[3]] );
192 assert( v[order[3]] >= v[order[4]] );
193 assert( v[order[4]] >= v[order[5]] );
210 SampleCounts
const& sample_a,
211 SampleCounts
const& sample_b
246 size_t allele_count( SampleCounts
const& sample,
size_t min_count );
257 size_t allele_count( SampleCounts
const& sample,
size_t min_count,
size_t max_count );
267 void merge_inplace( SampleCounts& p1, SampleCounts
const& p2 );
272 SampleCounts
merge( SampleCounts
const& p1, SampleCounts
const& p2 );
353 std::pair<char, double>
consensus( SampleCounts
const& sample );
376 Variant
const& variant,
395 Variant
const& variant,
457 std::ostream&
operator<<( std::ostream& os, SampleCounts
const& bs );
462 #endif // include guard
void swap(Sample &lhs, Sample &rhs)
size_t total_sample_counts_sum(Variant const &variant, SampleCountsFilterPolicy filter_policy)
Sum up all the base counts at this sample, that is, the sum of all A, C, G, T, as well as the N and D...
SampleCounts merge(SampleCounts const &p1, SampleCounts const &p2)
Merge the counts of two SampleCounts instances.
std::ostream & operator<<(std::ostream &os, SampleCounts const &bs)
Output stream operator for SampleCounts instances.
void merge_inplace(SampleCounts &p1, SampleCounts const &p2)
Merge the counts of two SampleCounts instances, by adding the counts of the second (p2) to the first (p1).
One set of nucleotide sample counts, for example for a given sample that represents a pool of sequenc...
size_type a_count
Count of all A nucleotides that are present in the sample.
void set_base_count(SampleCounts &sample, char base, SampleCounts::size_type value)
Set the count for a base given as a char.
constexpr size_t nucleotide_sum(SampleCounts const &sample)
Count of the pure nucleotide bases at this position, that is, the sum of all A, C,...
size_t allele_count(SampleCounts const &sample)
Return the number of alleles, that is, of non-zero nucleotide counts of the sample.
char guess_alternative_base(Variant const &variant, bool force, SampleCountsFilterPolicy filter_policy)
Guess the alternative base of a Variant.
SampleCounts::size_type get_base_count(SampleCounts const &sample, char base)
Get the count for a base given as a char.
bool is_covered(GenomeRegion const &region, std::string const &chromosome, size_t position)
Test whether the chromosome/position is within a given genomic region.
SampleCountsFilterPolicy
Policy helper to decide how to treat filtered SampleCounts.
size_type t_count
Count of all T nucleotides that are present in the sample.
constexpr bool is_valid_base_or_n(char c)
Return whether a given base is in ACGTN, case insensitive.
size_type n_count
Count of all N (undetermined/any) nucleotides that are present in the sample.
SampleCounts merge_sample_counts(Variant const &v, SampleCountsFilterPolicy filter_policy)
Merge the counts of a vector of SampleCounts instances.
size_type c_count
Count of all C nucleotides that are present in the sample.
constexpr bool is_valid_base(char c)
Return whether a given base is in ACGT, case insensitive.
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
std::array< size_t, 4 > nucleotide_sorting_order(std::array< T, 4 > const &values)
Return the sorting order of four values, for instance of the four nucleotides ACGT,...
void guess_and_set_ref_and_alt_bases(Variant &variant, bool force, SampleCountsFilterPolicy filter_policy)
Guess the reference and alternative bases for a Variant, and set them.
std::vector< SampleCounts > samples
std::pair< SortedSampleCounts, SortedSampleCounts > sorted_average_sample_counts(SampleCounts const &sample_a, SampleCounts const &sample_b)
Return the sorted base counts of both input samples, ordered by the average frequencies of the nucleot...
std::pair< char, double > consensus(SampleCounts const &sample)
Consensus character for a SampleCounts, and its confidence.
size_t total_nucleotide_sum(Variant const &variant, SampleCountsFilterPolicy filter_policy)
Count of the pure nucleotide bases at this position, that is, the sum of all A, C,...
char guess_reference_base(Variant const &variant, bool force, SampleCountsFilterPolicy filter_policy)
Guess the reference base of a Variant.
size_type d_count
Count of all deleted (*) nucleotides that are present in the sample.
Lookup of Sequences of a reference genome.
SortedSampleCounts sorted_sample_counts(SampleCounts const &sample)
Return the order of base counts (nucleotides), largest one first.
size_t size_type
Public alias for the size type that the class uses to store its counts.
constexpr size_t sample_counts_sum(SampleCounts const &sample)
Sum up all the base counts at this sample, that is, the sum of all A, C, G, T, as well as the N and D...
std::array< size_t, 6 > sample_counts_sorting_order(std::array< T, 6 > const &v)
Return the sorting order of six values, for instance of the four nucleotides ACGT and the N and D cou...
size_type g_count
Count of all G nucleotides that are present in the sample.