|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
46 namespace population {
53 : number_of_bins_( number_of_bins )
58 on_chromosome_start_( chromosome, accu );
61 on_chromosome_finish_( chromosome, accu );
66 on_emission_( window );
81 for(
auto record =
VcfInputStream( vcf_file ); record; ++record ) {
87 std::string
const& chromosome,
size_t position,
double frequency
89 if( ! std::isfinite(frequency) || frequency < 0.0 || frequency > 1.0 ) {
90 throw std::invalid_argument(
95 window_generator_.
enqueue( chromosome, position, frequency );
102 if( skip_invalid_records_ ) {
105 throw std::runtime_error(
106 "Invalid VCF Record for Allele Frequency Window that is either not a biallelic SNP "
107 "or does not have the FORMAT field `AD`."
116 if( ad_field.values_per_sample() != 2 ) {
117 throw std::runtime_error(
118 "Invalid VCF Record that claims to be biallelic, but in fact contains more than "
119 "two values for the FORMAT field `AD` for a sample."
123 ref += ad_field.get_value_at(0);
124 alt += ad_field.get_value_at(1);
130 double freq =
static_cast<double>(alt) /
static_cast<double>( alt + ref );
131 if( ! std::isfinite(freq) ) {
154 void AlleleFrequencyWindow::on_chromosome_start_(
155 std::string
const& chromosome,
156 AFWindow::Accumulator&
158 spectra_.emplace_back( chromosome );
159 assert( ! spectra_.empty() );
160 assert( spectra_.back().chromosome == chromosome );
168 void AlleleFrequencyWindow::on_chromosome_finish_(
169 std::string
const& chromosome,
170 AFWindow::Accumulator&
172 assert( ! spectra_.empty() );
173 assert( spectra_.back().chromosome == chromosome );
182 void AlleleFrequencyWindow::on_emission_(
183 AFWindow
const& window
185 assert( ! spectra_.empty() );
186 auto& values = spectra_.back().values;
187 values.emplace_back( number_of_bins_, 0 );
188 auto& bins = values.back();
191 for(
auto const& entry : window ) {
192 if( ! std::isfinite(entry.data) || entry.data < 0.0 || entry.data > 1.0 ) {
193 throw std::invalid_argument(
194 "Invalid allele frequency " +
std::to_string(entry.data) +
" at " +
195 spectra_.back().chromosome +
":" +
std::to_string( entry.position )
201 size_t index = std::floor( entry.data *
static_cast<double>( number_of_bins_ ));
202 index = std::min( index, number_of_bins_ - 1 );
210 #endif // htslib guard
bool is_snp() const
Return whether this variant is a SNP.
AlleleFrequencyWindow(size_t width, size_t number_of_bins=100)
self_type & add_emission_plugin(on_emission const &plugin)
Add an on_emission plugin function, typically a lambda.
Window over the chromosomes of a genome.
std::function< void(Spectrum const &)> on_chromosome_start
void finish_chromosome(size_t last_position=0)
Explicitly finish a chromosome, and emit all remaining Windows.
std::string to_string(GenomeLocus const &locus)
SlidingWindowType
SlidingWindowType of a Window, that is, whether we slide along a fixed size interval of the genome,...
std::string get_chromosome() const
Get the name of a chromosome/contig/sequence (CHROM, first column of the line).
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
void enqueue(std::string const &chromosome, size_t position, double frequency)
bool has_format(std::string const &id) const
Return whether the record has a given FORMAT id present.
self_type & add_chromosome_start_plugin(on_chromosome_start const &plugin)
Add an on_chromosome_start plugin function, typically a lambda.
void enqueue(std::string const &chromosome, size_t position, Data const &data)
Enqueue a new Data value.
size_t get_position() const
Get the position within the chromosome/contig (POS, second column of the line).
@ kInterval
Windows of this type are defined by a fixed start and end position on a chromosome.
std::function< void(Spectrum const &)> on_chromosome_finish
self_type & add_chromosome_finish_plugin(on_chromosome_finish const &plugin)
Add an on_chromosome_finish plugin function, typically a lambda.
Capture the information of a single SNP/variant line in a VCF/BCF file.
size_t get_alternatives_count() const
Get the number of alternative alleles/sequences of the variant (ALT, fifth column of the line).
genesis::utils::Range< VcfFormatIteratorInt > get_format_int(std::string const &id) const
Get an iterator pair over the samples that accesses a certain FORMAT id as an int value.
void run_vcf(std::string const &vcf_file)