|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
40 namespace population {
48 unsigned char min_phred_score
53 size_t total_count = 0;
54 size_t skip_count = 0;
56 for(
size_t i = 0; i < sample.
read_bases.size(); ++i ) {
104 throw std::runtime_error(
105 "Malformed pileup sample: Invalid allele character " +
119 assert( skip_count + total_count == sample.
read_bases.size() );
128 auto const count_sum =
133 !( count_sum == 0 && result.
d_count + skip_count == 1 )
136 throw std::runtime_error(
138 ") does not match the number of bases found in the sample (" +
148 unsigned char min_phred_score
158 for(
size_t i = 0; i < record.
samples.size(); ++i ) {
170 if( sorted[1].count > 0 ) {
179 std::shared_ptr< utils::BaseInputSource > source,
192 if( max_lines > 0 && line_cnt >= max_lines ) {
Variant convert_to_variant(SimplePileupReader::Record const &record, unsigned char min_phred_score)
One set of nucleotide sample counts, for example for a given sample that represents a pool of sequenc...
Reader for line-by-line assessment of (m)pileup files.
size_type a_count
Count of all A nucleotides that are present in the sample.
size_t read_depth
Total count of reads covering this position.
std::string to_string(GenomeLocus const &locus)
constexpr char to_upper(char c) noexcept
Return the upper case version of a letter, ASCII-only.
size_type t_count
Count of all T nucleotides that are present in the sample.
One sample in a pileup line/record.
size_type n_count
Count of all N (undetermined/any) nucleotides that are present in the sample.
SampleCounts convert_to_sample_counts(SimplePileupReader::Sample const &sample, unsigned char min_phred_score)
Single line/record from a pileup file.
QualityEncoding
List of quality encodings for which we support decoding.
size_type c_count
Count of all C nucleotides that are present in the sample.
constexpr bool is_valid_base(char c)
Return whether a given base is in ACGT, case insensitive.
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
QualityEncoding guess_quality_encoding(std::array< size_t, 128 > const &char_counts)
Guess the quality score encoding, based on counts of how often each char appeared in the quality stri...
std::string read_bases
All bases (except for indels) of the reads that cover the given position.
std::string char_to_hex(char c, bool full)
Return the name and hex representation of a char.
std::vector< SampleCounts > samples
std::vector< Sample > samples
genesis::sequence::QualityEncoding guess_pileup_quality_encoding(std::shared_ptr< utils::BaseInputSource > source, size_t max_lines)
Guess the quality score encoding for an (m)pileup input, based on counts of how often each char appea...
size_type d_count
Count of all deleted (*) nucleotides that are present in the sample.
SortedSampleCounts sorted_sample_counts(SampleCounts const &sample)
Return the order of base counts (nucleotides), largest one first.
std::vector< unsigned char > phred_scores
Phred-scaled scores of the bases as given in read_bases.
bool with_quality_string() const
size_type g_count
Count of all G nucleotides that are present in the sample.