42 namespace population {
54 if( record.
samples.size() != 1 ) {
55 throw std::runtime_error(
56 "Invalid pileup record line that does not contain exactly one sample."
61 auto const& smp = record.
samples[0];
62 if( smp.read_bases.size() != smp.phred_scores.size() ) {
63 throw std::runtime_error(
64 "Invalid pileup record line where number of read bases "
65 "differs from number of phred scores."
72 auto const base_cnt = smp.read_bases.size();
73 result.
alleles.reserve( base_cnt );
77 auto check_ancestral_base_char_ = [](
char r ){
78 if( r !=
'A' && r !=
'C' && r !=
'G' && r !=
'T' && r !=
'N' ) {
79 throw std::runtime_error(
80 "Invalid ancestral base in pileup record line that is not in [ACGTN]."
87 if( smp.ancestral_base !=
'\0' ) {
88 check_ancestral_base_char_( smp.ancestral_base );
89 derived_base = smp.ancestral_base;
99 throw std::runtime_error(
100 "Cannot convert pileup record line for allele frequency estimation, "
101 "as it misses the ancestral allele information."
107 for(
size_t i = 0; i < base_cnt; ++i ) {
109 result.
alleles.emplace_back(
false );
110 result.
phred_scores.emplace_back( smp.phred_scores[i] );
111 }
else if( smp.read_bases[i] == derived_base ) {
112 result.
alleles.emplace_back(
true );
113 result.
phred_scores.emplace_back( smp.phred_scores[i] );
123 std::vector<bool>
const& alleles,
124 std::vector<unsigned char>
const& phred_scores,
134 assert( prob1.size() == n + 1 );
135 assert( prob2.size() == n + 1 );
137 auto result = std::vector<double>( n + 1 );
138 for(
size_t i = 0; i < n + 1; ++i ) {
139 result[i] = ( prob1[i] + prob2[i] ) / 2.0;
147 std::vector<bool>
const& alleles,
148 std::vector<unsigned char>
const& phred_scores,
153 auto result = std::vector<double>( n + 1, 1.0 );
157 auto const r = alleles.size();
158 auto const nd =
static_cast<double>( n );
159 if( alleles.size() != phred_scores.size() ) {
160 throw std::invalid_argument(
161 "Invalid alleles and their error probabilities with different size."
166 for(
size_t i = 0; i < r; ++i ) {
170 assert( 0.0 < err_prob && err_prob <= 1.0 );
175 if( alleles[i] ^ invert_alleles ) {
176 for(
size_t j = 0; j < n + 1; ++j ) {
177 auto const jn =
static_cast<double>( j ) / nd;
178 auto const term1 = ( 1.0 - err_prob ) * jn;
179 auto const term2 = err_prob * ( 1.0 - jn );
181 result[j] *= ( term1 + term2 );
184 for(
size_t j = 0; j < n + 1; ++j ) {
185 auto const jn =
static_cast<double>( j ) / nd;
186 auto const term1 = ( 1.0 - err_prob ) * ( 1.0 - jn );
187 auto const term2 = err_prob * jn;
189 result[j] *= ( term1 + term2 );