A library for working with phylogenetic and population genetic data.
v0.32.0
variant_filter.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2024 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@sund.ku.dk>
20  University of Copenhagen, Globe Institute, Section for GeoGenetics
21  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
22 */
23 
32 
35 
36 #include <array>
37 #include <cassert>
38 #include <cmath>
39 #include <iostream>
40 #include <stdexcept>
41 #include <string>
42 #include <sstream>
43 
44 namespace genesis {
45 namespace population {
46 
47 // We want to make sure that the tags enum is exactly as expected here. In case that we later
48 // add other values to that enum, we want to know here, in order to adapt all below functions
49 // accordingly.
50 static_assert(
51  static_cast<FilterStatus::IntType>( VariantFilterTag::kEnd ) == 17,
52  "VariantFilterTag::kEnd != 17. The enum has values that are not accounted for."
53 );
54 static_assert(
56  "VariantFilterTagCategory::kEnd != 6. The enum has values that are not accounted for."
57 );
58 
59 // =================================================================================================
60 // Stats
61 // =================================================================================================
62 
64 {
65  // Just return the category of the tag.
66  switch( tag ) {
90  default: {
91  throw std::invalid_argument(
92  "Invalid VariantFilterTag: " +
93  std::to_string( static_cast<FilterStatus::IntType>( tag ))
94  );
95  }
96  }
97  assert( false );
98 
99  // Make compilers happy, just in case.
101 }
102 
104  VariantFilterStats const& stats
105 ) {
106  // assert( stats[ VariantFilterTag::kEnd ] == 0 );
107  assert( stats.data.size() == static_cast<size_t>( VariantFilterTag::kEnd ) );
108 
109  // Build our result, by simply adding up the values to our simple categories / classes.
128  return result;
129 }
130 
132  VariantFilterStats const& stats, VariantFilterTagCategory category
133 ) {
134  // assert( stats[ VariantFilterTag::kEnd ] == 0 );
135  assert( stats.data.size() == static_cast<size_t>( VariantFilterTag::kEnd ) );
136 
137  // Select the requested category and add up their values.
138  size_t result = 0;
139  switch( category ) {
141  result += stats[ VariantFilterTag::kPassed ];
142  break;
143  }
145  result += stats[ VariantFilterTag::kMaskedPosition ];
146  result += stats[ VariantFilterTag::kMaskedRegion ];
147  break;
148  }
150  result += stats[ VariantFilterTag::kMissing ];
151  result += stats[ VariantFilterTag::kNotPassed ];
152  result += stats[ VariantFilterTag::kInvalid ];
153  break;
154  }
156  result += stats[ VariantFilterTag::kNoSamplePassed ];
157  result += stats[ VariantFilterTag::kNotAllSamplesPassed ];
158  break;
159  }
161  result += stats[ VariantFilterTag::kEmpty ];
162  result += stats[ VariantFilterTag::kBelowMinReadDepth ];
163  result += stats[ VariantFilterTag::kAboveMaxReadDepth ];
165  break;
166  }
168  result += stats[ VariantFilterTag::kNotSnp ];
169  result += stats[ VariantFilterTag::kNotBiallelicSnp ];
170  result += stats[ VariantFilterTag::kBelowSnpMinCount ];
171  result += stats[ VariantFilterTag::kAboveSnpMaxCount ];
172  result += stats[ VariantFilterTag::kBelowMinAlleleFreq ];
173  break;
174  }
175  default: {
176  throw std::invalid_argument(
177  "Invalid VariantFilterTagCategory: " +
178  std::to_string( static_cast<FilterStatus::IntType>( category ))
179  );
180  }
181  }
182  return result;
183 }
184 
185 // =================================================================================================
186 // Printing
187 // =================================================================================================
188 
189 // --------------------------------------------------------------------------------------
190 // Print variant stats
191 // --------------------------------------------------------------------------------------
192 
194  std::ostream& os,
195  VariantFilterStats const& stats,
196  bool verbose
197 ) {
198  // assert( stats[ VariantFilterTag::kEnd ] == 0 );
199  assert( stats.data.size() == static_cast<size_t>( VariantFilterTag::kEnd ) );
200 
201  // Go through all possible enum values and print them
202  if( stats[VariantFilterTag::kMaskedPosition] > 0 || verbose ) {
203  os << "Masked position: " << stats[VariantFilterTag::kMaskedPosition] << "\n";
204  }
205  if( stats[VariantFilterTag::kMaskedRegion] > 0 || verbose ) {
206  os << "Masked region: " << stats[VariantFilterTag::kMaskedRegion] << "\n";
207  }
208  if( stats[VariantFilterTag::kMissing] > 0 || verbose ) {
209  os << "Missing: " << stats[VariantFilterTag::kMissing] << "\n";
210  }
211  if( stats[VariantFilterTag::kNotPassed] > 0 || verbose ) {
212  os << "Not passed: " << stats[VariantFilterTag::kNotPassed] << "\n";
213  }
214  if( stats[VariantFilterTag::kInvalid] > 0 || verbose ) {
215  os << "Invalid: " << stats[VariantFilterTag::kInvalid] << "\n";
216  }
217  if( stats[VariantFilterTag::kNoSamplePassed] > 0 || verbose ) {
218  os << "No sample passed: " << stats[VariantFilterTag::kNoSamplePassed] << "\n";
219  }
220  if( stats[VariantFilterTag::kNotAllSamplesPassed] > 0 || verbose ) {
221  os << "Not all samples passed: " << stats[VariantFilterTag::kNotAllSamplesPassed] << "\n";
222  }
223  if( stats[VariantFilterTag::kEmpty] > 0 || verbose ) {
224  os << "Empty: " << stats[VariantFilterTag::kEmpty] << "\n";
225  }
226  if( stats[VariantFilterTag::kBelowMinReadDepth] > 0 || verbose ) {
227  os << "Below min read depth: " << stats[VariantFilterTag::kBelowMinReadDepth] << "\n";
228  }
229  if( stats[VariantFilterTag::kAboveMaxReadDepth] > 0 || verbose ) {
230  os << "Above max read depth: " << stats[VariantFilterTag::kAboveMaxReadDepth] << "\n";
231  }
232  if( stats[VariantFilterTag::kAboveDeletionsCountLimit] > 0 || verbose ) {
233  os << "Above deletions limit: " << stats[VariantFilterTag::kAboveDeletionsCountLimit] << "\n";
234  }
235  if( stats[VariantFilterTag::kNotSnp] > 0 || verbose ) {
236  os << "Not SNP: " << stats[VariantFilterTag::kNotSnp] << "\n";
237  }
238  if( stats[VariantFilterTag::kNotBiallelicSnp] > 0 || verbose ) {
239  os << "Not biallelic SNP: " << stats[VariantFilterTag::kNotBiallelicSnp] << "\n";
240  }
241  if( stats[VariantFilterTag::kBelowSnpMinCount] > 0 || verbose ) {
242  os << "Below SNP min count: " << stats[VariantFilterTag::kBelowSnpMinCount] << "\n";
243  }
244  if( stats[VariantFilterTag::kAboveSnpMaxCount] > 0 || verbose ) {
245  os << "Above SNP max count: " << stats[VariantFilterTag::kAboveSnpMaxCount] << "\n";
246  }
247  if( stats[VariantFilterTag::kBelowMinAlleleFreq] > 0 || verbose ) {
248  os << "Below min alele freq: " << stats[VariantFilterTag::kBelowMinAlleleFreq] << "\n";
249  }
250  if( stats[VariantFilterTag::kPassed] > 0 || verbose ) {
251  os << "Passed: " << stats[VariantFilterTag::kPassed] << "\n";
252  }
253  return os;
254 }
255 
257  VariantFilterStats const& stats,
258  bool verbose
259 ) {
260  std::stringstream ss;
261  print_variant_filter_stats( ss, stats, verbose );
262  return ss.str();
263 }
264 
265 // --------------------------------------------------------------------------------------
266 // Print category stats
267 // --------------------------------------------------------------------------------------
268 
270  std::ostream& os,
271  VariantFilterCategoryStats const& stats,
272  bool verbose
273 ) {
274  // assert( stats[ VariantFilterTagCategory::kEnd ] == 0 );
275  assert( stats.data.size() == static_cast<size_t>( VariantFilterTagCategory::kEnd ) );
276 
277  // Go through all possible enum values and print them
278  if( stats[VariantFilterTagCategory::kMasked] > 0 || verbose ) {
279  os << "Masked: " << stats[VariantFilterTagCategory::kMasked] << "\n";
280  }
281  if( stats[VariantFilterTagCategory::kMissingInvalid] > 0 || verbose ) {
282  os << "Missing: " << stats[VariantFilterTagCategory::kMissingInvalid] << "\n";
283  }
284  if( stats[VariantFilterTagCategory::kSamplesFailed] > 0 || verbose ) {
285  os << "Empty: " << stats[VariantFilterTagCategory::kSamplesFailed] << "\n";
286  }
287  if( stats[VariantFilterTagCategory::kNumeric] > 0 || verbose ) {
288  os << "Numeric: " << stats[VariantFilterTagCategory::kNumeric] << "\n";
289  }
290  if( stats[VariantFilterTagCategory::kInvariant] > 0 || verbose ) {
291  os << "Invariant: " << stats[VariantFilterTagCategory::kInvariant] << "\n";
292  }
293  if( stats[VariantFilterTagCategory::kPassed] > 0 || verbose ) {
294  os << "Passed: " << stats[VariantFilterTagCategory::kPassed] << "\n";
295  }
296  return os;
297 }
298 
300  VariantFilterCategoryStats const& stats,
301  bool verbose
302 ) {
303  std::stringstream ss;
304  print_variant_filter_category_stats( ss, stats, verbose );
305  return ss.str();
306 }
307 
308 } // namespace population
309 } // namespace genesis
genesis::population::VariantFilterTag::kAboveSnpMaxCount
@ kAboveSnpMaxCount
Sum of nucleotides is above VariantFilterNumericalParams::snp_max_count.
genesis::population::VariantFilterTag::kMaskedPosition
@ kMaskedPosition
Position has been masked out from processing.
functions.hpp
genesis::population::VariantFilterTagCategory::kNumeric
@ kNumeric
Any of the numeric variant filters failed.
genesis::population::VariantFilterTag::kBelowMinAlleleFreq
@ kBelowMinAlleleFreq
Did not reach minimum allele frequency.
genesis::population::VariantFilterTagCategory::kSamplesFailed
@ kSamplesFailed
Either all or some of the samples failed their respetive filters.
genesis::population::VariantFilterTag::kInvalid
@ kInvalid
Generic indicator that the Variant is invalid.
genesis::population::VariantFilterTag::kNotPassed
@ kNotPassed
Generic indicator that the Variant has not passed a filter.
genesis::population::VariantFilterTagCategory
VariantFilterTagCategory
List of filter categories for a Variant.
Definition: variant_filter.hpp:261
genesis::population::FilterStats::data
FilterTagArray data
Definition: filter_stats.hpp:184
genesis::population::VariantFilterTag::kNotBiallelicSnp
@ kNotBiallelicSnp
SNP position, but not biallelic, i.e., has more than one alternative.
genesis::population::variant_filter_stats_category_counts
VariantFilterCategoryStats variant_filter_stats_category_counts(VariantFilterStats const &stats)
Generate summary counts for a VariantFilterStats counter.
Definition: variant_filter.cpp:103
genesis::population::VariantFilterTagCategory::kInvariant
@ kInvariant
Postion is not a SNP according to the SNP filters.
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
genesis::population::VariantFilterTag::kMaskedRegion
@ kMaskedRegion
Position is part of a masked region.
genesis::population::VariantFilterTag::kEmpty
@ kEmpty
All counts across all samples are zero.
genesis::population::VariantFilterTagCategory::kMissingInvalid
@ kMissingInvalid
Position is missing or otherwise invalid.
genesis::population::VariantFilterTagCategory::kPassed
@ kPassed
Variant has passed all filters.
genesis::population::FilterStatus::IntType
uint32_t IntType
Definition: filter_status.hpp:69
genesis::population::VariantFilterTag::kNotSnp
@ kNotSnp
Invariant position, not a SNP.
genesis::population::VariantFilterTagCategory::kEnd
@ kEnd
End of the enum values.
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::VariantFilterTag::kBelowMinReadDepth
@ kBelowMinReadDepth
Sum of counts across all samples is below the min read depth threshold.
variant_filter.hpp
char.hpp
genesis::population::VariantFilterTag::kAboveDeletionsCountLimit
@ kAboveDeletionsCountLimit
Too many deletions at the position.
genesis::population::VariantFilterTag::kNoSamplePassed
@ kNoSamplePassed
None of the SampleCounts of the Variant passed their filters.
genesis::population::VariantFilterTag
VariantFilterTag
List of filters that we apply to a Variant, to indicate whether the Variant passed or not.
Definition: variant_filter.hpp:71
genesis::population::VariantFilterTag::kMissing
@ kMissing
Position is missing in the input data.
genesis::population::VariantFilterTag::kEnd
@ kEnd
End of the enum values.
genesis::population::VariantFilterTagCategory::kMasked
@ kMasked
Position is masked.
genesis::population::print_variant_filter_category_stats
std::ostream & print_variant_filter_category_stats(std::ostream &os, VariantFilterCategoryStats const &stats, bool verbose)
Definition: variant_filter.cpp:269
genesis::population::VariantFilterTag::kNotAllSamplesPassed
@ kNotAllSamplesPassed
Some of the SampleCounts of the Variant did not pass their filters.
genesis::population::print_variant_filter_stats
std::ostream & print_variant_filter_stats(std::ostream &os, VariantFilterStats const &stats, bool verbose)
Print a textual representation of the counts collected.
Definition: variant_filter.cpp:193
genesis::population::FilterStats
Counts of how many entries with a particular Filter Tag occured in some data.
Definition: filter_stats.hpp:61
genesis::population::variant_filter_tag_to_category
VariantFilterTagCategory variant_filter_tag_to_category(VariantFilterTag tag)
For a given tag, return its category tag.
Definition: variant_filter.cpp:63
genesis::population::VariantFilterTag::kBelowSnpMinCount
@ kBelowSnpMinCount
Sum of nucleotides is below VariantFilterNumericalParams::snp_min_count.
genesis::population::VariantFilterTag::kAboveMaxReadDepth
@ kAboveMaxReadDepth
Sum of counts across all samples is above the max read depth threshold.
genesis::population::VariantFilterTag::kPassed
@ kPassed
Variant has passed all filters.