A library for working with phylogenetic and population genetic data.
v0.32.0
sample_counts_filter.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2024 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
35 
36 #include <cassert>
37 #include <cmath>
38 #include <iostream>
39 #include <sstream>
40 #include <stdexcept>
41 #include <string>
42 
43 namespace genesis {
44 namespace population {
45 
46 // We want to make sure that the tags enum is exactly as expected here. In case we later
47 // add other values to that enum, we want to know here, in order to adapt all functions below
48 // accordingly.
49 static_assert(
51  "SampleCountsFilterTag::kEnd != 12. The enum has values that are not accounted for."
52 );
53 static_assert(
55  "SampleCountsFilterTagCategory::kEnd != 4. The enum has values that are not accounted for."
56 );
57 
58 // =================================================================================================
59 // Stats
60 // =================================================================================================
61 
63 {
64  // Just return the category of the tag.
65  switch( tag ) {
82  default: {
83  throw std::invalid_argument(
84  "Invalid SampleCountsFilterTag: " +
85  std::to_string( static_cast<FilterStatus::IntType>( tag ))
86  );
87  }
88  }
89  assert( false );
90 
91  // Make compilers happy, just in case.
93 }
94 
96  SampleCountsFilterStats const& stats
97 ) {
98  // assert( stats[ SampleCountsFilterTag::kEnd ] == 0 );
99  assert( stats.data.size() == static_cast<size_t>( SampleCountsFilterTag::kEnd ) );
100 
101  // Build our result, by simply adding up the values to our simple categories / classes.
115  return result;
116 }
117 
120 ) {
121  // assert( stats[ SampleCountsFilterTag::kEnd ] == 0 );
122  assert( stats.data.size() == static_cast<size_t>( SampleCountsFilterTag::kEnd ) );
123 
124  // Select the requested category and add up their values.
125  size_t result = 0;
126  switch( category ) {
128  result += stats[ SampleCountsFilterTag::kPassed ];
129  break;
130  }
132  result += stats[ SampleCountsFilterTag::kMaskedPosition ];
133  result += stats[ SampleCountsFilterTag::kMaskedRegion ];
134  break;
135  }
137  result += stats[ SampleCountsFilterTag::kMissing ];
138  result += stats[ SampleCountsFilterTag::kNotPassed ];
139  result += stats[ SampleCountsFilterTag::kInvalid ];
140  break;
141  }
143  result += stats[ SampleCountsFilterTag::kEmpty ];
147  result += stats[ SampleCountsFilterTag::kNotSnp ];
148  result += stats[ SampleCountsFilterTag::kNotBiallelicSnp ];
149  break;
150  }
151  default: {
152  throw std::invalid_argument(
153  "Invalid SampleCountsFilterTagCategory: " +
154  std::to_string( static_cast<FilterStatus::IntType>( category ))
155  );
156  }
157  }
158  return result;
159 }
160 
161 // =================================================================================================
162 // Printing
163 // =================================================================================================
164 
165 // --------------------------------------------------------------------------------------
166 // Print sample stats
167 // --------------------------------------------------------------------------------------
168 
170  std::ostream& os,
171  SampleCountsFilterStats const& stats,
172  bool verbose
173 ) {
174  // assert( stats[ SampleCountsFilterTag::kEnd ] == 0 );
175  assert( stats.data.size() == static_cast<size_t>( SampleCountsFilterTag::kEnd ) );
176 
177  // Go through all possible enum values and print them
178  if( stats[SampleCountsFilterTag::kMaskedPosition] > 0 || verbose ) {
179  os << "Masked position: " << stats[SampleCountsFilterTag::kMaskedPosition] << "\n";
180  }
181  if( stats[SampleCountsFilterTag::kMaskedRegion] > 0 || verbose ) {
182  os << "Masked region: " << stats[SampleCountsFilterTag::kMaskedRegion] << "\n";
183  }
184  if( stats[SampleCountsFilterTag::kMissing] > 0 || verbose ) {
185  os << "Missing: " << stats[SampleCountsFilterTag::kMissing] << "\n";
186  }
187  if( stats[SampleCountsFilterTag::kNotPassed] > 0 || verbose ) {
188  os << "Not passed: " << stats[SampleCountsFilterTag::kNotPassed] << "\n";
189  }
190  if( stats[SampleCountsFilterTag::kInvalid] > 0 || verbose ) {
191  os << "Invalid: " << stats[SampleCountsFilterTag::kInvalid] << "\n";
192  }
193  if( stats[SampleCountsFilterTag::kEmpty] > 0 || verbose ) {
194  os << "Empty: " << stats[SampleCountsFilterTag::kEmpty] << "\n";
195  }
196  if( stats[SampleCountsFilterTag::kBelowMinReadDepth] > 0 || verbose ) {
197  os << "Below min read depth: " << stats[SampleCountsFilterTag::kBelowMinReadDepth] << "\n";
198  }
199  if( stats[SampleCountsFilterTag::kAboveMaxReadDepth] > 0 || verbose ) {
200  os << "Above max read depth: " << stats[SampleCountsFilterTag::kAboveMaxReadDepth] << "\n";
201  }
202  if( stats[SampleCountsFilterTag::kAboveDeletionsCountLimit] > 0 || verbose ) {
203  os << "Above deletions limit: " << stats[SampleCountsFilterTag::kAboveDeletionsCountLimit] << "\n";
204  }
205  if( stats[SampleCountsFilterTag::kNotSnp] > 0 || verbose ) {
206  os << "Not SNP: " << stats[SampleCountsFilterTag::kNotSnp] << "\n";
207  }
208  if( stats[SampleCountsFilterTag::kNotBiallelicSnp] > 0 || verbose ) {
209  os << "Not biallelic SNP: " << stats[SampleCountsFilterTag::kNotBiallelicSnp] << "\n";
210  }
211  if( stats[SampleCountsFilterTag::kPassed] > 0 || verbose ) {
212  os << "Passed: " << stats[SampleCountsFilterTag::kPassed] << "\n";
213  }
214  return os;
215 }
216 
218  SampleCountsFilterStats const& stats,
219  bool verbose
220 ) {
221  std::stringstream ss;
222  print_sample_counts_filter_stats( ss, stats, verbose );
223  return ss.str();
224 }
225 
226 // --------------------------------------------------------------------------------------
227 // Print category stats
228 // --------------------------------------------------------------------------------------
229 
231  std::ostream& os,
232  SampleCountsFilterCategoryStats const& stats,
233  bool verbose
234 ) {
235  // assert( stats[ SampleCountsFilterTagCategory::kEnd ] == 0 );
236  assert( stats.data.size() == static_cast<size_t>( SampleCountsFilterTagCategory::kEnd ) );
237 
238  // Go through all possible enum values and print them
239  if( stats[SampleCountsFilterTagCategory::kMasked] > 0 || verbose ) {
240  os << "Masked: " << stats[SampleCountsFilterTagCategory::kMasked] << "\n";
241  }
242  if( stats[SampleCountsFilterTagCategory::kMissingInvalid] > 0 || verbose ) {
243  os << "Missing: " << stats[SampleCountsFilterTagCategory::kMissingInvalid] << "\n";
244  }
245  if( stats[SampleCountsFilterTagCategory::kNumeric] > 0 || verbose ) {
246  os << "Numeric: " << stats[SampleCountsFilterTagCategory::kNumeric] << "\n";
247  }
248  if( stats[SampleCountsFilterTagCategory::kPassed] > 0 || verbose ) {
249  os << "Passed: " << stats[SampleCountsFilterTagCategory::kPassed] << "\n";
250  }
251  return os;
252 }
253 
255  SampleCountsFilterCategoryStats const& stats,
256  bool verbose
257 ) {
258  std::stringstream ss;
259  print_sample_counts_filter_category_stats( ss, stats, verbose );
260  return ss.str();
261 }
262 
263 } // namespace population
264 } // namespace genesis
genesis::population::SampleCountsFilterTag::kAboveDeletionsCountLimit
@ kAboveDeletionsCountLimit
Too many deletions at the position.
functions.hpp
genesis::population::SampleCountsFilterTag::kMaskedRegion
@ kMaskedRegion
Position is part of a masked region.
genesis::population::FilterStats::data
FilterTagArray data
Definition: filter_stats.hpp:184
genesis::population::SampleCountsFilterTagCategory::kPassed
@ kPassed
SampleCounts has passed all filters.
genesis::population::to_string
std::string to_string(GenomeLocus const &locus)
Definition: function/genome_locus.hpp:52
sample_counts_filter.hpp
genesis::population::SampleCountsFilterTagCategory::kNumeric
@ kNumeric
Any of the numeric variant filters failed.
genesis::population::print_sample_counts_filter_stats
std::ostream & print_sample_counts_filter_stats(std::ostream &os, SampleCountsFilterStats const &stats, bool verbose)
Print a textual representation of the counts collected.
Definition: sample_counts_filter.cpp:169
genesis::population::SampleCountsFilterTag::kAboveMaxReadDepth
@ kAboveMaxReadDepth
Sum of counts across all nucleotide counts is above the max read depth threshold.
genesis::population::FilterStatus::IntType
uint32_t IntType
Definition: filter_status.hpp:69
genesis::population::SampleCountsFilterTagCategory
SampleCountsFilterTagCategory
List of filter categories for a SampleCounts.
Definition: sample_counts_filter.hpp:171
genesis::population::sample_counts_filter_stats_category_counts
SampleCountsFilterCategoryStats sample_counts_filter_stats_category_counts(SampleCountsFilterStats const &stats)
Generate summary counts for a SampleCountsFilterStats counter.
Definition: sample_counts_filter.cpp:95
genesis::population::SampleCountsFilterTag::kBelowMinReadDepth
@ kBelowMinReadDepth
Sum of counts across all nucleotide counts is below the min read depth threshold.
genesis::population::SampleCountsFilterTag::kPassed
@ kPassed
Sample has passed all filters.
genesis::population::print_sample_counts_filter_category_stats
std::ostream & print_sample_counts_filter_category_stats(std::ostream &os, SampleCountsFilterCategoryStats const &stats, bool verbose)
Definition: sample_counts_filter.cpp:230
genesis::population::SampleCountsFilterTag::kEmpty
@ kEmpty
Zero nucleotide counts, after zeroing out counts based on the min_count and max_count.
genesis::population::SampleCountsFilterTag::kInvalid
@ kInvalid
Generic indicator that the sample is invalid.
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::SampleCountsFilterTag::kNotPassed
@ kNotPassed
Generic indicator that the sample has not passed a filter.
genesis::population::SampleCountsFilterTag::kEnd
@ kEnd
char.hpp
genesis::population::SampleCountsFilterTag::kNotSnp
@ kNotSnp
Invariant position, not a SNP.
genesis::population::SampleCountsFilterTagCategory::kMasked
@ kMasked
Position is masked.
genesis::population::FilterStats
Counts of how many entries with a particular Filter Tag occurred in some data.
Definition: filter_stats.hpp:61
genesis::population::SampleCountsFilterTagCategory::kEnd
@ kEnd
End of the enum values.
genesis::population::SampleCountsFilterTagCategory::kMissingInvalid
@ kMissingInvalid
Position is missing or otherwise invalid.
genesis::population::SampleCountsFilterTag
SampleCountsFilterTag
Definition: sample_counts_filter.hpp:54
genesis::population::SampleCountsFilterTag::kMaskedPosition
@ kMaskedPosition
Position has been masked out from processing.
genesis::population::SampleCountsFilterTag::kMissing
@ kMissing
Position is missing in the input data.
genesis::population::sample_counts_filter_tag_to_category
SampleCountsFilterTagCategory sample_counts_filter_tag_to_category(SampleCountsFilterTag tag)
For a given tag, return its category tag.
Definition: sample_counts_filter.cpp:62
genesis::population::SampleCountsFilterTag::kNotBiallelicSnp
@ kNotBiallelicSnp
SNP position, but not biallelic, i.e., has more than one alternative.