A library for working with phylogenetic and population genetic data.
v0.32.0
sample_counts_filter_numerical.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FILTER_SAMPLE_COUNTS_FILTER_NUMERICAL_H_
2 #define GENESIS_POPULATION_FILTER_SAMPLE_COUNTS_FILTER_NUMERICAL_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@sund.ku.dk>
23  University of Copenhagen, Globe Institute, Section for GeoGenetics
24  Oster Voldgade 5-7, 1350 Copenhagen K, Denmark
25 */
26 
37 
38 #include <functional>
39 #include <iosfwd>
40 #include <memory>
41 #include <stdexcept>
42 #include <string>
43 #include <utility>
44 #include <vector>
45 
46 namespace genesis {
47 namespace population {
48 
49 // =================================================================================================
50 // Transform by Count
51 // =================================================================================================
52 
// Parameter list of transform_zero_out_by_min_count( SampleCounts& ). The opening line of the
// declaration (return type and name) is not present in this listing; the symbol index of this
// page gives it as
//     void transform_zero_out_by_min_count(SampleCounts &sample, size_t min_count, bool also_n_and_d_counts)
// and summarizes it as setting any nucleotide count (A, C, G, T) of the sample to zero depending
// on min_count (the summary is truncated in the index). also_n_and_d_counts defaults to true.
65  SampleCounts& sample,
66  size_t min_count,
67  bool also_n_and_d_counts = true
68 );
69 
// Same parameters, but operating on a Variant instead of a single SampleCounts.
// NOTE(review): the name is not shown here; presumably the Variant overload of
// transform_zero_out_by_min_count -- confirm against the original header.
76  Variant& variant,
77  size_t min_count,
78  bool also_n_and_d_counts = true
79 );
80 
// Parameter list of transform_zero_out_by_max_count( SampleCounts& ); opening line not shown.
// The symbol index of this page gives it as
//     void transform_zero_out_by_max_count(SampleCounts &sample, size_t max_count, bool also_n_and_d_counts)
// and summarizes it as setting any nucleotide count (A, C, G, T) of the sample to zero depending
// on max_count (the summary is truncated in the index). also_n_and_d_counts defaults to true.
93  SampleCounts& sample,
94  size_t max_count,
95  bool also_n_and_d_counts = true
96 );
97 
// Same parameters, but operating on a Variant instead of a single SampleCounts.
// NOTE(review): the name is not shown here; presumably the Variant overload of
// transform_zero_out_by_max_count -- confirm against the original header.
104  Variant& variant,
105  size_t max_count,
106  bool also_n_and_d_counts = true
107 );
108 
109 // =================================================================================================
110 // Sample Counts Filter Numerical Params
111 // =================================================================================================
112 
/**
 * @brief Filter settings to filter and transform SampleCounts.
 *
 * Body of SampleCountsFilterNumericalParams. The line opening the definition (line 123 of the
 * header) is not present in this listing. All numeric members are initialized to 0 and all
 * boolean members to false.
 */
124 {
125  // -------------------------------------------
126  // Numeric
127  // -------------------------------------------
128 
     /** @brief Minimum count for each nucleotide to be considered. All counts below are set to zero. */
134  size_t min_count = 0;
135 
     /** @brief Maximum count for each nucleotide to be considered. All counts above are set to zero. */
141  size_t max_count = 0;
142 
     // NOTE(review): the symbol index of this page lists a member `size_t deletions_count_limit`
     // ("Maximum number of deletions at a position before being filtered out.") defined at
     // line 152 of the header; that line is missing from this listing.
153 
     /** @brief Minimum read depth expected for a SampleCounts to be considered covered. */
163  size_t min_read_depth = 0;
164 
     /** @brief Maximum read depth expected for a SampleCounts to be considered covered. */
174  size_t max_read_depth = 0;
175 
176  // -------------------------------------------
177  // SNP vs Invariant
178  // -------------------------------------------
179 
     /** @brief Filter if the sample does not have two or more alleles. */
189  bool only_snps = false;
190 
     /** @brief Filter if the sample does not have exactly two alleles. */
200  bool only_biallelic_snps = false;
201 };
202 
203 // =================================================================================================
204 // Sample Counts Filter Numerical Functions
205 // =================================================================================================
206 
207 // --------------------------------------------------------------------------------------
208 // apply_sample_counts_filter_numerical
209 // --------------------------------------------------------------------------------------
210 
// Parameter list of apply_sample_counts_filter_numerical( SampleCounts&, ... ). The symbol index
// of this page gives the full declaration as
//     bool apply_sample_counts_filter_numerical(SampleCounts &sample,
//         SampleCountsFilterNumericalParams const &params, SampleCountsFilterStats &stats)
// with the summary "Filter a given SampleCounts based on the numerical properties of the counts."
// The opening line and line 222 (the `stats` parameter) are not present in this listing.
220  SampleCounts& sample,
221  SampleCountsFilterNumericalParams const& params,
223 );
224 
// NOTE(review): a further declaration taking a SampleCounts; its opening line and line 232 are
// missing from this listing. Presumably the overload without a stats argument -- confirm against
// the original header.
231  SampleCounts& sample,
233 );
234 
// Variant-level declaration that additionally takes a VariantFilterStats and a
// SampleCountsFilterStats by non-const reference; all_need_pass defaults to false.
// NOTE(review): the opening line (return type and name) is not shown; this section of the header
// is titled apply_sample_counts_filter_numerical, so it is presumably an overload of it.
248  Variant& variant,
249  SampleCountsFilterNumericalParams const& params,
250  VariantFilterStats& variant_stats,
251  SampleCountsFilterStats& sample_count_stats,
252  bool all_need_pass = false
253 );
254 
// Variant-level declaration without stats arguments; all_need_pass defaults to false.
// NOTE(review): opening line not shown; presumably a further overload of
// apply_sample_counts_filter_numerical.
261  Variant& variant,
262  SampleCountsFilterNumericalParams const& params,
263  bool all_need_pass = false
264 );
265 
266 // --------------------------------------------------------------------------------------
267 // make_sample_counts_filter_numerical
268 // --------------------------------------------------------------------------------------
269 
// Parameters and body of make_sample_counts_filter_numerical_tagging(). The opening line
// (line 280 of the header) is not present in this listing; the symbol index of this page gives
// the signature as
//     std::function< void(Variant &)> make_sample_counts_filter_numerical_tagging(
//         SampleCountsFilterNumericalParams const &params, bool all_need_pass=false)
// and describes it as returning a functional that numerically filters the SampleCounts samples
// of a Variant (the summary is truncated in the index).
281  SampleCountsFilterNumericalParams const& params,
282  bool all_need_pass = false
283 ) {
     // Both settings are captured by value, so the returned functional keeps its own copies and
     // does not refer back to the caller's `params` object.
284  return [params, all_need_pass]( Variant& variant ){
     // Forward to the Variant-level filter; the lambda itself returns nothing.
285  apply_sample_counts_filter_numerical( variant, params, all_need_pass );
286  };
287 }
288 
// Parameters and body of a functional factory that also collects filter statistics.
// NOTE(review): the opening line (line 294 of the header) is not shown in this listing;
// presumably an overload of make_sample_counts_filter_numerical_tagging -- confirm against the
// original header.
295  SampleCountsFilterNumericalParams const& params,
296  VariantFilterStats& variant_stats,
297  SampleCountsFilterStats& sample_count_stats,
298  bool all_need_pass = false
299 ) {
     // `params` and `all_need_pass` are captured by value, but both stats objects are captured by
     // reference: the caller must keep `variant_stats` and `sample_count_stats` alive for as long
     // as the returned functional may be invoked.
300  return [params, &variant_stats, &sample_count_stats, all_need_pass]( Variant& variant ){
     // NOTE(review): line 301 (the callee name that opens this call) is missing from this listing;
     // judging by the argument list it is presumably `apply_sample_counts_filter_numerical(` --
     // confirm against the original header.
302  variant, params, variant_stats, sample_count_stats, all_need_pass
303  );
304  };
305 }
306 
307 } // namespace population
308 } // namespace genesis
309 
310 #endif // include guard
genesis::population::transform_zero_out_by_max_count
void transform_zero_out_by_max_count(SampleCounts &sample, size_t max_count, bool also_n_and_d_counts)
Transform a SampleCounts sample by setting any nucleotide count (A, C, G, T) to zero if max_count is ...
Definition: sample_counts_filter_numerical.cpp:78
genesis::population::SampleCountsFilterNumericalParams
Filter settings to filter and transform SampleCounts.
Definition: sample_counts_filter_numerical.hpp:123
genesis::population::SampleCounts
One set of nucleotide sample counts, for example for a given sample that represents a pool of sequenc...
Definition: sample_counts.hpp:56
sample_counts_filter.hpp
sample_counts.hpp
genesis::population::SampleCountsFilterNumericalParams::min_count
size_t min_count
Minimum count for each nucleotide to be considered. All counts below are set to zero.
Definition: sample_counts_filter_numerical.hpp:134
genesis::population::apply_sample_counts_filter_numerical
bool apply_sample_counts_filter_numerical(SampleCounts &sample, SampleCountsFilterNumericalParams const &params, SampleCountsFilterStats &stats)
Filter a given SampleCounts based on the numerical properties of the counts.
Definition: sample_counts_filter_numerical.cpp:115
genesis::population::SampleCountsFilterNumericalParams::max_count
size_t max_count
Maximum count for each nucleotide to be considered. All counts above are set to zero.
Definition: sample_counts_filter_numerical.hpp:141
genesis::population::Variant
A single variant at a position in a chromosome, along with SampleCounts for a set of samples.
Definition: variant.hpp:65
genesis::population::transform_zero_out_by_min_count
void transform_zero_out_by_min_count(SampleCounts &sample, size_t min_count, bool also_n_and_d_counts)
Transform a SampleCounts sample by setting any nucleotide count (A, C, G, T) to zero if min_count is ...
Definition: sample_counts_filter_numerical.cpp:50
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
variant_filter.hpp
genesis::population::SampleCountsFilterNumericalParams::deletions_count_limit
size_t deletions_count_limit
Maximum number of deletions at a position before being filtered out.
Definition: sample_counts_filter_numerical.hpp:152
genesis::population::make_sample_counts_filter_numerical_tagging
std::function< void(Variant &)> make_sample_counts_filter_numerical_tagging(SampleCountsFilterNumericalParams const &params, bool all_need_pass=false)
Return a functional to numerically filter the SampleCounts samples in a Variant tagging the ones that...
Definition: sample_counts_filter_numerical.hpp:280
genesis::population::FilterStats
Counts of how many entries with a particular Filter Tag occurred in some data.
Definition: filter_stats.hpp:61
genesis::population::SampleCountsFilterNumericalParams::only_biallelic_snps
bool only_biallelic_snps
Filter if the sample does not have exactly two alleles.
Definition: sample_counts_filter_numerical.hpp:200
genesis::population::SampleCountsFilterNumericalParams::only_snps
bool only_snps
Filter if the sample does not have two or more alleles.
Definition: sample_counts_filter_numerical.hpp:189
genesis::population::SampleCountsFilterNumericalParams::max_read_depth
size_t max_read_depth
Maximum read depth expected for a SampleCounts to be considered covered.
Definition: sample_counts_filter_numerical.hpp:174
genesis::population::SampleCountsFilterNumericalParams::min_read_depth
size_t min_read_depth
Minimum read depth expected for a SampleCounts to be considered covered.
Definition: sample_counts_filter_numerical.hpp:163