A library for working with phylogenetic and population genetic data.
v0.27.0
filter_transform.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2022 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lczech@carnegiescience.edu>
20  Department of Plant Biology, Carnegie Institution For Science
21  260 Panama Street, Stanford, CA 94305, USA
22 */
23 
32 
33 #include <cassert>
34 #include <iostream>
35 
36 namespace genesis {
37 namespace population {
38 
39 // =================================================================================================
40 // Filter by Status
41 // =================================================================================================
42 
44  std::function<bool(BaseCountsStatus const&)> predicate,
45  Variant const& variant,
46  SampleFilterType type,
47  size_t min_coverage,
48  size_t max_coverage,
49  size_t min_count,
50  bool tolerate_deletions
51 ) {
52  // kMerge: Merge first, then stat.
53  if( type == SampleFilterType::kMerge ) {
54  auto const smp = merge( variant.samples );
55  auto const stat = status( smp, min_coverage, max_coverage, min_count, tolerate_deletions );
56  return predicate( stat );
57  }
58 
59  // kConjunction or kDisjunction: Stat individually, then logical combination.
60  size_t true_cnt = 0;
61  for( auto const& smp : variant.samples ) {
62  auto const stat = status( smp, min_coverage, max_coverage, min_count, tolerate_deletions );
63  true_cnt += static_cast<size_t>( predicate( stat ));
64  }
65  if( type == SampleFilterType::kConjunction ) {
66  return true_cnt == variant.samples.size();
67  } else if( type == SampleFilterType::kDisjunction ) {
68  return true_cnt > 0;
69  }
70  throw std::invalid_argument( "Invalid SampleFilterType." );
71 }
72 
73 // =================================================================================================
74 // Transform by Count
75 // =================================================================================================
76 
77 void transform_zero_out_by_min_count( BaseCounts& sample, size_t min_count )
78 {
79  // Reset counts if needed, according to min count setting.
80  if( sample.a_count < min_count ) {
81  sample.a_count = 0;
82  }
83  if( sample.c_count < min_count ) {
84  sample.c_count = 0;
85  }
86  if( sample.g_count < min_count ) {
87  sample.g_count = 0;
88  }
89  if( sample.t_count < min_count ) {
90  sample.t_count = 0;
91  }
92 }
93 
94 void transform_zero_out_by_min_count( Variant& variant, size_t min_count )
95 {
96  for( auto& sample : variant.samples ) {
97  transform_zero_out_by_min_count( sample, min_count );
98  }
99 }
100 
101 void transform_zero_out_by_max_count( BaseCounts& sample, size_t max_count )
102 {
103  if( max_count == 0 ) {
104  return;
105  }
106 
107  // Reset counts if needed, according to max count setting.
108  if( sample.a_count > max_count ) {
109  sample.a_count = 0;
110  }
111  if( sample.c_count > max_count ) {
112  sample.c_count = 0;
113  }
114  if( sample.g_count > max_count ) {
115  sample.g_count = 0;
116  }
117  if( sample.t_count > max_count ) {
118  sample.t_count = 0;
119  }
120 }
121 
122 void transform_zero_out_by_max_count( Variant& variant, size_t max_count )
123 {
124  for( auto& sample : variant.samples ) {
125  transform_zero_out_by_max_count( sample, max_count );
126  }
127 }
128 
129 void transform_zero_out_by_min_max_count( BaseCounts& sample, size_t min_count, size_t max_count )
130 {
131  // We could just call transform_zero_out_by_min_count() and transform_zero_out_by_max_count()
132  // here, but doing that would require more branching, so let's do a little code duplication.
133  if( sample.a_count < min_count || ( max_count > 0 && sample.a_count > max_count )) {
134  sample.a_count = 0;
135  }
136  if( sample.c_count < min_count || ( max_count > 0 && sample.c_count > max_count )) {
137  sample.c_count = 0;
138  }
139  if( sample.g_count < min_count || ( max_count > 0 && sample.g_count > max_count )) {
140  sample.g_count = 0;
141  }
142  if( sample.t_count < min_count || ( max_count > 0 && sample.t_count > max_count )) {
143  sample.t_count = 0;
144  }
145 }
146 
147 void transform_zero_out_by_min_max_count( Variant& variant, size_t min_count, size_t max_count )
148 {
149  for( auto& sample : variant.samples ) {
150  transform_zero_out_by_min_max_count( sample, min_count, max_count );
151  }
152 }
153 
154 } // namespace population
155 } // namespace genesis
genesis::population::BaseCountsStatus
Definition: population/functions/functions.hpp:50
genesis::population::SampleFilterType::kConjunction
@ kConjunction
The filter returns true only if all of the BaseCounts samples in the Variant return true for a given ...
genesis::population::BaseCounts::t_count
size_t t_count
Count of all T nucleotides that are present in the sample.
Definition: base_counts.hpp:74
genesis::population::BaseCounts::g_count
size_t g_count
Count of all G nucleotides that are present in the sample.
Definition: base_counts.hpp:69
genesis::population::BaseCounts::a_count
size_t a_count
Count of all A nucleotides that are present in the sample.
Definition: base_counts.hpp:59
genesis::population::SampleFilterType
SampleFilterType
Select how Variant filter functions that evaluate properties of the Variant::samples (BaseCounts) obj...
Definition: filter_transform.hpp:58
genesis::population::SampleFilterType::kMerge
@ kMerge
The filter is applied to the merged BaseCounts of all samples in the Variant.
genesis::population::filter_by_status
bool filter_by_status(std::function< bool(BaseCountsStatus const &)> predicate, Variant const &variant, SampleFilterType type, size_t min_coverage, size_t max_coverage, size_t min_count, bool tolerate_deletions)
Filter a Variant based on a predicate that is applied to the result of a status() call on the BaseCou...
Definition: filter_transform.cpp:43
genesis::population::Variant::samples
std::vector< BaseCounts > samples
Definition: variant.hpp:69
genesis::population::merge
BaseCounts merge(BaseCounts const &p1, BaseCounts const &p2)
Merge the counts of two BaseCounts objects.
Definition: population/functions/functions.cpp:372
genesis::population::Variant
A single variant at a position in a chromosome, along with BaseCounts for a set of samples.
Definition: variant.hpp:62
genesis::population::BaseCounts::c_count
size_t c_count
Count of all C nucleotides that are present in the sample.
Definition: base_counts.hpp:64
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::status
BaseCountsStatus status(BaseCounts const &sample, size_t min_coverage, size_t max_coverage, size_t min_count, bool tolerate_deletions)
Compute a simple status with useful properties from the counts of a BaseCounts.
Definition: population/functions/functions.cpp:49
genesis::population::BaseCounts
One set of nucleotide base counts, for example for a given sample that represents a pool of sequenced...
Definition: base_counts.hpp:54
genesis::population::transform_zero_out_by_min_max_count
void transform_zero_out_by_min_max_count(BaseCounts &sample, size_t min_count, size_t max_count)
Transform a BaseCounts sample by setting any nucleotide count (A, C, G, T) to zero if min_count is no...
Definition: filter_transform.cpp:129
genesis::population::transform_zero_out_by_min_count
void transform_zero_out_by_min_count(BaseCounts &sample, size_t min_count)
Transform a BaseCounts sample by setting any nucleotide count (A, C, G, T) to zero if min_count is no...
Definition: filter_transform.cpp:77
genesis::population::SampleFilterType::kDisjunction
@ kDisjunction
The filter returns true if any of the BaseCounts samples in the Variant return true for a given predi...
genesis::population::transform_zero_out_by_max_count
void transform_zero_out_by_max_count(BaseCounts &sample, size_t max_count)
Transform a BaseCounts sample by setting any nucleotide count (A, C, G, T) to zero if max_count is ex...
Definition: filter_transform.cpp:101
filter_transform.hpp