A library for working with phylogenetic data.
v0.25.0
functions/base_counts.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FUNCTIONS_BASE_COUNTS_H_
2 #define GENESIS_POPULATION_FUNCTIONS_BASE_COUNTS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2021 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
36 
37 #include <iosfwd>
38 #include <string>
39 #include <utility>
40 #include <vector>
41 
42 namespace genesis {
43 namespace population {
44 
45 // =================================================================================================
46 // Status and Information
47 // =================================================================================================
48 
50 {
    // Data members of genesis::population::BaseCountsStatus. All flags default to false.
    // NOTE(review): the line that opens this type (listing line 49) is not part of this extract.

    /** @brief Is the Sample covered by enough reads/nucleotides? */
64  bool is_covered = false;
65 
    /** @brief Does the Sample have two or more alleles? */
76  bool is_snp = false;
77 
    /** @brief Is the Sample biallelic? */
88  bool is_biallelic = false;
89 
    /** @brief Is the Sample ignored due to high deletions count? */
100  bool is_ignored = false;
101 };
102 
// Parameter list of status(): compute a simple status with useful properties from the
// counts of a BaseCounts; the result is returned as a BaseCountsStatus.
// The three size_t thresholds default to 0 and tolerate_deletions defaults to false.
// NOTE(review): the first line of this declaration (listing line 150) is missing from this
// extract, and how the thresholds are applied is decided in base_counts.cpp — confirm there.
151  BaseCounts const& sample,
152  size_t min_coverage = 0,
153  size_t max_coverage = 0,
154  size_t min_count = 0,
155  bool tolerate_deletions = false
156 );
157 
/**
 * @brief Get the count for a base given as a char.
 *
 * @param bc   Counts to read from.
 * @param base Base to look up, given as a character.
 *             NOTE(review): the accepted characters and the handling of invalid ones are
 *             defined in base_counts.cpp and are not visible here — confirm.
 */
163 size_t get_base_count( BaseCounts const& bc, char base );
164 
165 // =================================================================================================
166 // Accumulation, Filtering, etc
167 // =================================================================================================
168 
/**
 * @brief Count of the pure nucleotide bases of a sample, that is, the sum of its
 * A, C, G, and T counts.
 *
 * Only the members a_count, c_count, g_count, and t_count are added up; no other member
 * of BaseCounts is read by this function.
 *
 * @param sample Counts to sum up.
 * @return Sum of the four nucleotide counts.
 */
177 inline size_t nucleotide_sum( BaseCounts const& sample )
178 {
179  return sample.a_count + sample.c_count + sample.g_count + sample.t_count;
180 }
181 
/**
 * @brief Merge the counts of two BaseCounts instances, by adding the counts of the
 * second (@p p2) to the first (@p p1).
 *
 * @param p1 Instance that is modified and receives the added counts.
 * @param p2 Instance whose counts are added; taken by const reference.
 */
186 void merge_inplace( BaseCounts& p1, BaseCounts const& p2 );
187 
/**
 * @brief Merge the counts of two BaseCounts instances.
 *
 * Both inputs are taken by const reference; the merged counts are returned as a new
 * BaseCounts value.
 */
191 BaseCounts merge( BaseCounts const& p1, BaseCounts const& p2 );
192 
/**
 * @brief Overload of merge() that takes a vector of BaseCounts and returns a single
 * BaseCounts value.
 *
 * NOTE(review): presumably the counts of all elements of @p p are added up, and an empty
 * vector yields all-zero counts — the implementation is not visible here; confirm in
 * base_counts.cpp.
 */
196 BaseCounts merge( std::vector<BaseCounts> const& p );
197 
/**
 * @brief Filter by minimum count that we need for a type of nucleotide (A, C, G, T)
 * to be considered.
 *
 * The @p sample is taken by non-const reference and is modified in place.
 * NOTE(review): presumably counts that do not reach @p min_count are set to zero — the
 * implementation is not visible here; confirm in base_counts.cpp.
 *
 * @param sample    Counts to filter.
 * @param min_count Minimum count threshold.
 */
206 void filter_min_count( BaseCounts& sample, size_t min_count );
207 
/**
 * @brief Consensus character for a BaseCounts, and its confidence.
 *
 * @param sample Counts to compute the consensus for.
 * @return Pair of the consensus character (first) and its confidence (second).
 *         NOTE(review): the value range of the confidence and the result for a sample
 *         without counts are not visible here — confirm in base_counts.cpp.
 */
217 std::pair<char, double> consensus( BaseCounts const& sample );
218 
/**
 * @brief Overload of consensus() that additionally takes a BaseCountsStatus.
 *
 * Returns the same kind of pair as the single-argument overload: a character and a double.
 * NOTE(review): presumably @p status is an already computed status of @p sample that
 * influences the result (for instance via its is_covered flag) — the implementation is
 * not visible here; confirm in base_counts.cpp.
 */
228 std::pair<char, double> consensus( BaseCounts const& sample, BaseCountsStatus const& status );
229 
230 // =================================================================================================
231 // Conversion Functions
232 // =================================================================================================
233 
/**
 * @brief Produce a BaseCounts value from a SimplePileupReader::Sample.
 *
 * @param sample          Pileup sample to convert.
 * @param min_phred_score Defaults to 0.
 *                        NOTE(review): presumably bases with a phred quality score below
 *                        this value are not counted — the implementation is not visible
 *                        here; confirm in base_counts.cpp.
 */
234 BaseCounts convert_to_base_counts(
235  SimplePileupReader::Sample const& sample,
236  unsigned char min_phred_score = 0
237 );
238 
/**
 * @brief Output stream operator for BaseCounts instances.
 *
 * NOTE(review): the exact text format written to @p os is defined in base_counts.cpp and
 * is not visible here.
 */
242 std::ostream& operator<<( std::ostream& os, BaseCounts const& bs );
243 
/**
 * @brief Output a BaseCounts instance to a stream in the PoPoolation2 sync format.
 *
 * Note the parameter order: the counts come first and the stream second, which is the
 * reverse of the stream operator declared in this header.
 *
 * @param bs Counts to write.
 * @param os Stream to write to.
 * @return A stream reference; presumably @p os itself, to allow chaining — confirm in
 *         base_counts.cpp.
 */
250 std::ostream& to_sync( BaseCounts const& bs, std::ostream& os );
251 
252 } // namespace population
253 } // namespace genesis
254 
255 #endif // include guard
base_counts.hpp
genesis::population::BaseCountsStatus
Definition: functions/base_counts.hpp:49
genesis::population::get_base_count
size_t get_base_count(BaseCounts const &bc, char base)
Get the count for a base given as a char.
Definition: base_counts.cpp:96
genesis::population::BaseCounts::t_count
size_t t_count
Count of all T nucleotides that are present in the sample.
Definition: base_counts.hpp:73
genesis::population::filter_min_count
void filter_min_count(BaseCounts &sample, size_t min_count)
Filter by minimum count that we need for a type of nucleotide (A, C, G, T) to be considered; set to z...
Definition: base_counts.cpp:174
genesis::population::BaseCountsStatus::is_biallelic
bool is_biallelic
Is the Sample biallelic?
Definition: functions/base_counts.hpp:88
genesis::population::BaseCounts::g_count
size_t g_count
Count of all G nucleotides that are present in the sample.
Definition: base_counts.hpp:68
genesis::population::BaseCounts::a_count
size_t a_count
Count of all A nucleotides that are present in the sample.
Definition: base_counts.hpp:58
genesis::population::operator<<
std::ostream & operator<<(std::ostream &os, BaseCounts const &bs)
Output stream operator for BaseCounts instances.
Definition: base_counts.cpp:345
genesis::population::consensus
std::pair< char, double > consensus(BaseCounts const &sample)
Consensus character for a BaseCounts, and its confidence.
Definition: base_counts.cpp:191
genesis::population::BaseCountsStatus::is_snp
bool is_snp
Does the Sample have two or more alleles?
Definition: functions/base_counts.hpp:76
genesis::population::nucleotide_sum
size_t nucleotide_sum(BaseCounts const &sample)
Count of the pure nucleotide bases at this position, that is, the sum of all A, C, G, and T.
Definition: functions/base_counts.hpp:177
genesis::population::convert_to_base_counts
BaseCounts convert_to_base_counts(SimplePileupReader::Sample const &sample, unsigned char min_phred_score)
Definition: base_counts.cpp:245
genesis::population::merge
BaseCounts merge(BaseCounts const &p1, BaseCounts const &p2)
Merge the counts of two BaseCounts instances.
Definition: base_counts.cpp:153
simple_pileup_reader.hpp
genesis::population::BaseCounts::c_count
size_t c_count
Count of all C nucleotides that are present in the sample.
Definition: base_counts.hpp:63
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::status
BaseCountsStatus status(BaseCounts const &sample, size_t min_coverage, size_t max_coverage, size_t min_count, bool tolerate_deletions)
Compute a simple status with useful properties from the counts of a BaseCounts.
Definition: base_counts.cpp:46
genesis::population::BaseCounts
One set of nucleotide base counts, for example for a given sample that represents a pool of sequenced...
Definition: base_counts.hpp:53
genesis::population::to_sync
std::ostream & to_sync(BaseCounts const &bs, std::ostream &os)
Output a BaseCounts instance to a stream in the PoPoolation2 sync format.
Definition: base_counts.cpp:352
genesis::population::merge_inplace
void merge_inplace(BaseCounts &p1, BaseCounts const &p2)
Merge the counts of two BaseCounts instances, by adding the counts of the second (p2) to the first (p1).
Definition: base_counts.cpp:136
genesis::population::BaseCountsStatus::is_ignored
bool is_ignored
Is the Sample ignored due to high deletions count?
Definition: functions/base_counts.hpp:100
genesis::population::BaseCountsStatus::is_covered
bool is_covered
Is the Sample covered by enough reads/nucleotides?
Definition: functions/base_counts.hpp:64