A library for working with phylogenetic and population genetic data.
v0.27.0
population/functions/functions.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_POPULATION_FUNCTIONS_FUNCTIONS_H_
2 #define GENESIS_POPULATION_FUNCTIONS_FUNCTIONS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2022 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
36 
37 #include <array>
38 #include <iosfwd>
39 #include <utility>
40 #include <string>
41 #include <vector>
42 
43 namespace genesis {
44 namespace population {
45 
46 // =================================================================================================
47 // Status and Information
48 // =================================================================================================
49 
51 {
65  bool is_covered = false;
66 
77  bool is_snp = false;
78 
89  bool is_biallelic = false;
90 
101  bool is_ignored = false;
102 };
103 
152  BaseCounts const& sample,
153  size_t min_coverage = 0,
154  size_t max_coverage = 0,
155  size_t min_count = 0,
156  bool tolerate_deletions = false
157 );
158 
159 // =================================================================================================
160 // Counts
161 // =================================================================================================
162 
/**
 * @brief Get the count for a base given as a char.
 *
 * @param bc   Set of base counts to read from.
 * @param base Character naming the base whose count is requested.
 *             NOTE(review): the accepted characters (case, non-ACGT symbols) and the
 *             handling of invalid input are defined in functions.cpp, not here - confirm there.
 * @return The count stored in @p bc for @p base.
 */
168 size_t get_base_count( BaseCounts const& bc, char base );
169 
/**
 * @brief Get the summed up total base counts of a Variant.
 *
 * @param variant Variant whose base counts are summed up.
 * @return A single BaseCounts holding the totals.
 */
175 BaseCounts total_base_counts( Variant const& variant );
176 
177 // =================================================================================================
178 // Sorting
179 // =================================================================================================
180 
185 
/**
 * @brief Return the sorted base counts of both input samples, ordered by the average
 * frequencies of the nucleotides.
 *
 * @param sample_a First input sample.
 * @param sample_b Second input sample.
 * @return A pair of SortedBaseCounts.
 *         NOTE(review): presumably `first` belongs to @p sample_a and `second` to @p sample_b,
 *         both using the same nucleotide order - confirm against functions.cpp.
 */
193 std::pair<SortedBaseCounts, SortedBaseCounts> sorted_average_base_counts(
194  BaseCounts const& sample_a,
195  BaseCounts const& sample_b
196 );
197 
206  Variant const& variant, bool reference_first
207 );
208 
209 // =================================================================================================
210 // Merging
211 // =================================================================================================
212 
/**
 * @brief Count of the pure nucleotide bases of a sample, that is, the sum of its
 * A, C, G, and T counts.
 *
 * Only the four members `a_count`, `c_count`, `g_count`, and `t_count` are added up.
 *
 * @param sample Base counts to sum.
 * @return `a_count + c_count + g_count + t_count` of @p sample.
 */
222 inline size_t nucleotide_sum( BaseCounts const& sample )
223 {
224  return sample.a_count + sample.c_count + sample.g_count + sample.t_count;
225 }
226 
/**
 * @brief Count of the pure nucleotide bases (A, C, G, T) of a whole Variant.
 *
 * Computes the total base counts of the Variant via total_base_counts(), and then
 * sums the A, C, G, and T counts of that total via nucleotide_sum().
 *
 * @param variant Variant to compute the sum for.
 * @return The nucleotide sum of the summed up base counts of @p variant.
 */
232 inline size_t total_nucleotide_sum( Variant const& variant )
233 {
234  return nucleotide_sum( total_base_counts( variant ));
235 }
236 
/**
 * @brief Merge the counts of two BaseCounts objects, by adding the counts of the
 * second (@p p2) to the first (@p p1).
 *
 * @param p1 Counts that are modified in place and receive the merged result.
 * @param p2 Counts that are added to @p p1; not modified.
 */
241 void merge_inplace( BaseCounts& p1, BaseCounts const& p2 );
242 
/**
 * @brief Merge the counts of two BaseCounts objects.
 *
 * Both inputs are taken by const reference; the merged counts are returned as a new object.
 *
 * @param p1 First set of counts.
 * @param p2 Second set of counts.
 * @return The merged BaseCounts.
 */
246 BaseCounts merge( BaseCounts const& p1, BaseCounts const& p2 );
247 
/**
 * @brief Merge overload taking a vector of BaseCounts.
 *
 * NOTE(review): presumably merges the counts of all elements of @p p into one BaseCounts,
 * by analogy with the two-argument overload - confirm in functions.cpp, including the
 * result for an empty vector.
 *
 * @param p Sets of counts to merge.
 * @return The merged BaseCounts.
 */
251 BaseCounts merge( std::vector<BaseCounts> const& p );
252 
253 // =================================================================================================
254 // Consensus
255 // =================================================================================================
256 
/**
 * @brief Consensus character for a BaseCounts, and its confidence.
 *
 * @param sample Base counts to compute the consensus for.
 * @return A pair of the consensus character (`first`) and its confidence (`second`).
 *         NOTE(review): the value range of the confidence and the character returned for
 *         samples without counts are defined in functions.cpp - confirm there.
 */
266 std::pair<char, double> consensus( BaseCounts const& sample );
267 
/**
 * @brief Consensus character for a BaseCounts, and its confidence, given a BaseCountsStatus.
 *
 * NOTE(review): how the flags of @p status influence the result is not visible in this
 * header - confirm in functions.cpp.
 *
 * @param sample Base counts to compute the consensus for.
 * @param status Status belonging to @p sample (presumably as computed by status() - verify).
 * @return A pair of the consensus character (`first`) and its confidence (`second`).
 */
277 std::pair<char, double> consensus( BaseCounts const& sample, BaseCountsStatus const& status );
278 
/**
 * @brief Guess the reference base of a Variant.
 *
 * @param variant Variant to guess the reference base for.
 * @return The guessed reference base as a char.
 *         NOTE(review): the guessing rule and the value returned when no guess is possible
 *         are defined in functions.cpp - confirm there.
 */
288 char guess_reference_base( Variant const& variant );
289 
/**
 * @brief Guess the alternative base of a Variant.
 *
 * @param variant Variant to guess the alternative base for.
 * @param force   Defaults to `true`.
 *                NOTE(review): presumably controls whether a guess is made even if the
 *                Variant already has an alternative base - confirm in functions.cpp.
 * @return The guessed alternative base as a char.
 */
303 char guess_alternative_base( Variant const& variant, bool force = true );
304 
305 // =================================================================================================
306 // Output
307 // =================================================================================================
308 
/**
 * @brief Output stream operator for BaseCounts instances.
 *
 * @param os Stream to write to.
 * @param bs Base counts to print.
 * @return A reference to a std::ostream (presumably @p os, to allow chaining - verify).
 */
312 std::ostream& operator<<( std::ostream& os, BaseCounts const& bs );
313 
314 } // namespace population
315 } // namespace genesis
316 
317 #endif // include guard
genesis::population::guess_reference_base
char guess_reference_base(Variant const &variant)
Guess the reference base of a Variant.
Definition: population/functions/functions.cpp:447
base_counts.hpp
genesis::population::BaseCountsStatus
Definition: population/functions/functions.hpp:50
genesis::population::get_base_count
size_t get_base_count(BaseCounts const &bc, char base)
Get the count for a base given as a char.
Definition: population/functions/functions.cpp:103
genesis::population::SortedBaseCounts
Ordered array of base counts for the four nucleotides.
Definition: base_counts.hpp:110
genesis::population::BaseCounts::t_count
size_t t_count
Count of all T nucleotides that are present in the sample.
Definition: base_counts.hpp:74
genesis::population::BaseCountsStatus::is_biallelic
bool is_biallelic
Is the Sample biallelic?
Definition: population/functions/functions.hpp:89
genesis::population::BaseCounts::g_count
size_t g_count
Count of all G nucleotides that are present in the sample.
Definition: base_counts.hpp:69
genesis::population::BaseCounts::a_count
size_t a_count
Count of all A nucleotides that are present in the sample.
Definition: base_counts.hpp:59
genesis::population::sorted_average_base_counts
std::pair< SortedBaseCounts, SortedBaseCounts > sorted_average_base_counts(BaseCounts const &sample_a, BaseCounts const &sample_b)
Return the sorted base counts of both input samples, ordered by the average frequencies of the nucleot...
Definition: population/functions/functions.cpp:221
genesis::population::operator<<
std::ostream & operator<<(std::ostream &os, BaseCounts const &bs)
Output stream operator for BaseCounts instances.
Definition: population/functions/functions.cpp:486
genesis::population::total_nucleotide_sum
size_t total_nucleotide_sum(Variant const &variant)
Count of the pure nucleotide bases at this position, that is, the sum of all A, C,...
Definition: population/functions/functions.hpp:232
genesis::population::consensus
std::pair< char, double > consensus(BaseCounts const &sample)
Consensus character for a BaseCounts, and its confidence.
Definition: population/functions/functions.cpp:397
genesis::population::BaseCountsStatus::is_snp
bool is_snp
Does the Sample have two or more alleles?
Definition: population/functions/functions.hpp:77
genesis::population::nucleotide_sum
size_t nucleotide_sum(BaseCounts const &sample)
Count of the pure nucleotide bases at this position, that is, the sum of all A, C,...
Definition: population/functions/functions.hpp:222
genesis::population::merge
BaseCounts merge(BaseCounts const &p1, BaseCounts const &p2)
Merge the counts of two BaseCounts objects.
Definition: population/functions/functions.cpp:372
genesis::population::Variant
A single variant at a position in a chromosome, along with BaseCounts for a set of samples.
Definition: variant.hpp:62
genesis::population::BaseCounts::c_count
size_t c_count
Count of all C nucleotides that are present in the sample.
Definition: base_counts.hpp:64
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::population::guess_alternative_base
char guess_alternative_base(Variant const &variant, bool force)
Guess the alternative base of a Variant.
Definition: population/functions/functions.cpp:463
genesis::population::status
BaseCountsStatus status(BaseCounts const &sample, size_t min_coverage, size_t max_coverage, size_t min_count, bool tolerate_deletions)
Compute a simple status with useful properties from the counts of a BaseCounts.
Definition: population/functions/functions.cpp:49
genesis::population::BaseCounts
One set of nucleotide base counts, for example for a given sample that represents a pool of sequenced...
Definition: base_counts.hpp:54
genesis::population::total_base_counts
BaseCounts total_base_counts(Variant const &variant)
Get the summed up total base counts of a Variant.
Definition: population/functions/functions.cpp:139
genesis::population::sorted_base_counts
SortedBaseCounts sorted_base_counts(BaseCounts const &sample)
Return the order of base counts (nucleotides), largest one first.
Definition: population/functions/functions.cpp:191
genesis::population::merge_inplace
void merge_inplace(BaseCounts &p1, BaseCounts const &p2)
Merge the counts of two BaseCounts objects, by adding the counts of the second (p2) to the first (p1).
Definition: population/functions/functions.cpp:355
variant.hpp
genesis::population::BaseCountsStatus::is_ignored
bool is_ignored
Is the Sample ignored due to high deletions count?
Definition: population/functions/functions.hpp:101
genesis::population::BaseCountsStatus::is_covered
bool is_covered
Is the Sample covered by enough reads/nucleotides?
Definition: population/functions/functions.hpp:65