|
A library for working with phylogenetic and population genetic data.
v0.32.0
|
|
Go to the documentation of this file.
58 lookup_.
set_all(
static_cast<unsigned char>( characters_.size() ));
59 for(
size_t i = 0; i < characters_.size(); ++i ) {
60 auto c = characters_[i];
77 return counts_.
rows();
82 assert( counts_.
cols() == characters_.size() );
93 if( site_index >=
length() ) {
94 throw std::runtime_error(
95 "Invalid site index for retrieving count: " +
std::to_string( site_index ) +
"."
99 auto char_idx = lookup_[ character ];
100 if( char_idx == characters_.size() ) {
101 throw std::runtime_error(
102 "Invalid character for retrieving count: '" + std::string( 1, character ) +
"'."
106 return counts_( site_index, char_idx );
110 size_t character_index,
113 if( site_index >= counts_.
rows() ) {
114 throw std::runtime_error(
115 "Invalid site index for retrieving count: " +
std::to_string( site_index ) +
"."
118 if( character_index > counts_.
cols() ) {
119 throw std::runtime_error(
120 "Invalid character index for retrieving count: " +
std::to_string( character_index ) +
"."
124 return counts_( site_index, character_index );
133 if( use_abundance ) {
142 if( num_seqs_ >= std::numeric_limits< CountsIntType >::max() - weight ) {
143 throw std::runtime_error(
144 "Cannot add Sequence to SiteCounts as it might lead to an overflow in the counts."
147 if( sites.size() != counts_.
rows() ) {
148 throw std::runtime_error(
149 "Cannot add Sequence to SiteCounts if it has different number of sites: Expected "
155 for(
size_t site_idx = 0; site_idx < sites.size(); ++site_idx ) {
157 auto char_idx = lookup_[
static_cast< size_t >( sites[ site_idx ] ) ];
158 if( char_idx == characters_.size() ) {
163 counts_( site_idx, char_idx ) += weight;
172 for(
auto const& seq : sequences ) {
187 for(
auto& e : counts_ ) {
SiteCounts()=default
Default constructor.
CountsIntType added_sequences_count() const
Return the number of processed Sequences, i.e., how many Sequences were added in total.
void clear()
Clear the object, that is, delete everything.
CountsIntType count_of(char character, size_t site_index) const
Return the count of a specific character at a given site.
void add_sequences(SequenceSet const &sequences, bool use_abundances=true)
Process a SequenceSet and add its counts to the existing ones for all contained Sequences.
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
void add_sequence(Sequence const &sequence, bool use_abundance=true)
Process a single Sequence and add its counts to the existing ones.
std::string to_string(GenomeLocus const &locus)
Provides some commonly used string utility functions.
std::string normalize_code_alphabet(std::string const &alphabet)
Normalize an alphabet set of Sequence codes, i.e., make them upper case, sort them,...
size_t length() const
Return the number of sites used for counting.
void clear_counts()
Reset all counts to 0.
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Store a set of Sequences.
uint32_t CountsIntType
Type of uint used for internally counting the frequencies of Sequence sites.
void set_char_upper_lower(char c, T value)
Set the lookup status for both the upper and lower case of a given char.
void set_all(T value)
Set the lookup status for all chars at once.
CountsIntType count_at(size_t character_index, size_t site_index) const
Return the count for a character and a site, given their indices.
std::string characters() const
Return the character set that is used for counting.