58 lookup_.
set_all( static_cast<unsigned char>( characters_.size() ));
59 for(
size_t i = 0; i < characters_.size(); ++i ) {
60 auto c = characters_[i];
77 return counts_.
rows();
82 assert( counts_.
cols() == characters_.size() );
93 if( site_index >=
length() ) {
94 throw std::runtime_error(
95 "Invalid site index for retrieving count: " +
std::to_string( site_index ) +
"." 99 auto char_idx = lookup_[ character ];
100 if( char_idx == characters_.size() ) {
101 throw std::runtime_error(
102 "Invalid character for retrieving count: '" + std::string( 1, character ) +
"'." 106 return counts_( site_index, char_idx );
110 size_t character_index,
113 if( site_index >= counts_.
rows() ) {
114 throw std::runtime_error(
115 "Invalid site index for retrieving count: " +
std::to_string( site_index ) +
"." 118 if( character_index > counts_.
cols() ) {
119 throw std::runtime_error(
120 "Invalid character index for retrieving count: " +
std::to_string( character_index ) +
"." 124 return counts_( site_index, character_index );
133 if( use_abundance ) {
142 if( num_seqs_ >= std::numeric_limits< CountsIntType >::max() - weight ) {
143 throw std::runtime_error(
144 "Cannot add Sequence to SiteCounts as it might lead to an overflow in the counts." 147 if( sites.size() != counts_.
rows() ) {
148 throw std::runtime_error(
149 "Cannot add Sequence to SiteCounts if it has different number of sites: Expected " 155 for(
size_t site_idx = 0; site_idx < sites.size(); ++site_idx ) {
157 auto char_idx = lookup_[
static_cast< size_t >( sites[ site_idx ] ) ];
158 if( char_idx == characters_.size() ) {
163 counts_( site_idx, char_idx ) += weight;
172 for(
auto const& seq : sequences ) {
187 for(
auto& e : counts_ ) {
CountsIntType added_sequences_count() const
Return the number of processed Sequences, i.e., how many Sequences were added in total.
void clear()
Clear the object, that is, delete everything.
CountsIntType count_of(char character, size_t site_index) const
Return the count of a specific character at a given site.
SiteCounts()=default
Default constructor.
void set_all(T value)
Set the lookup status for all chars at once.
std::string normalize_code_alphabet(std::string const &alphabet)
Normalize an alphabet set of Sequence codes, i.e., make them upper case, sort them, and remove duplicates.
void add_sequences(SequenceSet const &sequences, bool use_abundances=true)
Process a SequenceSet and add its counts to the existing ones for all contained Sequences.
void add_sequence(Sequence const &sequence, bool use_abundance=true)
Process a single Sequence and add its counts to the existing ones.
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
size_t length() const
Return the number of sites used for counting.
void clear_counts()
Reset all counts to 0.
Provides some commonly used string utility functions.
Store a set of Sequences.
uint32_t CountsIntType
Unsigned integer type used internally for counting the frequencies of Sequence sites.
void set_char_upper_lower(char c, T value)
Set the lookup status for both the upper and lower case of a given char.
CountsIntType count_at(size_t character_index, size_t site_index) const
Return the count for a character and a site, given their indices.
double length(Tree const &tree)
Get the length of the tree, i.e., the sum of all branch lengths.
std::shared_ptr< BaseOutputTarget > to_string(std::string &target_string)
Obtain an output target for writing to a string.
std::string characters() const
Return the character set that is used for counting.