54 if( site_idx >= counts.
length() ) {
55 throw std::runtime_error(
56 "Invalid site index for calculating site entropy: " +
std::to_string( site_idx ) +
"."
61 auto const ln_2 = std::log( 2.0 );
63 auto const num_chars = counts.
characters().size();
70 for(
size_t char_idx = 0; char_idx < num_chars; ++char_idx ) {
71 auto char_count = counts.
count_at( char_idx, site_idx );
72 counts_sum += char_count;
74 double char_prob =
static_cast<double>( char_count ) / num_seqs;
75 if( char_prob > 0.0 ) {
76 entropy -= char_prob * std::log( char_prob ) / ln_2;
82 assert( counts_sum <= num_seqs );
83 double gap_prob = 1.0 - (
static_cast<double>( counts_sum ) / num_seqs );
84 if( gap_prob > 0.0 ) {
85 entropy -= gap_prob * std::log( gap_prob ) / ln_2;
91 entropy *= (
static_cast<double>( counts_sum ) / num_seqs );
99 hmax = std::log1p(
static_cast<double>( num_chars )) / ln_2;
101 hmax = std::log(
static_cast<double>( num_chars )) / ln_2;
103 return entropy / hmax;
113 bool use_small_sample_correction,
117 auto const num_chars =
static_cast<double>( counts.
characters().size() );
118 auto const log_num = log( num_chars ) / log( 2.0 );
122 if( use_small_sample_correction ) {
129 return log_num -
site_entropy( counts, site_index, options ) - e;
141 for(
size_t site_idx = 0; site_idx < counts.
length(); ++site_idx ) {
149 bool only_determined_sites,
154 size_t determined_sites = 0;
157 auto const num_chars = counts.
characters().size();
159 for(
size_t site_idx = 0; site_idx < counts.
length(); ++site_idx ) {
163 if( only_determined_sites ) {
165 for(
size_t char_idx = 0; char_idx < num_chars; ++char_idx ) {
166 det |= ( counts.
count_at( char_idx, site_idx ) > 0 );
174 if( only_determined_sites ) {
175 return sum /
static_cast<double>( determined_sites );
177 return sum /
static_cast<double>( counts.
length() );
183 bool use_small_sample_correction,
187 for(
size_t site_idx = 0; site_idx < counts.
length(); ++site_idx ) {
188 sum +=
site_information( counts, site_idx, use_small_sample_correction, per_site_options );
195 bool only_determined_sites,
196 bool use_small_sample_correction,
201 size_t determined_sites = 0;
204 auto const num_chars = counts.
characters().size();
206 for(
size_t site_idx = 0; site_idx < counts.
length(); ++site_idx ) {
207 sum +=
site_information( counts, site_idx, use_small_sample_correction, per_site_options );
210 if( only_determined_sites ) {
212 for(
size_t char_idx = 0; char_idx < num_chars; ++char_idx ) {
213 det |= ( counts.
count_at( char_idx, site_idx ) > 0 );
221 if( only_determined_sites ) {
222 return sum /
static_cast<double>( determined_sites );
224 return sum /
static_cast<double>( counts.
length() );