A library for working with phylogenetic and population genetic data.
v0.27.0
sequence/functions/entropy.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_SEQUENCE_FUNCTIONS_ENTROPY_H_
2 #define GENESIS_SEQUENCE_FUNCTIONS_ENTROPY_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
34 #include <cstddef> // size_t
35 #include <type_traits> // underlying_type_t
36 #include <string>
37 
38 namespace genesis {
39 namespace sequence {
40 
41 // =================================================================================================
42 // Forward Declarations
43 // =================================================================================================
44 
45 class Sequence;
46 class SiteCounts;
47 class SequenceSet;
48 
49 // =================================================================================================
50 // Entropy Options
51 // =================================================================================================
52 
// Option flags to refine the calculation of site_entropy().
// kDefault is 0; the remaining enumerators are the distinct powers of two 1, 2 and 4,
// stored in an unsigned char, so several of them can be OR-ed into a single value.
70 enum class SiteEntropyOptions : unsigned char
71 {
// Default option: simply calculate the site entropy using the characters used in the
// SiteCounts object.
76  kDefault = 0,
77 
// In addition to the characters of the SiteCounts object, use the undetermined and gap
// characters.
106  kIncludeGaps = 1,
107 
// Weight the entropy using the summed relative frequencies of the characters.
123  kWeighted = 2,
124 
// Normalize the resulting entropy using the maximum entropy possible.
136  kNormalized = 4
137 };
138 
// Body of the or-operator that combines two SiteEntropyOptions values.
// Its signature, `SiteEntropyOptions operator|(SiteEntropyOptions lhs, SiteEntropyOptions rhs)`,
// is on header line 149, which is missing from this listing.
150 {
151  using T = std::underlying_type< SiteEntropyOptions >::type;
// Cast both operands to the underlying integer type, OR them bitwise, and cast the
// result back to the enum type.
152  return static_cast< SiteEntropyOptions >(
153  static_cast< T >( lhs ) | static_cast< T >( rhs )
154  );
155 }
156 
// Body of the or-assignment-operator that merges rhs into lhs.
// Its signature, `SiteEntropyOptions& operator|=(SiteEntropyOptions& lhs, SiteEntropyOptions rhs)`,
// is on header line 168, which is missing from this listing.
169 {
170  using T = std::underlying_type< SiteEntropyOptions >::type;
// Store the bitwise OR of both operands (computed on the underlying integer type)
// back into lhs.
171  lhs = static_cast< SiteEntropyOptions >(
172  static_cast< T >( lhs ) | static_cast< T >( rhs )
173  );
// Return the updated left-hand operand.
174  return lhs;
175 }
176 
// Body of the and-operator used to check whether a SiteEntropyOptions flag is set.
// Its signature, `bool operator&(SiteEntropyOptions lhs, SiteEntropyOptions rhs)`,
// is on header line 190, which is missing from this listing.
191 {
192  using T = std::underlying_type< SiteEntropyOptions >::type;
// Bitwise AND on the underlying integer type. With the bool return type listed for this
// operator, the result is true exactly when lhs and rhs share at least one set bit.
// Consequently, testing against kDefault (value 0) can never yield true.
193  return static_cast< T >( lhs ) & static_cast< T >( rhs );
194 }
195 
196 // =================================================================================================
197 // Per Site Entropy and Information
198 // =================================================================================================
199 
// Calculate the entropy at one site of a SiteCounts object.
// Takes the SiteCounts object and the index of the site to evaluate.
// NOTE(review): the final parameter (a SiteEntropyOptions value named `options` in the
// reference signature) is on header line 214, which this listing omits; that is why the
// parameter list below ends in a trailing comma. Its default value is not visible here.
211 double site_entropy(
212  SiteCounts const& counts,
213  size_t site_index,
215 );
216 
// Calculate the information content at one site of a SiteCounts object.
// Takes the SiteCounts object, the index of the site to evaluate, and a
// use_small_sample_correction flag that is off by default.
// NOTE(review): the final parameter (a SiteEntropyOptions value named `options` in the
// reference signature) is on header line 240, which this listing omits; that is why the
// parameter list below ends in a trailing comma. Its default value is not visible here.
236 double site_information(
237  SiteCounts const& counts,
238  size_t site_index,
239  bool use_small_sample_correction = false,
241 );
242 
243 // =================================================================================================
244 // Total Entropy and Information
245 // =================================================================================================
246 
// Return the sum of all site entropies of a SiteCounts object.
// NOTE(review): the final parameter (a SiteEntropyOptions value named `per_site_options`
// in the reference signature) is on header line 256, which this listing omits; that is
// why the parameter list below ends in a trailing comma. Its default value is not
// visible here.
254 double absolute_entropy(
255  SiteCounts const& counts,
257 );
258 
// Return the average sum of all site entropies of a SiteCounts object.
// only_determined_sites is off by default; presumably it restricts the averaging to
// determined sites — confirm against the definition in entropy.cpp.
// NOTE(review): the final parameter (a SiteEntropyOptions value named `per_site_options`
// in the reference signature) is on header line 279, which this listing omits; that is
// why the parameter list below ends in a trailing comma. Its default value is not
// visible here.
276 double average_entropy(
277  SiteCounts const& counts,
278  bool only_determined_sites = false,
280 );
281 
// Calculate the information content across all sites of a SiteCounts object.
// use_small_sample_correction is off by default.
// NOTE(review): the final parameter (a SiteEntropyOptions value named `per_site_options`
// in the reference signature) is on header line 294, which this listing omits; that is
// why the parameter list below ends in a trailing comma. Its default value is not
// visible here.
291 double absolute_information(
292  SiteCounts const& counts,
293  bool use_small_sample_correction = false,
295 );
296 
// Calculate the information content across all sites of a SiteCounts object
// (the "average" counterpart of absolute_information(), going by its name).
// only_determined_sites and use_small_sample_correction are both off by default;
// presumably the former restricts the averaging to determined sites — confirm against
// the definition in entropy.cpp.
// NOTE(review): the final parameter (a SiteEntropyOptions value named `per_site_options`
// in the reference signature) is on header line 318, which this listing omits; that is
// why the parameter list below ends in a trailing comma. Its default value is not
// visible here.
314 double average_information(
315  SiteCounts const& counts,
316  bool only_determined_sites = false,
317  bool use_small_sample_correction = false,
319 );
320 
321 } // namespace sequence
322 } // namespace genesis
323 
324 #endif // include guard
genesis::sequence::SiteEntropyOptions::kWeighted
@ kWeighted
Weight the entropy using the summed relative frequencies of the characters.
genesis::sequence::operator|=
SiteEntropyOptions & operator|=(SiteEntropyOptions &lhs, SiteEntropyOptions rhs)
Or-assignment-operator to combine two SiteEntropyOptions values.
Definition: sequence/functions/entropy.hpp:168
genesis::sequence::absolute_entropy
double absolute_entropy(SiteCounts const &counts, SiteEntropyOptions per_site_options)
Return the sum of all site entropies.
Definition: sequence/functions/entropy.cpp:136
genesis::sequence::average_information
double average_information(SiteCounts const &counts, bool only_determined_sites, bool use_small_sample_correction, SiteEntropyOptions per_site_options)
Calculate the information content across all sites of a SiteCounts object.
Definition: sequence/functions/entropy.cpp:193
genesis::sequence::operator&
bool operator&(SiteEntropyOptions lhs, SiteEntropyOptions rhs)
And-operator to check whether a SiteEntropyOptions is set.
Definition: sequence/functions/entropy.hpp:190
genesis::sequence::SiteEntropyOptions::kNormalized
@ kNormalized
Normalize the resulting entropy using the maximum entropy possible.
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::sequence::site_entropy
double site_entropy(SiteCounts const &counts, size_t site_idx, SiteEntropyOptions options)
Calculate the entropy at one site of a SiteCounts object.
Definition: sequence/functions/entropy.cpp:49
genesis::sequence::SiteEntropyOptions
SiteEntropyOptions
Option flags to refine the calculation of site_entropy().
Definition: sequence/functions/entropy.hpp:70
genesis::sequence::site_information
double site_information(SiteCounts const &counts, size_t site_index, bool use_small_sample_correction, SiteEntropyOptions options)
Calculate the information content at one site of a SiteCounts object.
Definition: sequence/functions/entropy.cpp:110
genesis::sequence::average_entropy
double average_entropy(SiteCounts const &counts, bool only_determined_sites, SiteEntropyOptions per_site_options)
Return the average sum of all site entropies.
Definition: sequence/functions/entropy.cpp:147
genesis::sequence::absolute_information
double absolute_information(SiteCounts const &counts, bool use_small_sample_correction, SiteEntropyOptions per_site_options)
Calculate the information content across all sites of a SiteCounts object.
Definition: sequence/functions/entropy.cpp:181
genesis::sequence::SiteEntropyOptions::kIncludeGaps
@ kIncludeGaps
In addition to the characters of the SiteCounts object, use the undetermined and gap characters.
genesis::sequence::operator|
SiteEntropyOptions operator|(SiteEntropyOptions lhs, SiteEntropyOptions rhs)
Or-operator to combine two SiteEntropyOptions values.
Definition: sequence/functions/entropy.hpp:149
genesis::sequence::SiteEntropyOptions::kDefault
@ kDefault
Default option, simply calculate the site entropy using the characters used in the SiteCounts object.