A library for working with phylogenetic and population genetic data.
v0.32.0
family.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_REGRESSION_FAMILY_H_
2 #define GENESIS_UTILS_MATH_REGRESSION_FAMILY_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <cassert>
37 #include <cmath>
38 #include <cstdint>
39 #include <functional>
40 
41 namespace genesis {
42 namespace utils {
43 
44 // =================================================================================================
45 // Distribution Families
46 // =================================================================================================
47 
48 struct GlmFamily
49 {
53  enum Family{
59  };
60 
65 
71 
75  std::function<double( double mu )> variance;
76 
80  std::function<double( double y, double mu )> log_likelihood;
81 
85  std::function<double( double mu )> rectify;
86 
90  std::function<double( double y, double mu )> unit_deviance;
91 
95  std::function<GlmLink()> canonical_link;
96 };
97 
101 inline bool is_defined( GlmFamily const& family )
102 {
103  return family.variance && family.log_likelihood && family.rectify && family.unit_deviance;
104 }
105 
106 inline bool is_canonical_link( GlmFamily const& family, GlmLink const& link )
107 {
108  return ( family.canonical_link_id == link.id );
109 }
110 
111 // =================================================================================================
112 // Binomial Distribution
113 // =================================================================================================
114 
121 {
122  GlmFamily family;
123  family.id = GlmFamily::kBinomial;
125  family.variance = []( double mu )
126  {
127  return mu * (1.0 - mu);
128  };
129  family.log_likelihood = []( double y, double mu )
130  {
131  // Calculate both parts.
132  double l = y * std::log(mu);
133  double r = (1.0 - y) * std::log(1.0 - mu);
134 
135  // Because of floating point nan propagation, we need to reset if needed.
136  l = ( std::isfinite(l) ? l : 0.0 );
137  r = ( std::isfinite(r) ? r : 0.0 );
138 
139  return l + r;
140  };
141  family.rectify = []( double mu )
142  {
143  const double zero = 1.e-10;
144  const double one = 1.0 - 1.e-10;
145 
146  if( mu < zero ) {
147  return zero;
148  } else if( mu > one ) {
149  return one;
150  }
151  return mu;
152  };
153  family.unit_deviance = []( double y, double mu )
154  {
155  // Calculate both parts.
156  double l = y * std::log( y / mu );
157  double r = ( 1.0 - y ) * std::log(( 1.0 - y ) / ( 1.0 - mu ));
158 
159  // Because of floating point nan propagation, we need to reset if needed.
160  l = ( std::isfinite(l) ? l : 0.0 );
161  r = ( std::isfinite(r) ? r : 0.0 );
162 
163  return 2.0 * ( l + r );
164  };
165  family.canonical_link = []()
166  {
167  return glm_link_logit();
168  };
169  return family;
170 }
171 
172 // =================================================================================================
173 // Poison Distribution
174 // =================================================================================================
175 
182 {
183  GlmFamily family;
184  family.id = GlmFamily::kPoisson;
186  family.variance = []( double mu )
187  {
188  return mu;
189  };
190  family.log_likelihood = []( double y, double mu )
191  {
192  assert( mu > 0.0 );
193  return y * std::log(mu) - mu;
194  };
195  family.rectify = []( double mu )
196  {
197  const double zero = 1.e-10;
198 
199  if( mu < zero ) {
200  return zero;
201  }
202  return mu;
203  };
204  family.unit_deviance = []( double y, double mu )
205  {
206  assert( y > 0.0 );
207  assert( mu > 0.0 );
208  return 2.0 * ( y * std::log( y / mu ) - ( y - mu ));
209  };
210  family.canonical_link = []()
211  {
212  return glm_link_log();
213  };
214  return family;
215 }
216 
217 // =================================================================================================
218 // Gaussian Distribution
219 // =================================================================================================
220 
227 {
228  GlmFamily family;
229  family.id = GlmFamily::kGaussian;
231  family.variance = []( double mu )
232  {
233  (void) mu;
234  return 1.0;
235  };
236  family.log_likelihood = []( double y, double mu )
237  {
238  auto const x = y - mu;
239  return x * x;
240  };
241  family.rectify = []( double mu )
242  {
243  return mu;
244  };
245  family.unit_deviance = []( double y, double mu )
246  {
247  double const d = y - mu;
248  return d * d;
249  };
250  family.canonical_link = []()
251  {
252  return glm_link_identity();
253  };
254  return family;
255 }
256 
257 // =================================================================================================
258 // Gamma Distribution
259 // =================================================================================================
260 
267 {
268  GlmFamily family;
269  family.id = GlmFamily::kGamma;
271  family.variance = []( double mu )
272  {
273  return mu * mu;
274  };
275  family.log_likelihood = []( double y, double mu )
276  {
277  auto const x = y / mu;
278  assert( x > 0.0 );
279  return std::log(x) - x;
280  };
281  family.rectify = []( double mu )
282  {
283  return mu;
284  };
285  family.unit_deviance = []( double y, double mu )
286  {
287  double const f = y / mu;
288  assert( f > 0.0 );
289  return 2.0 * ( f - std::log( f ) - 1.0 );
290  };
291  family.canonical_link = []()
292  {
293  return glm_link_inverse();
294  };
295  return family;
296 }
297 
298 } // namespace utils
299 } // namespace genesis
300 
301 #endif // include guard
genesis::utils::GlmFamily::kGaussian
@ kGaussian
Definition: family.hpp:57
genesis::utils::GlmFamily::canonical_link_id
GlmLink::Link canonical_link_id
Internal ID of the GlmLink, used to check if the link is the canonical one for a given distribution f...
Definition: family.hpp:70
genesis::utils::GlmFamily
Definition: family.hpp:48
genesis::utils::is_defined
bool is_defined(GlmFamily const &family)
Check whether all necessary values and functors of a GlmFamily are set.
Definition: family.hpp:101
genesis::utils::GlmFamily::kBinomial
@ kBinomial
Definition: family.hpp:55
genesis::utils::GlmFamily::kGamma
@ kGamma
Definition: family.hpp:58
genesis::utils::glm_link_identity
GlmLink glm_link_identity()
Identity link functions.
Definition: utils/math/regression/link.hpp:153
genesis::utils::is_canonical_link
bool is_canonical_link(GlmFamily const &family, GlmLink const &link)
Definition: family.hpp:106
genesis::utils::GlmFamily::Family
Family
List of common GLM Families.
Definition: family.hpp:53
genesis::utils::glm_family_poisson
GlmFamily glm_family_poisson()
Poisson family functions.
Definition: family.hpp:181
genesis::utils::GlmFamily::canonical_link
std::function< GlmLink()> canonical_link
Get the canonical link function.
Definition: family.hpp:95
genesis::utils::glm_link_logit
GlmLink glm_link_logit()
Logit link functions.
Definition: utils/math/regression/link.hpp:97
genesis::utils::glm_link_inverse
GlmLink glm_link_inverse()
Inverse link functions.
Definition: utils/math/regression/link.hpp:182
genesis::utils::GlmFamily::kUnknown
@ kUnknown
Definition: family.hpp:54
genesis::utils::GlmFamily::kPoisson
@ kPoisson
Definition: family.hpp:56
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::glm_family_gaussian
GlmFamily glm_family_gaussian()
Gaussian/normal family functions.
Definition: family.hpp:226
genesis::utils::GlmFamily::rectify
std::function< double(double mu)> rectify
Rectify to a valid value, for the fitted mean, to avoid extreme predictions.
Definition: family.hpp:85
genesis::utils::glm_family_binomial
GlmFamily glm_family_binomial()
Binomial family functions.
Definition: family.hpp:120
genesis::utils::GlmFamily::log_likelihood
std::function< double(double y, double mu)> log_likelihood
Log-Likelihood contribution of a value. To be multiplied by prior weight.
Definition: family.hpp:80
genesis::utils::GlmFamily::variance
std::function< double(double mu)> variance
Variance function for the distribution family.
Definition: family.hpp:75
genesis::utils::glm_link_log
GlmLink glm_link_log()
Log link functions.
Definition: utils/math/regression/link.hpp:125
genesis::utils::glm_family_gamma
GlmFamily glm_family_gamma()
Gamma family functions.
Definition: family.hpp:266
genesis::utils::GlmFamily::id
Family id
Internal ID of the GlmFamily, used to check for specific families where needed.
Definition: family.hpp:64
genesis::utils::GlmFamily::unit_deviance
std::function< double(double y, double mu)> unit_deviance
Unit deviance for the distribution family.
Definition: family.hpp:90