A library for working with phylogenetic and population genetic data.
v0.27.0
glm.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_REGRESSION_GLM_H_
2 #define GENESIS_UTILS_MATH_REGRESSION_GLM_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
37 
38 #include <vector>
39 
40 namespace genesis {
41 namespace utils {
42 
43 // =================================================================================================
44 // Generalized Linear Model
45 // =================================================================================================
46 
/**
 * @brief Optional settings for glm_fit().
 *
 * All members have defaults, so a default-constructed instance can be passed as is.
 */
struct GlmExtras
{
    // NOTE(review): judging by the name, these are starting values for the fit; an empty
    // vector presumably means "none provided" - confirm against glm.cpp.
    std::vector<double> initial_fittings;

    // NOTE(review): presumably one prior weight per observation, with empty meaning
    // "unweighted" - confirm against glm.cpp.
    std::vector<double> prior_weights;

    // NOTE(review): presumably toggles fitting of an intercept term - confirm against glm.cpp.
    bool with_intercept = true;

    /**
     * @brief Strata assignments coded 1...S.
     */
    std::vector<size_t> strata;

    /**
     * @brief Selects which kind of residuals to use.
     *
     * Unscoped on purpose: the enumerators are reachable both as `GlmExtras::kDefault`
     * and as `GlmExtras::ResidualType::kDefault`.
     */
    enum ResidualType
    {
        kDefault,
        kPearsonResiduals,
        kDevianceResiduals
    };

    ResidualType residual_type = ResidualType::kDefault;

    /**
     * @brief Calculate mean null_deviance and mean deviance instead of their sums.
     */
    bool mean_deviance = false;
};
77 
/**
 * @brief Control parameters for the iterative fitting done by glm_fit().
 */
struct GlmControl
{
    /**
     * @brief Maximum number of iterations to run the IRLS algorithm for (if needed).
     */
    size_t max_iterations = 25;

    /**
     * @brief Proportional change in weighted sum of squares residuals to declare
     * convergence between two iterations.
     */
    double epsilon = 1.e-5;

    /**
     * @brief Threshold for singularities. Internally used as `eta = 1.0 - max_r2`.
     */
    double max_r2 = 0.99;
};
98 
99 struct GlmOutput
100 {
101  bool converged = false;
102  size_t num_iterations = 0;
103 
107  size_t rank = 0;
108 
112  size_t df_resid = 0;
113 
117  double scale = 1.0;
118 
123 
127  std::vector<double> fitted;
128 
132  std::vector<double> resid;
133 
137  std::vector<double> weights;
138 
142  std::vector<double> which;
143 
147  std::vector<double> betaQ;
148 
153  std::vector<double> tri;
154 
155  double null_deviance = 0.0;
156  double deviance = 0.0;
157 };
158 
166  Matrix<double> const& x_predictors,
167  std::vector<double> const& y_response,
168  GlmFamily const& family,
169  GlmLink const& link,
170  GlmExtras const& extras = {},
171  GlmControl const& control = {}
172 );
173 
182 GlmOutput glm_fit(
183  Matrix<double> const& x_predictors,
184  std::vector<double> const& y_response,
185  GlmFamily const& family,
186  GlmExtras const& extras = {},
187  GlmControl const& control = {}
188 );
189 
196 GlmOutput glm_fit(
197  Matrix<double> const& x_predictors,
198  std::vector<double> const& y_response,
199  GlmExtras const& extras = {},
200  GlmControl const& control = {}
201 );
202 
203 } // namespace utils
204 } // namespace genesis
205 
206 #endif // include guard
genesis::utils::GlmExtras::mean_deviance
bool mean_deviance
Calculate mean null_deviance and mean deviance instead of their sums.
Definition: glm.hpp:75
genesis::utils::GlmExtras::kPearsonResiduals
@ kPearsonResiduals
Definition: glm.hpp:61
genesis::utils::GlmOutput::rank
size_t rank
Rank of X after regression on strata.
Definition: glm.hpp:107
genesis::utils::GlmOutput
Definition: glm.hpp:99
genesis::utils::GlmOutput::scale
double scale
Scale factor (scalar).
Definition: glm.hpp:117
genesis::utils::GlmFamily
Definition: family.hpp:48
genesis::utils::GlmOutput::num_iterations
size_t num_iterations
Definition: glm.hpp:102
genesis::utils::GlmControl::max_iterations
size_t max_iterations
Maximum number of iterations to run the IRLS algorithm for (if needed).
Definition: glm.hpp:83
genesis::utils::GlmExtras
Definition: glm.hpp:47
genesis::utils::glm_fit
GlmOutput glm_fit(Matrix< double > const &x_predictors, std::vector< double > const &y_response, GlmFamily const &family, GlmLink const &link, GlmExtras const &extras, GlmControl const &control)
Fit a Generalized Linear Model (GLM).
Definition: glm.cpp:355
genesis::utils::GlmOutput::Xb
Matrix< double > Xb
Orthogonal basis for X space (N * M matrix, with N * rank being used).
Definition: glm.hpp:122
genesis::utils::GlmExtras::ResidualType
ResidualType
Definition: glm.hpp:58
genesis::utils::GlmControl
Definition: glm.hpp:78
genesis::utils::Matrix< double >
genesis::utils::GlmOutput::null_deviance
double null_deviance
Definition: glm.hpp:155
genesis::utils::GlmOutput::tri
std::vector< double > tri
Upper unit triangular transformation matrix, with Xb - tr.Xb placed in the diagonal (size (M * (M+1)) / 2).
Definition: glm.hpp:153
genesis::utils::GlmExtras::residual_type
ResidualType residual_type
Definition: glm.hpp:65
genesis::utils::GlmExtras::strata
std::vector< size_t > strata
Strata assignments coded 1...S.
Definition: glm.hpp:56
genesis::utils::GlmOutput::weights
std::vector< double > weights
Weights (size N)
Definition: glm.hpp:137
genesis::utils::GlmOutput::deviance
double deviance
Definition: glm.hpp:156
genesis::utils::GlmControl::max_r2
double max_r2
Threshold for singularities. Internally used as eta = 1.0 - max_r2.
Definition: glm.hpp:96
matrix.hpp
genesis::utils::GlmOutput::converged
bool converged
Definition: glm.hpp:101
genesis::utils::GlmExtras::initial_fittings
std::vector< double > initial_fittings
Definition: glm.hpp:49
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::GlmControl::epsilon
double epsilon
Proportional change in weighted sum of squares residuals to declare convergence between two iteration...
Definition: glm.hpp:89
genesis::utils::GlmOutput::df_resid
size_t df_resid
Residual degrees of freedom.
Definition: glm.hpp:112
family.hpp
genesis::utils::GlmOutput::which
std::vector< double > which
Which columns in the X matrix were estimated (first = 0) (size M).
Definition: glm.hpp:142
genesis::utils::GlmExtras::kDefault
@ kDefault
Definition: glm.hpp:60
genesis::utils::GlmOutput::fitted
std::vector< double > fitted
Fitted values (size N).
Definition: glm.hpp:127
genesis::utils::GlmExtras::with_intercept
bool with_intercept
Definition: glm.hpp:51
genesis::utils::GlmExtras::prior_weights
std::vector< double > prior_weights
Definition: glm.hpp:50
genesis::utils::GlmOutput::betaQ
std::vector< double > betaQ
Vector of parameter estimates (in terms of basis matrix, Xb) (size M).
Definition: glm.hpp:147
genesis::utils::GlmExtras::kDevianceResiduals
@ kDevianceResiduals
Definition: glm.hpp:62
genesis::utils::GlmOutput::resid
std::vector< double > resid
Working residuals (on linear predictor scale) (size N).
Definition: glm.hpp:132