A toolkit for working with phylogenetic data.
v0.24.0
glm.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_REGRESSION_GLM_H_
2 #define GENESIS_UTILS_MATH_REGRESSION_GLM_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2019 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
37 
38 #include <vector>
39 
40 namespace genesis {
41 namespace utils {
42 
43 // =================================================================================================
44 // Generalized Linear Model
45 // =================================================================================================
46 
// Optional extra settings accepted by the GLM fitting declarations in this header
// (passed as `GlmExtras const& extras = {}`), so every member has a usable default.
47 struct GlmExtras
48 {
// NOTE(review): no description is visible in this listing for the next two members;
// presumably starting fitted values and per-observation prior weights. Confirm against glm.cpp.
49  std::vector<double> initial_fittings;
50  std::vector<double> prior_weights;
// Defaults to true. NOTE(review): presumably controls whether an intercept term is fitted; confirm.
51  bool with_intercept = true;
52 
// Strata assignments coded 1...S.
56  std::vector<size_t> strata;
57 
// NOTE(review): this is the body of the ResidualType enumeration; its declaration line
// (listing line 58) and its enumerators (listing lines 60-62) are absent from this listing.
// The only enumerator visible here is kDefault, used as the default below.
59  {
63  };
64 
// Selects which kind of residual is used; defaults to ResidualType::kDefault.
65  ResidualType residual_type = ResidualType::kDefault;
66 
// Calculate mean null_deviance and mean deviance instead of their sums. Off by default.
75  bool mean_deviance = false;
76 };
77 
// Control parameters accepted by the GLM fitting declarations in this header
// (passed as `GlmControl const& control = {}`).
// NOTE(review): the original per-member documentation is not part of this listing;
// the meanings below are inferred from the names and must be confirmed against glm.cpp.
78 struct GlmControl
79 {
// Defaults to 25. Presumably the upper bound on fitting iterations; confirm.
83  size_t max_iterations = 25;
84 
// Defaults to 1.e-5. Presumably the convergence tolerance; confirm.
89  double epsilon = 1.e-5;
90 
// Defaults to 0.99. Presumably an R^2 threshold (e.g. for detecting collinear predictors); confirm.
96  double max_r2 = 0.99;
97 };
98 
// Result of a GLM fit, as returned by glm_fit(). In the size annotations below, N and M
// are the dimensions of the N * M basis matrix Xb (see the note on listing line 122).
99 struct GlmOutput
100 {
// Both start out as false / 0. NOTE(review): presumably report whether the fit converged
// and how many iterations were run; confirm against glm.cpp.
101  bool converged = false;
102  size_t num_iterations = 0;
103 
// NOTE(review): no description visible in this listing; presumably the rank of the fitted model.
107  size_t rank = 0;
108 
// NOTE(review): no description visible in this listing; presumably the residual degrees of freedom.
112  size_t df_resid = 0;
113 
// Defaults to 1.0. NOTE(review): no description visible in this listing; presumably a scale factor.
117  double scale = 1.0;
118 
// NOTE(review): listing line 122 is missing here. Per the cross-reference data of this page it
// declares `Matrix<double> Xb`: orthogonal basis for X space (N * M matrix, with N * rank being used).
123 
// Fitted values (size N).
127  std::vector<double> fitted;
128 
// Working residuals (on linear predictor scale) (size N).
132  std::vector<double> resid;
133 
// Weights (size N).
137  std::vector<double> weights;
138 
// Which columns in the X matrix were estimated (first = 0) (size M).
// NOTE(review): these are column indices but are stored as double.
142  std::vector<double> which;
143 
// Vector of parameter estimates (in terms of basis matrix, Xb) (size M).
147  std::vector<double> betaQ;
148 
// Upper unit triangular transformation matrix, with Xb - tr.Xb placed in the diagonal.
// NOTE(review): the size annotation is truncated in this listing ("(M * (M+1))..."); confirm.
153  std::vector<double> tri;
154 
// Both default to 0.0. Reported as sums, or as means when GlmExtras::mean_deviance is set.
155  double null_deviance = 0.0;
156  double deviance = 0.0;
157 };
158 
// Fit a Generalized Linear Model (GLM).
// NOTE(review): the head of this declaration (listing line 165) is missing from this listing;
// per the cross-reference data of this page it is `GlmOutput glm_fit(`, defined in glm.cpp.
// This overload takes the predictors matrix, the response vector, an explicit family and link,
// and optional extras/control settings that default to value-initialized objects.
166  Matrix<double> const& x_predictors,
167  std::vector<double> const& y_response,
168  GlmFamily const& family,
169  GlmLink const& link,
170  GlmExtras const& extras = {},
171  GlmControl const& control = {}
172 );
173 
// NOTE(review): the head of this declaration (return type and name) is missing from this
// listing; presumably another glm_fit overload. Confirm against the original header.
// Same parameter list as the declaration with an explicit link, but without the GlmLink
// argument; how the link is chosen is not visible here.
183  Matrix<double> const& x_predictors,
184  std::vector<double> const& y_response,
185  GlmFamily const& family,
186  GlmExtras const& extras = {},
187  GlmControl const& control = {}
188 );
189 
// NOTE(review): the head of this declaration (return type and name) is missing from this
// listing; presumably another glm_fit overload. Confirm against the original header.
// Takes only the predictors matrix and the response vector plus the optional extras/control
// settings; which family and link are used is not visible here.
197  Matrix<double> const& x_predictors,
198  std::vector<double> const& y_response,
199  GlmExtras const& extras = {},
200  GlmControl const& control = {}
201 );
202 
203 } // namespace utils
204 } // namespace genesis
205 
206 #endif // include guard
std::vector< double > prior_weights
Definition: glm.hpp:50
std::vector< double > which
Which columns in the X matrix were estimated (first = 0) (size M).
Definition: glm.hpp:142
std::vector< double > tri
Upper unit triangular transformation matrix, with Xb - tr.Xb placed in the diagonal (size (M * (M+1)) / 2).
Definition: glm.hpp:153
ResidualType residual_type
Definition: glm.hpp:65
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
std::vector< double > weights
Weights (size N)
Definition: glm.hpp:137
std::vector< size_t > strata
Strata assignments coded 1...S.
Definition: glm.hpp:56
Matrix< double > Xb
Orthogonal basis for X space (N * M matrix, with N * rank being used).
Definition: glm.hpp:122
void scale(Histogram &h, double factor)
Definition: operations.cpp:54
std::vector< double > initial_fittings
Definition: glm.hpp:49
std::vector< double > resid
Working residuals (on linear predictor scale) (size N).
Definition: glm.hpp:132
GlmOutput glm_fit(Matrix< double > const &x_predictors, std::vector< double > const &y_response, GlmFamily const &family, GlmLink const &link, GlmExtras const &extras, GlmControl const &control)
Fit a Generalized Linear Model (GLM).
Definition: glm.cpp:355
std::vector< double > betaQ
Vector of parameter estimates (in terms of basis matrix, Xb) (size M).
Definition: glm.hpp:147
bool mean_deviance
Calculate mean null_deviance and mean deviance instead of their sums.
Definition: glm.hpp:75
std::vector< double > fitted
Fitted values (size N).
Definition: glm.hpp:127