83 std::vector<double>
const& y_response,
91 auto const N = y_response.size();
92 auto const M = x_predictors.
cols();
95 assert( x_predictors.
rows() == N );
97 assert( extras.
strata.empty() || extras.
strata.size() == N );
100 auto y_working = std::vector<double>( N );
107 double logL_prev = 0.0;
109 for(
size_t i = 0; i < N; i++ ) {
122 auto xb_tmp = std::vector<double>();
127 for(
size_t i = 0; i < M; ++i ) {
133 result.
Xb.
col( xb_col ) = xb_tmp;
142 if( result.
rank > 0 ) {
143 for(
size_t j = 0; j < result.
rank; ++j ) {
148 result.
Xb.
col( xb_col ),
152 result.
Xb.
col( xb_col ) = xb_tmp;
155 result.
tri[ij] = bij;
162 if ((ssx > 0.0) && (ssr / ssx > 1.0 - control.
max_r2)) {
164 result.
Xb.
col( xb_col ),
172 assert(( xb_col < M ) xor ( i+1 == M ));
175 result.
tri[ij] = ssr;
176 result.
which[ii] = i;
177 result.
betaQ[ii] = bQi;
188 freedom.valid_entries = 0;
190 for(
size_t i = 0; i < N; ++i ) {
199 if( !( pi != 0.0 && ( result.
weights[i] > 0.0 ))) {
203 double const Vmu = family.
variance( mu );
204 ++freedom.valid_entries;
211 ri = ( y_response[i] - mu ) / Vmu;
215 ri = ( y_response[i] - mu ) / std::sqrt( Vmu );
220 ri =
signum( y_response[i] - mu ) * std::sqrt( ud );
224 throw std::invalid_argument(
"Invalid residual type specified." );
229 wi = pi / (D * D * Vmu);
230 ri = D * (y_response[i] - mu);
235 result.
resid[i] = ri;
238 long const dfr = freedom.degrees_of_freedom( result.
rank );
239 result.
df_resid = dfr > 0 ?
static_cast<size_t>( dfr ) : 0;
241 result.
scale = wss / (dfr);
246 double dL = (logL - logL_prev) / (result.
scale);
255 for(
size_t i = 0; i < N; i++ ) {
266 std::vector<double>
const& y_response,
273 auto const N = y_response.size();
274 auto const M = x_predictors.
cols();
277 assert( x_predictors.
rows() == N );
278 assert( extras.
strata.empty() || extras.
strata.size() == N );
281 auto xb_tmp = std::vector<double>();
286 for(
size_t i = 0; i < M; i++ ) {
291 result.
Xb.
col( xb_col ) = xb_tmp;
297 if( result.
rank > 0 ) {
298 for(
size_t j = 0; j < result.
rank; ++j ) {
301 result.
Xb.
col( xb_col ),
305 result.
Xb.
col( xb_col ) = xb_tmp;
308 result.
tri[ij] = bij;
315 if(( ssx > 0.0) && (ssr / ssx > 1.0 - control.
max_r2 )) {
317 result.
Xb.
col( xb_col ),
327 result.
tri[ij] = ssr;
328 result.
which[ii] = i;
329 result.
betaQ[ii] = bQi;
338 for(
size_t i = 0; i < N; ++i ) {
339 result.
fitted[i] = y_response[i] - result.
resid[i];
344 result.
scale = wss / dfr;
345 result.
df_resid = dfr > 0 ?
static_cast<size_t>( dfr ) : 0;
357 std::vector<double>
const& y_response,
365 auto const N = y_response.size();
366 auto const M = x_predictors.
cols();
369 if( x_predictors.
rows() != N ) {
370 throw std::invalid_argument(
"glm_fit: size of rows of x is not size of y." );
373 throw std::invalid_argument(
"glm_fit: size of initial fittings is not size of y." );
377 throw std::invalid_argument(
"glm_fit: size of prior weights is not size of y." );
380 if( ! extras.
strata.empty() && extras.
strata.size() != N ) {
381 throw std::invalid_argument(
"glm_fit: size of strata is not size of y." );
383 assert( extras.
strata.empty() || extras.
strata.size() == N );
385 throw std::invalid_argument(
"glm_fit: epsilon has to be in ( 0.0, 1.0 ]" );
388 throw std::invalid_argument(
"glm_fit: max_r2 has to be in ( 0.0, 1.0 )" );
391 throw std::invalid_argument(
"glm_fit: family is not properly defined." );
394 throw std::invalid_argument(
"glm_fit: link is not properly defined." );
402 result.
fitted = std::vector<double>( N );
403 result.
resid = std::vector<double>( N );
404 result.
weights = std::vector<double>( N );
405 result.
which = std::vector<double>( M );
406 result.
betaQ = std::vector<double>( M );
407 result.
tri = std::vector<double>( (M * ( M+1 )) / 2 );
410 bool const irls = (M > 0) && !(
431 for(
size_t i = 0; i < N; i++ ) {
432 double const mu = result.
fitted[i];
437 if( std::isfinite( ud )) {
442 if(( ! std::isfinite(pi) ) || ( pi < 0.0 )) {
443 throw std::invalid_argument(
"glm_fit: prior weights have to be non-negative." );
444 }
else if( pi == 0.0 ) {
445 result.
resid[i] = 0.0;
448 assert( std::isfinite( pi ) || ( pi > 0.0 ));
451 double const Vmu = family.
variance( mu );
455 result.
resid[i] = ( y_response[i] - mu ) / Vmu;
459 result.
resid[i] = ( y_response[i] - mu ) / std::sqrt( Vmu );
463 result.
resid[i] =
signum( y_response[i] - mu ) * std::sqrt( ud );
467 throw std::invalid_argument(
"Invalid residual type specified." );
473 result.
resid[i] = D * (y_response[i] - mu);
474 result.
weights[i] = pi / (D * D * Vmu);
487 glm_irls_( x_predictors, y_response, family, link, extras, control, result );
489 glm_gaussian_( x_predictors, y_response, extras, control, freedom, result );
494 for(
size_t i = 0; i < N; i++ ) {
496 if( std::isfinite( ud )) {
501 result.
deviance /=
static_cast<double>(N);
513 result.
df_resid = dfr > 0 ?
static_cast<size_t>( dfr ) : 0;
521 std::vector<double>
const& y_response,
527 throw std::runtime_error(
"glm_fit: family does not provide a canonical link." );
534 std::vector<double>
const& y_response,
539 return glm_fit( x_predictors, y_response, family, family.canonical_link(), extras, control );
549 std::vector<double>
glm_inv_tri_( std::vector<double>
const& tri,
size_t M )
551 if( tri.size() != (M * ( M+1 )) / 2 ) {
552 throw std::invalid_argument(
553 "glm_inv_tri_(): Input tri vector is expected to have tridiagonal form."
558 for(
size_t j = 0, ij = 0; j < M; j++ ) {
559 for(
size_t i = 0, ii1 = 1; i < j; i++, ii1 += (i+2) ) {
561 for(
size_t k = i+1, kj = ij+1, ik = ii1; k < j; k++, kj++, ik += k ) {
562 w += result[ik]*tri[kj];
566 double diag = tri[ij];
569 "glm_inv_tri_(): negative diagonal with j==" +
std::to_string(j) +
573 result[ij++] = 1/diag;
584 std::vector<double>
const& inv_tri
586 assert( inv_tri.size() == output.
tri.size() );
588 throw std::invalid_argument(
589 "Invalid size of betaQ for computing glm_estimate_betas()"
593 auto const M = output.
Xb.
cols();
594 auto betas = std::vector<double>( M );
595 for(
size_t i = 0, ijs = 1; i < M; i++, ijs += (2+i) ) {
596 double w = output.
betaQ[i];
597 for(
size_t j = i+1, ij = ijs; j < M; j++, ij += j ) {
598 w += output.
betaQ[j] * inv_tri[ij];
620 assert( U.size() == (M * ( M+1 )) / 2 );
632 auto result = std::vector<double>( U.size() );
633 for(
size_t j = 0, ij = 0, jj = 0; j < M; j++, jj += (1 + j) ) {
634 for(
size_t i = 0; i <= j; i++, ij++ ) {
637 size_t k = j, jk = jj, ik = jj - i + j, kk = jj;
639 k++, kk += (1 + k), jk += k, ik += k
641 double Uik = (i == k) ? 1.0 : U[ik];
642 double Ujk = (j == k) ? 1.0 : U[jk];
646 result[ij] =
scale * w;
660 size_t M, std::vector<double>
const& U, std::vector<double>
const& V,
double scale
662 assert( U.size() == (M * ( M+1 )) / 2 );
664 auto result = std::vector<double>( U.size() );
665 for(
size_t j = 0, ij = 0, jj = 0; j < M; jj += (2 + (j++)) ) {
666 for(
size_t i = 0, ii = 0; i <= j; ii += (2 + (i++)), ij++ ) {
669 size_t v = j, vv = jj, jv = jj, uv = ij;
671 v++, vv += (2 + v), jv += v
673 double Ujv = (v == j) ? 1.0 : U[jv];
675 for(
size_t u = i, uu = ii, iu = ii; u < M; u++, uu += (2 + u), iu += u ) {
676 double Uiu = (u == i) ? 1.0 : U[iu];
678 w += Du * Dv * Uiu * Ujv * V[uv];
687 result[ij] =
scale * w;
695 std::vector<double>
const& meat
697 throw std::runtime_error(
"glm_estimate_betas_and_var_covar() not implemented" );
699 auto const M = output.
Xb.
cols();
702 assert( betas.size() == M );
704 std::vector<double> vars_covars;
710 assert( vars_covars.size() == (M * ( M+1 )) / 2 );
711 return { betas, vars_covars };
716 std::vector<double>
const& y_response,
718 std::vector<double>
const& betas
727 std::vector<double>
const& y_response,
730 std::vector<double>
const& betas
732 if( betas.size() != x_predictors.
cols() ) {
733 throw std::invalid_argument(
734 "Invalid size of betas for computing glm_estimate_intercept()"
751 assert( std::isfinite( weight_sum ));
754 auto y_response_transformed = y_response;
755 for(
auto& y : y_response_transformed ) {
762 assert( std::isfinite( y_avg ));
766 double result = y_avg;
767 for(
size_t i = 0; i < x_predictors.
cols(); ++i ) {
769 assert( std::isfinite( x_col_avg ));
770 result -= betas[i] * x_col_avg;
777 std::vector<double>
const& y_response,
785 std::vector<double>
const& y_response,
791 coeffs.insert( coeffs.begin(), intercept );