// glm_8hpp_source.html (page title left over from the Doxygen source listing; not part of the header)

#ifndef GENESIS_UTILS_MATH_REGRESSION_GLM_H_

#define GENESIS_UTILS_MATH_REGRESSION_GLM_H_


/*

    Genesis - A toolkit for working with phylogenetic data.

    Copyright (C) 2014-2024 Lucas Czech


    This program is free software: you can redistribute it and/or modify

    it under the terms of the GNU General Public License as published by

    the Free Software Foundation, either version 3 of the License, or

    (at your option) any later version.


    This program is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY; without even the implied warranty of

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    GNU General Public License for more details.


    You should have received a copy of the GNU General Public License

    along with this program.  If not, see <http://www.gnu.org/licenses/>.


    Contact:

    Lucas Czech <lucas.czech@h-its.org>

    Exelixis Lab, Heidelberg Institute for Theoretical Studies

    Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany

*/


#include "genesis/utils/containers/matrix.hpp"

#include "genesis/utils/math/regression/family.hpp"

#include "genesis/utils/math/regression/link.hpp"


#include <utility>

#include <vector>


namespace genesis {

namespace utils {


// =================================================================================================

//     GLM Data Structures

// =================================================================================================


/**
 * @brief Optional additional settings for glm_fit(), handed over via its `extras` parameter.
 *
 * A default-constructed instance has all vectors empty, `with_intercept` set to `true`,
 * `residual_type` set to `kDefault`, and `mean_deviance` set to `false`.
 *
 * NOTE(review): How each setting is interpreted is decided in the glm_fit() definition, which
 * is not part of this header. Descriptions marked "presumably" are inferred from the names.
 */
struct GlmExtras
{
    /**
     * @brief Choice of residual flavor, see `residual_type`.
     *
     * NOTE(review): what `kDefault` resolves to is up to the glm_fit() implementation - confirm.
     */
    enum ResidualType
    {
        kDefault,
        kPearsonResiduals,
        kDevianceResiduals
    };

    /// Presumably starting values for the fitted values; empty by default - TODO confirm.
    std::vector<double> initial_fittings;

    /// Presumably per-observation prior weights; empty by default - TODO confirm.
    std::vector<double> prior_weights;

    /// Whether an intercept is included; on by default.
    bool with_intercept{ true };

    /// Presumably a stratum assignment per observation; empty by default - TODO confirm.
    std::vector<size_t> strata;

    /// Residual flavor requested from the fit; `kDefault` unless set otherwise.
    ResidualType residual_type{ kDefault };

    /// Off by default. Presumably switches to a mean-deviance computation - TODO confirm.
    bool mean_deviance{ false };
};


/**
 * @brief Numerical control parameters for glm_fit(), handed over via its `control` parameter.
 *
 * NOTE(review): The exact use of each value lives in the glm_fit() definition, which is not part
 * of this header. Descriptions marked "presumably" are inferred from the names.
 */
struct GlmControl
{
    /// Defaults to 25. Presumably the upper bound on fitting iterations - TODO confirm.
    size_t max_iterations{ 25 };

    /// Defaults to 1e-5. Presumably the convergence tolerance - TODO confirm.
    double epsilon{ 1.e-5 };

    /// Defaults to 0.99. Presumably an R^2 threshold for treating a predictor as aliased
    /// with earlier ones - TODO confirm.
    double max_r2{ 0.99 };
};


/**
 * @brief Result of glm_fit(); also the input to glm_estimate_betas(), glm_estimate_intercept(),
 * and glm_coefficients().
 *
 * A default-constructed instance reports no convergence, zero iterations, rank and residual
 * degrees of freedom of zero, a scale of 1.0, and deviances of 0.0.
 *
 * NOTE(review): The code that fills these fields is not part of this header. Descriptions marked
 * "presumably" are inferred from the names and need to be confirmed against the implementation.
 */
struct GlmOutput
{
    /// Whether the fit converged; `false` until set.
    bool converged{ false };

    /// Number of iterations that were run.
    size_t num_iterations{ 0 };

    /// Presumably the rank of the predictor matrix as used in the fit - TODO confirm.
    size_t rank{ 0 };

    /// Presumably the residual degrees of freedom - TODO confirm.
    size_t df_resid{ 0 };

    /// Scale factor; 1.0 unless set otherwise.
    double scale{ 1.0 };

    /// Presumably a basis for the space spanned by the predictors - TODO confirm.
    Matrix<double> Xb;

    /// Presumably the fitted values, one per observation - TODO confirm.
    std::vector<double> fitted;

    /// Presumably the residuals, one per observation - TODO confirm.
    std::vector<double> resid;

    /// Presumably the weights, one per observation - TODO confirm.
    std::vector<double> weights;

    /// Presumably identifies which predictor columns were estimated - TODO confirm.
    /// NOTE(review): stored as doubles; verify how the values are meant to be read.
    std::vector<double> which;

    /// Presumably parameter estimates expressed in terms of `Xb` - TODO confirm.
    std::vector<double> betaQ;

    /// Presumably a packed triangular transformation matrix - TODO confirm layout.
    std::vector<double> tri;

    /// Deviance of the null model; 0.0 until set.
    double null_deviance{ 0.0 };

    /// Deviance of the fitted model; 0.0 until set.
    double deviance{ 0.0 };
};


// =================================================================================================

//     GLM Fit

// =================================================================================================


/**
 * @brief Fit a GLM of @p y_response on @p x_predictors, using an explicitly given family and link.
 *
 * @param x_predictors Matrix of predictor values.
 *                     NOTE(review): presumably one row per observation - confirm in the definition.
 * @param y_response   Response values.
 * @param family       Distribution family to use.
 * @param link         Link function to use.
 * @param extras       Optional additional settings; a default-constructed GlmExtras if omitted.
 * @param control      Numerical control parameters; a default-constructed GlmControl if omitted.
 * @return The fit result as a GlmOutput.
 */
GlmOutput glm_fit(

    Matrix<double> const&      x_predictors,

    std::vector<double> const& y_response,

    GlmFamily const&           family,

    GlmLink const&             link,

    GlmExtras const&           extras = {},

    GlmControl const&          control = {}

);


/**
 * @brief Fit a GLM of @p y_response on @p x_predictors, using the given family and no explicit link.
 *
 * NOTE(review): the link is presumably derived from @p family (e.g., its canonical link) -
 * confirm in the definition, which is not part of this header.
 *
 * @param x_predictors Matrix of predictor values.
 * @param y_response   Response values.
 * @param family       Distribution family to use.
 * @param extras       Optional additional settings; a default-constructed GlmExtras if omitted.
 * @param control      Numerical control parameters; a default-constructed GlmControl if omitted.
 * @return The fit result as a GlmOutput.
 */
GlmOutput glm_fit(

    Matrix<double> const&      x_predictors,

    std::vector<double> const& y_response,

    GlmFamily const&           family,

    GlmExtras const&           extras = {},

    GlmControl const&          control = {}

);


/**
 * @brief Fit a GLM of @p y_response on @p x_predictors without specifying family or link.
 *
 * NOTE(review): which family and link are used by this overload is decided in the definition,
 * which is not part of this header - confirm there.
 *
 * @param x_predictors Matrix of predictor values.
 * @param y_response   Response values.
 * @param extras       Optional additional settings; a default-constructed GlmExtras if omitted.
 * @param control      Numerical control parameters; a default-constructed GlmControl if omitted.
 * @return The fit result as a GlmOutput.
 */
GlmOutput glm_fit(

    Matrix<double> const&      x_predictors,

    std::vector<double> const& y_response,

    GlmExtras const&           extras = {},

    GlmControl const&          control = {}

);


// =================================================================================================

//     GLM Output

// =================================================================================================


/**
 * @brief Compute the beta estimates from a GlmOutput.
 *
 * @param output Fit result; presumably as returned by glm_fit() - confirm.
 * @return The beta estimates.
 *         NOTE(review): presumably one value per predictor variable - confirm in the definition.
 */
std::vector<double> glm_estimate_betas( GlmOutput const& output );


// /**

//  * @brief Obtain beta estimates and variance covariance matrix of estimates from the output

//  * of glm_fit().

//  *

//  * The resulting variance covariance matrix is a packed symmetric matrix with the size of the

//  * number of predictor variables (which is the size of the betas).

//  * Robust variance is calculated if the "meat" matrix for the information sandwich is supplied.

//  */

// std::pair<std::vector<double>, std::vector<double>> glm_estimate_betas_and_var_covar(

//     GlmOutput const& output,

//     std::vector<double> const& meat = std::vector<double>{}

// );


/**
 * @brief Compute the intercept for a fitted model, without specifying a link.
 *
 * NOTE(review): which link this overload assumes is decided in the definition, which is not
 * part of this header - confirm there.
 *
 * @param x_predictors Matrix of predictor values; presumably the one given to glm_fit().
 * @param y_response   Response values; presumably the ones given to glm_fit().
 * @param output       Fit result; presumably as returned by glm_fit().
 * @param betas        Beta estimates; presumably as returned by glm_estimate_betas().
 * @return The intercept as a single value.
 */
double glm_estimate_intercept(

    Matrix<double> const&      x_predictors,

    std::vector<double> const& y_response,

    GlmOutput const&           output,

    std::vector<double> const& betas

);


/**
 * @brief Compute the intercept for a fitted model, using an explicitly given link.
 *
 * @param x_predictors Matrix of predictor values; presumably the one given to glm_fit().
 * @param y_response   Response values; presumably the ones given to glm_fit().
 * @param link         Link function; presumably the one the model was fitted with.
 * @param output       Fit result; presumably as returned by glm_fit().
 * @param betas        Beta estimates; presumably as returned by glm_estimate_betas().
 * @return The intercept as a single value.
 */
double glm_estimate_intercept(

    Matrix<double> const&      x_predictors,

    std::vector<double> const& y_response,

    GlmLink const&             link,

    GlmOutput const&           output,

    std::vector<double> const& betas

);


/**
 * @brief Compute the model coefficients for a fitted model, without specifying a link.
 *
 * NOTE(review): presumably combines glm_estimate_intercept() and glm_estimate_betas(); the
 * layout of the result (e.g., whether the intercept comes first) and the link assumed by this
 * overload need to be confirmed in the definition, which is not part of this header.
 *
 * @param x_predictors Matrix of predictor values; presumably the one given to glm_fit().
 * @param y_response   Response values; presumably the ones given to glm_fit().
 * @param output       Fit result; presumably as returned by glm_fit().
 * @return The coefficients as a vector.
 */
std::vector<double> glm_coefficients(

    Matrix<double> const&      x_predictors,

    std::vector<double> const& y_response,

    GlmOutput const&           output

);


/**
 * @brief Compute the model coefficients for a fitted model, using an explicitly given link.
 *
 * NOTE(review): presumably combines glm_estimate_intercept() and glm_estimate_betas(); the
 * layout of the result (e.g., whether the intercept comes first) needs to be confirmed in the
 * definition, which is not part of this header.
 *
 * @param x_predictors Matrix of predictor values; presumably the one given to glm_fit().
 * @param y_response   Response values; presumably the ones given to glm_fit().
 * @param link         Link function; presumably the one the model was fitted with.
 * @param output       Fit result; presumably as returned by glm_fit().
 * @return The coefficients as a vector.
 */
std::vector<double> glm_coefficients(

    Matrix<double> const&      x_predictors,

    std::vector<double> const& y_response,

    GlmLink const&             link,

    GlmOutput const&           output

);


} // namespace utils

} // namespace genesis


#endif // include guard