A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
statistics.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_MATRIX_STATISTICS_H_
2 #define GENESIS_UTILS_MATH_MATRIX_STATISTICS_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2017 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <utility>
38 #include <vector>
39 
40 namespace genesis {
41 namespace utils {
42 
43 // =================================================================================================
44 // Min Max
45 // =================================================================================================
46 
52 template< typename T >
54 {
55  auto ret = MinMaxPair<T>{ 0.0, 0.0 };
56 
57  for( auto const& e : data ) {
58  ret.min = std::min( ret.min, e );
59  ret.max = std::max( ret.max, e );
60  }
61 
62  return ret;
63 }
64 
70 template< typename T >
71 std::vector<MinMaxPair<T>> matrix_col_minmax( Matrix<T> const& data )
72 {
73  auto ret = std::vector<MinMaxPair<T>>( data.cols(), { 0.0, 0.0 } );
74 
75  // Nothing to do.
76  if( data.rows() == 0 ) {
77  return ret;
78  }
79 
80  // Init with the first row.
81  for( size_t c = 0; c < data.cols(); ++c ) {
82  ret[ c ].min = data( 0, c );
83  ret[ c ].max = data( 0, c );
84  }
85 
86  // Now go through all other rows.
87  // Our Matrix is row-major, so this way we make best use of the cache.
88  for( size_t r = 1; r < data.rows(); ++r ) {
89  for( size_t c = 0; c < data.cols(); ++c ) {
90 
91  // Find min and max of the column.
92  ret[ c ].min = std::min( ret[ c ].min, data( r, c ) );
93  ret[ c ].max = std::max( ret[ c ].max, data( r, c ) );
94  }
95  }
96 
97  return ret;
98 }
99 
105 template< typename T >
106 std::vector<MinMaxPair<T>> matrix_row_minmax( Matrix<T> const& data )
107 {
108  auto ret = std::vector<MinMaxPair<T>>( data.rows(), { 0.0, 0.0 } );
109 
110  // Nothing to do.
111  if( data.cols() == 0 ) {
112  return ret;
113  }
114 
115  for( size_t r = 0; r < data.rows(); ++r ) {
116  // Init with the first col.
117  ret[ r ].min = data( r, 0 );
118  ret[ r ].max = data( r, 0 );
119 
120  // Go through all other cols.
121  for( size_t c = 1; c < data.cols(); ++c ) {
122  ret[ r ].min = std::min( ret[ r ].min, data( r, c ) );
123  ret[ r ].max = std::max( ret[ r ].max, data( r, c ) );
124  }
125  }
126 
127  return ret;
128 }
129 
135 template< typename T >
136 T matrix_sum( Matrix<T> const& data )
137 {
138  // Get row sums.
139  auto sum = T{};
140  for( auto const& e : data ) {
141  sum += e;
142  }
143  return sum;
144 }
145 
151 template< typename T >
152 std::vector<T> matrix_row_sums( Matrix<T> const& data )
153 {
154  // Get row sums.
155  auto row_sums = std::vector<T>( data.rows(), T{} );
156  for( size_t i = 0; i < data.rows(); ++i ) {
157  for( size_t j = 0; j < data.cols(); ++j ) {
158  row_sums[ i ] += data( i, j );
159  }
160  }
161  return row_sums;
162 }
163 
169 template< typename T >
170 std::vector<T> matrix_col_sums( Matrix<T> const& data )
171 {
172  // Get col sums.
173  auto col_sums = std::vector<T>( data.cols(), T{} );
174  for( size_t i = 0; i < data.rows(); ++i ) {
175  for( size_t j = 0; j < data.cols(); ++j ) {
176  col_sums[ j ] += data( i, j );
177  }
178  }
179  return col_sums;
180 }
181 
182 // =================================================================================================
183 // Normalization and Standardization
184 // =================================================================================================
185 
/**
 * @brief Normalize the columns of a Matrix so that all values are in the range [ 0.0, 1.0 ].
 *
 * The Matrix is taken by non-const reference.
 * NOTE(review): presumably the data is rescaled in place and the returned vector holds the
 * per-column min and max found before rescaling - confirm against statistics.cpp.
 */
198 std::vector<MinMaxPair<double>> normalize_cols( Matrix<double>& data );
199 
/**
 * @brief Normalize the rows of a Matrix so that all values are in the range [ 0.0, 1.0 ].
 *
 * The Matrix is taken by non-const reference.
 * NOTE(review): presumably the data is rescaled in place and the returned vector holds the
 * per-row min and max found before rescaling - confirm against statistics.cpp.
 */
212 std::vector<MinMaxPair<double>> normalize_rows( Matrix<double>& data );
213 
/**
 * @brief Standardize the columns of a Matrix by subtracting the mean and scaling to unit variance.
 *
 * Both flags default to `true`.
 * NOTE(review): presumably `scale_means` toggles the mean subtraction and `scale_std` toggles the
 * scaling to unit variance, and the result holds the per-column mean and standard deviation -
 * confirm against statistics.cpp.
 */
231 std::vector<MeanStddevPair> standardize_cols(
232  Matrix<double>& data,
233  bool scale_means = true,
234  bool scale_std = true
235 );
236 
/**
 * @brief Standardize the rows of a Matrix by subtracting the mean and scaling to unit variance.
 *
 * Both flags default to `true`.
 * NOTE(review): presumably `scale_means` toggles the mean subtraction and `scale_std` toggles the
 * scaling to unit variance, and the result holds the per-row mean and standard deviation -
 * confirm against statistics.cpp.
 */
254 std::vector<MeanStddevPair> standardize_rows(
255  Matrix<double>& data,
256  bool scale_means = true,
257  bool scale_std = true
258 );
259 
260 // =================================================================================================
261 // Mean and Stddev
262 // =================================================================================================
263 
/**
 * @brief Calculate the mean and standard deviation of all elements in a Matrix.
 *
 * NOTE(review): the meaning of `epsilon` (default `-1.0`) is not visible in this header;
 * see the definition in statistics.cpp.
 */
275 MeanStddevPair matrix_mean_stddev(
276  Matrix<double> const& data,
277  double epsilon = -1.0
278 );
279 
/**
 * @brief Calculate the column-wise mean and standard deviation of a Matrix.
 *
 * NOTE(review): the meaning of `epsilon` (default `-1.0`) is not visible in this header;
 * see the definition in statistics.cpp.
 */
291 std::vector<MeanStddevPair> matrix_col_mean_stddev(
292  Matrix<double> const& data,
293  double epsilon = -1.0
294 );
295 
/**
 * @brief Calculate the row-wise mean and standard deviation of a Matrix.
 *
 * NOTE(review): the meaning of `epsilon` (default `-1.0`) is not visible in this header;
 * see the definition in statistics.cpp.
 */
307 std::vector<MeanStddevPair> matrix_row_mean_stddev(
308  Matrix<double> const& data,
309  double epsilon = -1.0
310 );
311 
312 // =================================================================================================
313 // Quartiles
314 // =================================================================================================
315 
/**
 * @brief Calculate the Quartiles of the elements in a Matrix of double.
 */
319 Quartiles matrix_quartiles(
320  Matrix<double> const& data
321 );
322 
/**
 * @brief Quartiles for a single row of a Matrix.
 *
 * NOTE(review): presumably `row` is the index of the row to evaluate - confirm against
 * statistics.cpp.
 */
323 Quartiles matrix_row_quartiles(
324  Matrix<double> const& data,
325  size_t row
326 );
327 
/**
 * @brief Overload returning a vector of Quartiles.
 *
 * NOTE(review): presumably one entry per row of the Matrix - confirm against statistics.cpp.
 */
328 std::vector<Quartiles> matrix_row_quartiles(
329  Matrix<double> const& data
330 );
331 
/**
 * @brief Quartiles for a single column of a Matrix.
 *
 * NOTE(review): presumably `col` is the index of the column to evaluate - confirm against
 * statistics.cpp.
 */
332 Quartiles matrix_col_quartiles(
333  Matrix<double> const& data,
334  size_t col
335 );
336 
/**
 * @brief Overload returning a vector of Quartiles.
 *
 * NOTE(review): presumably one entry per column of the Matrix - confirm against statistics.cpp.
 */
337 std::vector<Quartiles> matrix_col_quartiles(
338  Matrix<double> const& data
339 );
340 
341 // =================================================================================================
342 // Correlation and Covariance
343 // =================================================================================================
344 
/**
 * @brief Calculate the correlation Matrix of a given data Matrix.
 */
351 Matrix<double> correlation_matrix( Matrix<double> const& data );
352 
/**
 * @brief Calculate the covariance Matrix of a given data Matrix.
 */
359 Matrix<double> covariance_matrix( Matrix<double> const& data );
360 
/**
 * @brief Calculate the Sums of Squares and Cross Products Matrix (SSCP Matrix).
 */
364 Matrix<double> sums_of_squares_and_cross_products_matrix( Matrix<double> const& data );
365 
366 // =================================================================================================
367 // Correlation Coefficient
368 // =================================================================================================
369 
378  Matrix<double> const& mat1, size_t col1,
379  Matrix<double> const& mat2, size_t col2
380 );
381 
390  Matrix<double> const& mat1, size_t row1,
391  Matrix<double> const& mat2, size_t row2
392 );
393 
402  Matrix<double> const& mat1, size_t col1,
403  Matrix<double> const& mat2, size_t col2
404 );
405 
414  Matrix<double> const& mat1, size_t row1,
415  Matrix<double> const& mat2, size_t row2
416 );
417 
418 } // namespace utils
419 } // namespace genesis
420 
421 #endif // include guard
size_t cols() const
Definition: matrix.hpp:156
Store a pair of min and max values.
Definition: common.hpp:61
double matrix_row_pearson_correlation_coefficient(Matrix< double > const &mat1, size_t row1, Matrix< double > const &mat2, size_t row2)
Calculate the Pearson Correlation Coefficient between two rows of two Matrices.
Definition: statistics.cpp:388
std::vector< MinMaxPair< T > > matrix_col_minmax(Matrix< T > const &data)
Calculate the column-wise min and max values of a Matrix.
Definition: statistics.hpp:71
std::vector< MeanStddevPair > standardize_rows(Matrix< double > &data, bool scale_means, bool scale_std)
Standardize the rows of a Matrix by subtracting the mean and scaling to unit variance.
Definition: statistics.cpp:122
T matrix_sum(Matrix< T > const &data)
Calculate the sum of all elements in a Matrix.
Definition: statistics.hpp:136
double sum(const Histogram &h)
std::vector< MeanStddevPair > matrix_row_mean_stddev(Matrix< double > const &data, double epsilon)
Calculate the row-wise mean and standard deviation of a Matrix.
Definition: statistics.cpp:203
Matrix< double > correlation_matrix(Matrix< double > const &data)
Calculate the correlation Matrix of a given data Matrix.
Definition: statistics.cpp:311
Quartiles matrix_quartiles(Matrix< double > const &data)
Calculate the Quartiles of the elements in a Matrix of double.
Definition: statistics.cpp:250
std::vector< MinMaxPair< T > > matrix_row_minmax(Matrix< T > const &data)
Calculate the row-wise min and max values of a Matrix.
Definition: statistics.hpp:106
Quartiles matrix_col_quartiles(Matrix< double > const &data, size_t col)
Definition: statistics.cpp:283
std::vector< T > matrix_col_sums(Matrix< T > const &data)
Calculate the sum of each column and return the result as a vector.
Definition: statistics.hpp:170
double matrix_col_pearson_correlation_coefficient(Matrix< double > const &mat1, size_t col1, Matrix< double > const &mat2, size_t col2)
Calculate the Pearson Correlation Coefficient between two columns of two Matrices.
Definition: statistics.cpp:369
std::vector< MeanStddevPair > matrix_col_mean_stddev(Matrix< double > const &data, double epsilon)
Calculate the column-wise mean and standard deviation of a Matrix.
Definition: statistics.cpp:160
std::vector< T > matrix_row_sums(Matrix< T > const &data)
Calculate the sum of each row and return the result as a vector.
Definition: statistics.hpp:152
std::vector< MinMaxPair< double > > normalize_cols(Matrix< double > &data)
Normalize the columns of a Matrix so that all values are in the range [ 0.0, 1.0 ].
Definition: statistics.cpp:51
MeanStddevPair matrix_mean_stddev(Matrix< double > const &data, double epsilon)
Calculate the mean and standard deviation of all elements in a Matrix.
Definition: statistics.cpp:155
MinMaxPair< T > matrix_minmax(Matrix< T > const &data)
Calculate the min and max values of a Matrix.
Definition: statistics.hpp:53
std::vector< MinMaxPair< double > > normalize_rows(Matrix< double > &data)
Normalize the rows of a Matrix so that all values are in the range [ 0.0, 1.0 ].
Definition: statistics.cpp:70
size_t rows() const
Definition: matrix.hpp:151
Quartiles matrix_row_quartiles(Matrix< double > const &data, size_t row)
Definition: statistics.cpp:259
Matrix< double > covariance_matrix(Matrix< double > const &data)
Calculate the covariance Matrix of a given data Matrix.
Definition: statistics.cpp:329
double matrix_row_spearmans_rank_correlation_coefficient(Matrix< double > const &mat1, size_t row1, Matrix< double > const &mat2, size_t row2)
Calculate Spearman's Rank Correlation Coefficient between two rows of two Matrices.
Definition: statistics.cpp:425
Matrix< double > sums_of_squares_and_cross_products_matrix(Matrix< double > const &data)
Calculate the Sums of Squares and Cross Products Matrix (SSCP Matrix).
Definition: statistics.cpp:347
double matrix_col_spearmans_rank_correlation_coefficient(Matrix< double > const &mat1, size_t col1, Matrix< double > const &mat2, size_t col2)
Calculate Spearman's Rank Correlation Coefficient between two columns of two Matrices.
Definition: statistics.cpp:407
std::vector< MeanStddevPair > standardize_cols(Matrix< double > &data, bool scale_means, bool scale_std)
Standardize the columns of a Matrix by subtracting the mean and scaling to unit variance.
Definition: statistics.cpp:93