A toolkit for working with phylogenetic data.
v0.19.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
math/matrix.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_MATRIX_H_
2 #define GENESIS_UTILS_MATH_MATRIX_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <vector>
38 
39 namespace genesis {
40 namespace utils {
41 
42 // =================================================================================================
43 // Min Max
44 // =================================================================================================
45 
51 template< typename T >
53 {
54  auto ret = MinMaxPair<T>{ 0.0, 0.0 };
55 
56  for( auto const& e : data ) {
57  ret.min = std::min( ret.min, e );
58  ret.max = std::max( ret.max, e );
59  }
60 
61  return ret;
62 }
63 
69 template< typename T >
70 std::vector<MinMaxPair<T>> matrix_col_minmax( Matrix<T> const& data )
71 {
72  auto ret = std::vector<MinMaxPair<T>>( data.cols(), { 0.0, 0.0 } );
73 
74  // Nothing to do.
75  if( data.rows() == 0 ) {
76  return ret;
77  }
78 
79  // Init with the first row.
80  for( size_t c = 0; c < data.cols(); ++c ) {
81  ret[ c ].min = data( 0, c );
82  ret[ c ].max = data( 0, c );
83  }
84 
85  // Now go through all other rows.
86  // Our Matrix is row-major, so this way we make best use of the cache.
87  for( size_t r = 1; r < data.rows(); ++r ) {
88  for( size_t c = 0; c < data.cols(); ++c ) {
89 
90  // Find min and max of the column.
91  ret[ c ].min = std::min( ret[ c ].min, data( r, c ) );
92  ret[ c ].max = std::max( ret[ c ].max, data( r, c ) );
93  }
94  }
95 
96  return ret;
97 }
98 
104 template< typename T >
105 std::vector<MinMaxPair<T>> matrix_row_minmax( Matrix<T> const& data )
106 {
107  auto ret = std::vector<MinMaxPair<T>>( data.rows(), { 0.0, 0.0 } );
108 
109  // Nothing to do.
110  if( data.cols() == 0 ) {
111  return ret;
112  }
113 
114  for( size_t r = 0; r < data.rows(); ++r ) {
115  // Init with the first col.
116  ret[ r ].min = data( r, 0 );
117  ret[ r ].max = data( r, 0 );
118 
119  // Go through all other cols.
120  for( size_t c = 1; c < data.cols(); ++c ) {
121  ret[ r ].min = std::min( ret[ r ].min, data( r, c ) );
122  ret[ r ].max = std::max( ret[ r ].max, data( r, c ) );
123  }
124  }
125 
126  return ret;
127 }
128 
134 template< typename T >
135 T matrix_sum( Matrix<T> const& data )
136 {
137  // Get row sums.
138  auto sum = T{};
139  for( auto const& e : data ) {
140  sum += e;
141  }
142  return sum;
143 }
144 
150 template< typename T >
151 std::vector<T> matrix_row_sums( Matrix<T> const& data )
152 {
153  // Get row sums.
154  auto row_sums = std::vector<T>( data.rows(), T{} );
155  for( size_t i = 0; i < data.rows(); ++i ) {
156  for( size_t j = 0; j < data.cols(); ++j ) {
157  row_sums[ i ] += data( i, j );
158  }
159  }
160  return row_sums;
161 }
162 
168 template< typename T >
169 std::vector<T> matrix_col_sums( Matrix<T> const& data )
170 {
171  // Get col sums.
172  auto col_sums = std::vector<T>( data.cols(), T{} );
173  for( size_t i = 0; i < data.rows(); ++i ) {
174  for( size_t j = 0; j < data.cols(); ++j ) {
175  col_sums[ j ] += data( i, j );
176  }
177  }
178  return col_sums;
179 }
180 
181 // =================================================================================================
182 // Normalization and Standardization
183 // =================================================================================================
184 
197 std::vector<MinMaxPair<double>> normalize_cols( Matrix<double>& data );
198 
211 std::vector<MinMaxPair<double>> normalize_rows( Matrix<double>& data );
212 
230 std::vector<MeanStddevPair> standardize_cols(
231  Matrix<double>& data,
232  bool scale_means = true,
233  bool scale_std = true
234 );
235 
253 std::vector<MeanStddevPair> standardize_rows(
254  Matrix<double>& data,
255  bool scale_means = true,
256  bool scale_std = true
257 );
258 
259 // =================================================================================================
260 // Mean and Stddev
261 // =================================================================================================
262 
274 MeanStddevPair matrix_mean_stddev(
275  Matrix<double> const& data,
276  double epsilon = -1.0
277 );
278 
290 std::vector<MeanStddevPair> matrix_col_mean_stddev(
291  Matrix<double> const& data,
292  double epsilon = -1.0
293 );
294 
306 std::vector<MeanStddevPair> matrix_row_mean_stddev(
307  Matrix<double> const& data,
308  double epsilon = -1.0
309 );
310 
311 // =================================================================================================
312 // Quartiles
313 // =================================================================================================
314 
318 Quartiles matrix_quartiles(
319  Matrix<double> const& data
320 );
321 
322 Quartiles matrix_row_quartiles(
323  Matrix<double> const& data,
324  size_t row
325 );
326 
327 std::vector<Quartiles> matrix_row_quartiles(
328  Matrix<double> const& data
329 );
330 
331 Quartiles matrix_col_quartiles(
332  Matrix<double> const& data,
333  size_t col
334 );
335 
336 std::vector<Quartiles> matrix_col_quartiles(
337  Matrix<double> const& data
338 );
339 
340 // =================================================================================================
341 // Correlation and Covariance
342 // =================================================================================================
343 
350 Matrix<double> correlation_matrix( Matrix<double> const& data );
351 
358 Matrix<double> covariance_matrix( Matrix<double> const& data );
359 
363 Matrix<double> sums_of_squares_and_cross_products_matrix( Matrix<double> const& data );
364 
365 // =================================================================================================
366 // Correlation Coefficients
367 // =================================================================================================
368 
377  Matrix<double> const& mat1, size_t col1,
378  Matrix<double> const& mat2, size_t col2
379 );
380 
389  Matrix<double> const& mat1, size_t row1,
390  Matrix<double> const& mat2, size_t row2
391 );
392 
401  Matrix<double> const& mat1, size_t col1,
402  Matrix<double> const& mat2, size_t col2
403 );
404 
413  Matrix<double> const& mat1, size_t row1,
414  Matrix<double> const& mat2, size_t row2
415 );
416 
417 // =================================================================================================
418 // Sorting
419 // =================================================================================================
420 
432 template< typename T >
434 {
435  if( data.rows() != data.cols() ) {
436  throw std::runtime_error( "Symmetric sort only works on square matrices." );
437  }
438 
439  auto result = Matrix<T>( data.rows(), data.cols() );
440  auto row_sums = matrix_row_sums( data );
441  auto si = sort_indices( row_sums.begin(), row_sums.end() );
442 
443  for( size_t i = 0; i < data.rows(); ++i ) {
444  for( size_t j = 0; j < data.cols(); ++j ) {
445  result( i, j ) = data( si[i], si[j] );
446  }
447  }
448 
449  return result;
450 }
451 
463 template< typename T >
465 {
466  if( data.rows() != data.cols() ) {
467  throw std::runtime_error( "Symmetric sort only works on square matrices." );
468  }
469 
470  auto result = Matrix<T>( data.rows(), data.cols() );
471  auto col_sums = matrix_col_sums( data );
472  auto si = sort_indices( col_sums.begin(), col_sums.end() );
473 
474  for( size_t i = 0; i < data.rows(); ++i ) {
475  for( size_t j = 0; j < data.cols(); ++j ) {
476  result( i, j ) = data( si[i], si[j] );
477  }
478  }
479 
480  return result;
481 }
482 
491 template< typename T >
493 {
494  if( data.rows() != data.cols() ) {
495  throw std::runtime_error( "Symmetric sort only works on square matrices." );
496  }
497 
498  // Find the row and col that contains the max element in the rest of the matrix,
499  // that is, excluding the first rows and cols according to start.
500  auto find_max = []( Matrix<T> const& mat, size_t start ){
501  size_t max_r = start;
502  size_t max_c = start;
503  for( size_t r = start; r < mat.rows(); ++r ) {
504  for( size_t c = start; c < mat.cols(); ++c ) {
505  if( mat(r, c) > mat( max_r, max_c ) ) {
506  max_r = r;
507  max_c = c;
508  }
509  }
510  }
511  return std::make_pair( max_r, max_c );
512  };
513 
514  // Sort by swapping rows and cols.
515  auto mat = data;
516  assert( mat.rows() == mat.cols() );
517  for( size_t i = 0; i < mat.rows(); ++i ) {
518  auto const max_pair = find_max( mat, i );
519  matrix_swap_rows( mat, i, max_pair.first );
520  matrix_swap_cols( mat, i, max_pair.second );
521  }
522  return mat;
523 }
524 
525 // =================================================================================================
526 // Matrix Addition
527 // =================================================================================================
528 
534 template< typename T = double, typename A = double, typename B = double >
536 {
537  if( a.rows() != b.rows() || a.cols() != b.cols() ) {
538  throw std::runtime_error( "Cannot add matrices with different dimensions." );
539  }
540 
541  auto result = Matrix<T>( a.rows(), a.cols() );
542  for( size_t r = 0; r < a.rows(); ++r ) {
543  for( size_t c = 0; c < a.cols(); ++c ) {
544  result( r, c ) = a( r, c ) + b( r, c );
545  }
546  }
547  return result;
548 }
549 
553 template< typename T = double, typename A = double, typename B = double >
554 Matrix<T> matrix_addition( Matrix<A> const& matrix, B const& scalar )
555 {
556  auto result = Matrix<T>( matrix.rows(), matrix.cols() );
557  for( size_t r = 0; r < matrix.rows(); ++r ) {
558  for( size_t c = 0; c < matrix.cols(); ++c ) {
559  result( r, c ) = matrix( r, c ) + scalar;
560  }
561  }
562  return result;
563 }
564 
570 template< typename T = double, typename A = double, typename B = double >
572 {
573  if( a.rows() != b.rows() || a.cols() != b.cols() ) {
574  throw std::runtime_error( "Cannot add matrices with different dimensions." );
575  }
576 
577  auto result = Matrix<T>( a.rows(), a.cols() );
578  for( size_t r = 0; r < a.rows(); ++r ) {
579  for( size_t c = 0; c < a.cols(); ++c ) {
580  result( r, c ) = a( r, c ) - b( r, c );
581  }
582  }
583  return result;
584 }
585 
586 // =================================================================================================
587 // Matrix Multiplication
588 // =================================================================================================
589 
596 template< typename T = double, typename A = double, typename B = double >
598 {
599  if( a.cols() != b.rows() ) {
600  throw std::runtime_error( "Cannot multiply matrices if a.cols() != b.rows()." );
601  }
602 
603  // Simple and naive. Fast enough for the few occasions were we need this.
604  // If Genesis at some point starts to need more elaborate matrix operations, it might be
605  // worth including some proper library for this.
606  auto result = Matrix<T>( a.rows(), b.cols() );
607  for( size_t r = 0; r < a.rows(); ++r ) {
608  for( size_t c = 0; c < b.cols(); ++c ) {
609  result( r, c ) = 0.0;
610  for( size_t j = 0; j < a.cols(); ++j ) {
611  result( r, c ) += a( r, j ) * b( j, c );
612  }
613  }
614  }
615 
616  return result;
617 }
618 
626 template< typename T = double, typename A = double, typename B = double >
627 std::vector<T> matrix_multiplication( std::vector<A> const& a, Matrix<B> const& b )
628 {
629  if( a.size() != b.rows() ) {
630  throw std::runtime_error( "Cannot multiply vector with matrix if a.size() != b.rows()." );
631  }
632 
633  auto result = std::vector<T>( b.cols(), 0.0 );
634  for( size_t c = 0; c < b.cols(); ++c ) {
635  for( size_t j = 0; j < a.size(); ++j ) {
636  result[ c ] += a[ j ] * b( j, c );
637  }
638  }
639 
640  return result;
641 }
642 
650 template< typename T = double, typename A = double, typename B = double >
651 std::vector<T> matrix_multiplication( Matrix<A> const& a, std::vector<B> const& b )
652 {
653  if( a.cols() != b.size() ) {
654  throw std::runtime_error( "Cannot multiply matrix with vector if a.cols() != b.size()." );
655  }
656 
657  auto result = std::vector<T>( a.rows(), 0.0 );
658  for( size_t r = 0; r < a.rows(); ++r ) {
659  for( size_t j = 0; j < a.cols(); ++j ) {
660  result[ r ] += a( r, j ) * b[ j ];
661  }
662  }
663 
664  return result;
665 }
666 
670 template< typename T = double, typename A = double, typename B = double >
671 Matrix<T> matrix_multiplication( Matrix<A> const& matrix, B const& scalar )
672 {
673  auto result = Matrix<T>( matrix.rows(), matrix.cols() );
674  for( size_t r = 0; r < matrix.rows(); ++r ) {
675  for( size_t c = 0; c < matrix.cols(); ++c ) {
676  result( r, c ) = matrix( r, c ) * scalar;
677  }
678  }
679  return result;
680 }
681 
682 } // namespace utils
683 } // namespace genesis
684 
685 #endif // include guard
Matrix< T > matrix_multiplication(Matrix< A > const &a, Matrix< B > const &b)
Calculate the product of two Matrices.
Store a pair of min and max values.
Definition: statistics.hpp:65
double matrix_row_pearson_correlation_coefficient(Matrix< double > const &mat1, size_t row1, Matrix< double > const &mat2, size_t row2)
Calculate the Pearson Correlation Coefficient between two row of two Matrices.
Definition: matrix.cpp:384
std::vector< MinMaxPair< T > > matrix_col_minmax(Matrix< T > const &data)
Calculate the column-wise min and max values of a Matrix.
Definition: math/matrix.hpp:70
void matrix_swap_cols(Matrix< T > &data, size_t col_a, size_t col_b)
Swap (interchange) two columns of a Matrix, given their indices.
Matrix< T > matrix_sort_by_row_sum_symmetric(Matrix< T > const &data)
Sort rows and columns of a Matrix by the sum or the rows.
void matrix_swap_rows(Matrix< T > &data, size_t row_a, size_t row_b)
Swap (interchange) two rows of a Matrix, given their indices.
std::vector< MeanStddevPair > standardize_rows(Matrix< double > &data, bool scale_means, bool scale_std)
Standardize the rows of a Matrix by subtracting the mean and scaling to unit variance.
Definition: matrix.cpp:122
Matrix< T > matrix_subtraction(Matrix< A > const &a, Matrix< B > const &b)
Calculate the element-wise difference of two Matrices.
T matrix_sum(Matrix< T > const &data)
Calculate the sum of all elements in a Matrix.
double sum(const Histogram &h)
std::vector< MeanStddevPair > matrix_row_mean_stddev(Matrix< double > const &data, double epsilon)
Calcualte the row-wise mean and standard deviation of a Matrix.
Definition: matrix.cpp:203
Matrix< double > correlation_matrix(Matrix< double > const &data)
Calculate the correlation Matrix of a given data Matrix.
Definition: matrix.cpp:309
Quartiles matrix_quartiles(Matrix< double > const &data)
Calculate the Quartiles of the elmements in Matrix of double.
Definition: matrix.cpp:250
std::vector< size_t > sort_indices(RandomAccessIterator first, RandomAccessIterator last, Comparator comparator)
Get the indices to the sorted order of the given range.
Definition: algorithm.hpp:124
std::vector< MinMaxPair< T > > matrix_row_minmax(Matrix< T > const &data)
Calculate the row-wise min and max values of a Matrix.
Quartiles matrix_col_quartiles(Matrix< double > const &data, size_t col)
Definition: matrix.cpp:282
std::vector< T > matrix_col_sums(Matrix< T > const &data)
Calculate the sum of each column and return the result as a vector.
double matrix_col_pearson_correlation_coefficient(Matrix< double > const &mat1, size_t col1, Matrix< double > const &mat2, size_t col2)
Calculate the Pearson Correlation Coefficient between two columns of two Matrices.
Definition: matrix.cpp:367
std::vector< MeanStddevPair > matrix_col_mean_stddev(Matrix< double > const &data, double epsilon)
Calcualte the column-wise mean and standard deviation of a Matrix.
Definition: matrix.cpp:160
std::vector< T > matrix_row_sums(Matrix< T > const &data)
Calculate the sum of each row and return the result as a vector.
std::vector< MinMaxPair< double > > normalize_cols(Matrix< double > &data)
Normalize the columns of a Matrix so that all values are in the range [ 0.0, 1.0 ].
Definition: matrix.cpp:51
MeanStddevPair matrix_mean_stddev(Matrix< double > const &data, double epsilon)
Calcualte the mean and standard deviation of all elements in a Matrix.
Definition: matrix.cpp:155
MinMaxPair< T > matrix_minmax(Matrix< T > const &data)
Calculate the min and max values of a Matrix.
Definition: math/matrix.hpp:52
std::vector< MinMaxPair< double > > normalize_rows(Matrix< double > &data)
Normalize the rows of a Matrix so that all values are in the range [ 0.0, 1.0 ].
Definition: matrix.cpp:70
Matrix< T > matrix_sort_by_col_sum_symmetric(Matrix< T > const &data)
Sort rows and columns of a Matrix by the sum or the columns.
Quartiles matrix_row_quartiles(Matrix< double > const &data, size_t row)
Definition: matrix.cpp:259
Matrix< double > covariance_matrix(Matrix< double > const &data)
Calculate the covariance Matrix of a given data Matrix.
Definition: matrix.cpp:327
Matrix< T > matrix_addition(Matrix< A > const &a, Matrix< B > const &b)
Calculate the element-wise sum of two Matrices.
double matrix_row_spearmans_rank_correlation_coefficient(Matrix< double > const &mat1, size_t row1, Matrix< double > const &mat2, size_t row2)
Calculate Spearman's Rank Correlation Coefficient between two row of two Matrices.
Definition: matrix.cpp:417
Matrix< T > matrix_sort_diagonal_symmetric(Matrix< T > const &data)
Sort a Matrix so that the highest entries are on the diagonal.
Matrix< double > sums_of_squares_and_cross_products_matrix(Matrix< double > const &data)
Calculate the Sums of Squares and Cross Products Matrix (SSCP Matrix).
Definition: matrix.cpp:345
double matrix_col_spearmans_rank_correlation_coefficient(Matrix< double > const &mat1, size_t col1, Matrix< double > const &mat2, size_t col2)
Calculate Spearman's Rank Correlation Coefficient between two columns of two Matrices.
Definition: matrix.cpp:401
std::vector< MeanStddevPair > standardize_cols(Matrix< double > &data, bool scale_means, bool scale_std)
Standardize the columns of a Matrix by subtracting the mean and scaling to unit variance.
Definition: matrix.cpp:93