A toolkit for working with phylogenetic data.
v0.20.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
math/matrix.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_MATRIX_H_
2 #define GENESIS_UTILS_MATH_MATRIX_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <vector>
38 
39 namespace genesis {
40 namespace utils {
41 
42 // =================================================================================================
43 // Min Max
44 // =================================================================================================
45 
51 template< typename T >
53 {
54  // Edge case
55  if( data.rows() == 0 || data.cols() == 0 ) {
56  return { 0.0, 0.0 };
57  }
58 
59  // Init with first element
60  auto ret = MinMaxPair<T>{ data( 0, 0 ), data( 0, 0 ) };
61 
62  // Iterate
63  for( auto const& e : data ) {
64  ret.min = std::min( ret.min, e );
65  ret.max = std::max( ret.max, e );
66  }
67 
68  return ret;
69 }
70 
76 template< typename T >
77 std::vector<MinMaxPair<T>> matrix_col_minmax( Matrix<T> const& data )
78 {
79  auto ret = std::vector<MinMaxPair<T>>( data.cols(), { 0.0, 0.0 } );
80 
81  // Nothing to do.
82  if( data.rows() == 0 ) {
83  return ret;
84  }
85 
86  // Init with the first row.
87  for( size_t c = 0; c < data.cols(); ++c ) {
88  ret[ c ].min = data( 0, c );
89  ret[ c ].max = data( 0, c );
90  }
91 
92  // Now go through all other rows.
93  // Our Matrix is row-major, so this way we make best use of the cache.
94  for( size_t r = 1; r < data.rows(); ++r ) {
95  for( size_t c = 0; c < data.cols(); ++c ) {
96 
97  // Find min and max of the column.
98  ret[ c ].min = std::min( ret[ c ].min, data( r, c ) );
99  ret[ c ].max = std::max( ret[ c ].max, data( r, c ) );
100  }
101  }
102 
103  return ret;
104 }
105 
111 template< typename T >
112 std::vector<MinMaxPair<T>> matrix_row_minmax( Matrix<T> const& data )
113 {
114  auto ret = std::vector<MinMaxPair<T>>( data.rows(), { 0.0, 0.0 } );
115 
116  // Nothing to do.
117  if( data.cols() == 0 ) {
118  return ret;
119  }
120 
121  for( size_t r = 0; r < data.rows(); ++r ) {
122  // Init with the first col.
123  ret[ r ].min = data( r, 0 );
124  ret[ r ].max = data( r, 0 );
125 
126  // Go through all other cols.
127  for( size_t c = 1; c < data.cols(); ++c ) {
128  ret[ r ].min = std::min( ret[ r ].min, data( r, c ) );
129  ret[ r ].max = std::max( ret[ r ].max, data( r, c ) );
130  }
131  }
132 
133  return ret;
134 }
135 
141 template< typename T >
142 T matrix_sum( Matrix<T> const& data )
143 {
144  // Get row sums.
145  auto sum = T{};
146  for( auto const& e : data ) {
147  sum += e;
148  }
149  return sum;
150 }
151 
157 template< typename T >
158 std::vector<T> matrix_row_sums( Matrix<T> const& data )
159 {
160  // Get row sums.
161  auto row_sums = std::vector<T>( data.rows(), T{} );
162  for( size_t i = 0; i < data.rows(); ++i ) {
163  for( size_t j = 0; j < data.cols(); ++j ) {
164  row_sums[ i ] += data( i, j );
165  }
166  }
167  return row_sums;
168 }
169 
175 template< typename T >
176 std::vector<T> matrix_col_sums( Matrix<T> const& data )
177 {
178  // Get col sums.
179  auto col_sums = std::vector<T>( data.cols(), T{} );
180  for( size_t i = 0; i < data.rows(); ++i ) {
181  for( size_t j = 0; j < data.cols(); ++j ) {
182  col_sums[ j ] += data( i, j );
183  }
184  }
185  return col_sums;
186 }
187 
188 // =================================================================================================
189 // Normalization and Standardization
190 // =================================================================================================
191 
204 std::vector<MinMaxPair<double>> normalize_cols( Matrix<double>& data );
205 
218 std::vector<MinMaxPair<double>> normalize_rows( Matrix<double>& data );
219 
237 std::vector<MeanStddevPair> standardize_cols(
238  Matrix<double>& data,
239  bool scale_means = true,
240  bool scale_std = true
241 );
242 
260 std::vector<MeanStddevPair> standardize_rows(
261  Matrix<double>& data,
262  bool scale_means = true,
263  bool scale_std = true
264 );
265 
266 // =================================================================================================
267 // Mean and Stddev
268 // =================================================================================================
269 
281 MeanStddevPair matrix_mean_stddev(
282  Matrix<double> const& data,
283  double epsilon = -1.0
284 );
285 
297 std::vector<MeanStddevPair> matrix_col_mean_stddev(
298  Matrix<double> const& data,
299  double epsilon = -1.0
300 );
301 
313 std::vector<MeanStddevPair> matrix_row_mean_stddev(
314  Matrix<double> const& data,
315  double epsilon = -1.0
316 );
317 
318 // =================================================================================================
319 // Quartiles
320 // =================================================================================================
321 
325 Quartiles matrix_quartiles(
326  Matrix<double> const& data
327 );
328 
329 Quartiles matrix_row_quartiles(
330  Matrix<double> const& data,
331  size_t row
332 );
333 
334 std::vector<Quartiles> matrix_row_quartiles(
335  Matrix<double> const& data
336 );
337 
338 Quartiles matrix_col_quartiles(
339  Matrix<double> const& data,
340  size_t col
341 );
342 
343 std::vector<Quartiles> matrix_col_quartiles(
344  Matrix<double> const& data
345 );
346 
347 // =================================================================================================
348 // Correlation and Covariance
349 // =================================================================================================
350 
357 Matrix<double> correlation_matrix( Matrix<double> const& data );
358 
365 Matrix<double> covariance_matrix( Matrix<double> const& data );
366 
370 Matrix<double> sums_of_squares_and_cross_products_matrix( Matrix<double> const& data );
371 
372 // =================================================================================================
373 // Correlation Coefficients
374 // =================================================================================================
375 
384  Matrix<double> const& mat1, size_t col1,
385  Matrix<double> const& mat2, size_t col2
386 );
387 
396  Matrix<double> const& mat1, size_t row1,
397  Matrix<double> const& mat2, size_t row2
398 );
399 
408  Matrix<double> const& mat1, size_t col1,
409  Matrix<double> const& mat2, size_t col2
410 );
411 
420  Matrix<double> const& mat1, size_t row1,
421  Matrix<double> const& mat2, size_t row2
422 );
423 
424 // =================================================================================================
425 // Sorting
426 // =================================================================================================
427 
439 template< typename T >
441 {
442  if( data.rows() != data.cols() ) {
443  throw std::runtime_error( "Symmetric sort only works on square matrices." );
444  }
445 
446  auto result = Matrix<T>( data.rows(), data.cols() );
447  auto row_sums = matrix_row_sums( data );
448  auto si = sort_indices( row_sums.begin(), row_sums.end() );
449 
450  for( size_t i = 0; i < data.rows(); ++i ) {
451  for( size_t j = 0; j < data.cols(); ++j ) {
452  result( i, j ) = data( si[i], si[j] );
453  }
454  }
455 
456  return result;
457 }
458 
470 template< typename T >
472 {
473  if( data.rows() != data.cols() ) {
474  throw std::runtime_error( "Symmetric sort only works on square matrices." );
475  }
476 
477  auto result = Matrix<T>( data.rows(), data.cols() );
478  auto col_sums = matrix_col_sums( data );
479  auto si = sort_indices( col_sums.begin(), col_sums.end() );
480 
481  for( size_t i = 0; i < data.rows(); ++i ) {
482  for( size_t j = 0; j < data.cols(); ++j ) {
483  result( i, j ) = data( si[i], si[j] );
484  }
485  }
486 
487  return result;
488 }
489 
498 template< typename T >
500 {
501  if( data.rows() != data.cols() ) {
502  throw std::runtime_error( "Symmetric sort only works on square matrices." );
503  }
504 
505  // Find the row and col that contains the max element in the rest of the matrix,
506  // that is, excluding the first rows and cols according to start.
507  auto find_max = []( Matrix<T> const& mat, size_t start ){
508  size_t max_r = start;
509  size_t max_c = start;
510  for( size_t r = start; r < mat.rows(); ++r ) {
511  for( size_t c = start; c < mat.cols(); ++c ) {
512  if( mat(r, c) > mat( max_r, max_c ) ) {
513  max_r = r;
514  max_c = c;
515  }
516  }
517  }
518  return std::make_pair( max_r, max_c );
519  };
520 
521  // Sort by swapping rows and cols.
522  auto mat = data;
523  assert( mat.rows() == mat.cols() );
524  for( size_t i = 0; i < mat.rows(); ++i ) {
525  auto const max_pair = find_max( mat, i );
526  matrix_swap_rows( mat, i, max_pair.first );
527  matrix_swap_cols( mat, i, max_pair.second );
528  }
529  return mat;
530 }
531 
532 // =================================================================================================
533 // Matrix Addition
534 // =================================================================================================
535 
541 template< typename T = double, typename A = double, typename B = double >
543 {
544  if( a.rows() != b.rows() || a.cols() != b.cols() ) {
545  throw std::runtime_error( "Cannot add matrices with different dimensions." );
546  }
547 
548  auto result = Matrix<T>( a.rows(), a.cols() );
549  for( size_t r = 0; r < a.rows(); ++r ) {
550  for( size_t c = 0; c < a.cols(); ++c ) {
551  result( r, c ) = a( r, c ) + b( r, c );
552  }
553  }
554  return result;
555 }
556 
560 template< typename T = double, typename A = double, typename B = double >
561 Matrix<T> matrix_addition( Matrix<A> const& matrix, B const& scalar )
562 {
563  auto result = Matrix<T>( matrix.rows(), matrix.cols() );
564  for( size_t r = 0; r < matrix.rows(); ++r ) {
565  for( size_t c = 0; c < matrix.cols(); ++c ) {
566  result( r, c ) = matrix( r, c ) + scalar;
567  }
568  }
569  return result;
570 }
571 
577 template< typename T = double, typename A = double, typename B = double >
579 {
580  if( a.rows() != b.rows() || a.cols() != b.cols() ) {
581  throw std::runtime_error( "Cannot add matrices with different dimensions." );
582  }
583 
584  auto result = Matrix<T>( a.rows(), a.cols() );
585  for( size_t r = 0; r < a.rows(); ++r ) {
586  for( size_t c = 0; c < a.cols(); ++c ) {
587  result( r, c ) = a( r, c ) - b( r, c );
588  }
589  }
590  return result;
591 }
592 
593 // =================================================================================================
594 // Matrix Multiplication
595 // =================================================================================================
596 
603 template< typename T = double, typename A = double, typename B = double >
605 {
606  if( a.cols() != b.rows() ) {
607  throw std::runtime_error( "Cannot multiply matrices if a.cols() != b.rows()." );
608  }
609 
610  // Simple and naive. Fast enough for the few occasions were we need this.
611  // If Genesis at some point starts to need more elaborate matrix operations, it might be
612  // worth including some proper library for this.
613  auto result = Matrix<T>( a.rows(), b.cols() );
614  for( size_t r = 0; r < a.rows(); ++r ) {
615  for( size_t c = 0; c < b.cols(); ++c ) {
616  result( r, c ) = 0.0;
617  for( size_t j = 0; j < a.cols(); ++j ) {
618  result( r, c ) += a( r, j ) * b( j, c );
619  }
620  }
621  }
622 
623  return result;
624 }
625 
633 template< typename T = double, typename A = double, typename B = double >
634 std::vector<T> matrix_multiplication( std::vector<A> const& a, Matrix<B> const& b )
635 {
636  if( a.size() != b.rows() ) {
637  throw std::runtime_error( "Cannot multiply vector with matrix if a.size() != b.rows()." );
638  }
639 
640  auto result = std::vector<T>( b.cols(), 0.0 );
641  for( size_t c = 0; c < b.cols(); ++c ) {
642  for( size_t j = 0; j < a.size(); ++j ) {
643  result[ c ] += a[ j ] * b( j, c );
644  }
645  }
646 
647  return result;
648 }
649 
657 template< typename T = double, typename A = double, typename B = double >
658 std::vector<T> matrix_multiplication( Matrix<A> const& a, std::vector<B> const& b )
659 {
660  if( a.cols() != b.size() ) {
661  throw std::runtime_error( "Cannot multiply matrix with vector if a.cols() != b.size()." );
662  }
663 
664  auto result = std::vector<T>( a.rows(), 0.0 );
665  for( size_t r = 0; r < a.rows(); ++r ) {
666  for( size_t j = 0; j < a.cols(); ++j ) {
667  result[ r ] += a( r, j ) * b[ j ];
668  }
669  }
670 
671  return result;
672 }
673 
677 template< typename T = double, typename A = double, typename B = double >
678 Matrix<T> matrix_multiplication( Matrix<A> const& matrix, B const& scalar )
679 {
680  auto result = Matrix<T>( matrix.rows(), matrix.cols() );
681  for( size_t r = 0; r < matrix.rows(); ++r ) {
682  for( size_t c = 0; c < matrix.cols(); ++c ) {
683  result( r, c ) = matrix( r, c ) * scalar;
684  }
685  }
686  return result;
687 }
688 
689 } // namespace utils
690 } // namespace genesis
691 
692 #endif // include guard
Matrix< T > matrix_multiplication(Matrix< A > const &a, Matrix< B > const &b)
Calculate the product of two Matrices.
Store a pair of min and max values.
Definition: statistics.hpp:65
double matrix_row_pearson_correlation_coefficient(Matrix< double > const &mat1, size_t row1, Matrix< double > const &mat2, size_t row2)
Calculate the Pearson Correlation Coefficient between two row of two Matrices.
Definition: matrix.cpp:384
std::vector< MinMaxPair< T > > matrix_col_minmax(Matrix< T > const &data)
Calculate the column-wise min and max values of a Matrix.
Definition: math/matrix.hpp:77
void matrix_swap_cols(Matrix< T > &data, size_t col_a, size_t col_b)
Swap (interchange) two columns of a Matrix, given their indices.
Matrix< T > matrix_sort_by_row_sum_symmetric(Matrix< T > const &data)
Sort rows and columns of a Matrix by the sum or the rows.
void matrix_swap_rows(Matrix< T > &data, size_t row_a, size_t row_b)
Swap (interchange) two rows of a Matrix, given their indices.
std::vector< MeanStddevPair > standardize_rows(Matrix< double > &data, bool scale_means, bool scale_std)
Standardize the rows of a Matrix by subtracting the mean and scaling to unit variance.
Definition: matrix.cpp:122
Matrix< T > matrix_subtraction(Matrix< A > const &a, Matrix< B > const &b)
Calculate the element-wise difference of two Matrices.
T matrix_sum(Matrix< T > const &data)
Calculate the sum of all elements in a Matrix.
double sum(const Histogram &h)
std::vector< MeanStddevPair > matrix_row_mean_stddev(Matrix< double > const &data, double epsilon)
Calcualte the row-wise mean and standard deviation of a Matrix.
Definition: matrix.cpp:203
Matrix< double > correlation_matrix(Matrix< double > const &data)
Calculate the correlation Matrix of a given data Matrix.
Definition: matrix.cpp:309
Quartiles matrix_quartiles(Matrix< double > const &data)
Calculate the Quartiles of the elmements in Matrix of double.
Definition: matrix.cpp:250
std::vector< size_t > sort_indices(RandomAccessIterator first, RandomAccessIterator last, Comparator comparator)
Get the indices to the sorted order of the given range.
Definition: algorithm.hpp:124
std::vector< MinMaxPair< T > > matrix_row_minmax(Matrix< T > const &data)
Calculate the row-wise min and max values of a Matrix.
Quartiles matrix_col_quartiles(Matrix< double > const &data, size_t col)
Definition: matrix.cpp:282
std::vector< T > matrix_col_sums(Matrix< T > const &data)
Calculate the sum of each column and return the result as a vector.
double matrix_col_pearson_correlation_coefficient(Matrix< double > const &mat1, size_t col1, Matrix< double > const &mat2, size_t col2)
Calculate the Pearson Correlation Coefficient between two columns of two Matrices.
Definition: matrix.cpp:367
std::vector< MeanStddevPair > matrix_col_mean_stddev(Matrix< double > const &data, double epsilon)
Calcualte the column-wise mean and standard deviation of a Matrix.
Definition: matrix.cpp:160
std::vector< T > matrix_row_sums(Matrix< T > const &data)
Calculate the sum of each row and return the result as a vector.
std::vector< MinMaxPair< double > > normalize_cols(Matrix< double > &data)
Normalize the columns of a Matrix so that all values are in the range [ 0.0, 1.0 ].
Definition: matrix.cpp:51
MeanStddevPair matrix_mean_stddev(Matrix< double > const &data, double epsilon)
Calcualte the mean and standard deviation of all elements in a Matrix.
Definition: matrix.cpp:155
MinMaxPair< T > matrix_minmax(Matrix< T > const &data)
Calculate the min and max values of a Matrix.
Definition: math/matrix.hpp:52
std::vector< MinMaxPair< double > > normalize_rows(Matrix< double > &data)
Normalize the rows of a Matrix so that all values are in the range [ 0.0, 1.0 ].
Definition: matrix.cpp:70
Matrix< T > matrix_sort_by_col_sum_symmetric(Matrix< T > const &data)
Sort rows and columns of a Matrix by the sum or the columns.
Quartiles matrix_row_quartiles(Matrix< double > const &data, size_t row)
Definition: matrix.cpp:259
Matrix< double > covariance_matrix(Matrix< double > const &data)
Calculate the covariance Matrix of a given data Matrix.
Definition: matrix.cpp:327
Matrix< T > matrix_addition(Matrix< A > const &a, Matrix< B > const &b)
Calculate the element-wise sum of two Matrices.
double matrix_row_spearmans_rank_correlation_coefficient(Matrix< double > const &mat1, size_t row1, Matrix< double > const &mat2, size_t row2)
Calculate Spearman's Rank Correlation Coefficient between two row of two Matrices.
Definition: matrix.cpp:417
Matrix< T > matrix_sort_diagonal_symmetric(Matrix< T > const &data)
Sort a Matrix so that the highest entries are on the diagonal.
Matrix< double > sums_of_squares_and_cross_products_matrix(Matrix< double > const &data)
Calculate the Sums of Squares and Cross Products Matrix (SSCP Matrix).
Definition: matrix.cpp:345
double matrix_col_spearmans_rank_correlation_coefficient(Matrix< double > const &mat1, size_t col1, Matrix< double > const &mat2, size_t col2)
Calculate Spearman's Rank Correlation Coefficient between two columns of two Matrices.
Definition: matrix.cpp:401
std::vector< MeanStddevPair > standardize_cols(Matrix< double > &data, bool scale_means, bool scale_std)
Standardize the columns of a Matrix by subtracting the mean and scaling to unit variance.
Definition: matrix.cpp:93