A library for working with phylogenetic and population genetic data.
v0.32.0
distance.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_DISTANCE_H_
2 #define GENESIS_UTILS_MATH_DISTANCE_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2021 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
38 
39 #include <algorithm>
40 #include <cassert>
41 #include <cmath>
42 #include <cstddef>
43 #include <functional>
44 #include <limits>
45 #include <stdexcept>
46 #include <utility>
47 #include <vector>
48 
49 namespace genesis {
50 namespace utils {
51 
52 // =================================================================================================
53 // Norms
54 // =================================================================================================
55 
69 template <class ForwardIterator>
70 double p_norm( ForwardIterator first, ForwardIterator last, double p = 2.0 )
71 {
72  // Validity. We allow positive inifity.
73  if( p < 1.0 || ( ! std::isfinite( p ) && ! std::isinf( p ))) {
74  throw std::runtime_error( "Cannot calculate p-norm with p < 1.0" );
75  }
76  assert( p >= 1.0 );
77  assert( std::isfinite( p ) || std::isinf( p ));
78 
79  double sum = 0.0;
80  size_t cnt = 0;
81 
82  // Add vector elements.
83  auto it = first;
84  while( it != last ) {
85  if( std::isfinite( *it ) ) {
86  if( std::isfinite( p )) {
87  sum += std::pow( std::abs( *it ), p );
88  } else {
89  sum = std::max( sum, std::abs( *it ));
90  }
91  ++cnt;
92  }
93  ++it;
94  }
95 
96  // If there are no valid elements, return an all-zero result.
97  if( cnt == 0 ) {
98  return 0.0;
99  }
100 
101  // Return the result.
102  assert( cnt > 0 );
103  if( std::isfinite( p )) {
104  return std::pow( sum, 1.0 / p );
105  } else {
106  return sum;
107  }
108 
109  // Make old compilers happy.
110  return 0.0;
111 }
112 
119 inline double p_norm( std::vector<double> const& vec, double p = 2.0 )
120 {
121  return p_norm( vec.begin(), vec.end(), p );
122 }
123 
130 template <class ForwardIterator>
131 double manhattan_norm( ForwardIterator first, ForwardIterator last )
132 {
133  return p_norm( first, last, 1.0 );
134 }
135 
142 inline double manhattan_norm( std::vector<double> const& vec )
143 {
144  return p_norm( vec.begin(), vec.end(), 1.0 );
145 }
146 
153 template <class ForwardIterator>
154 double euclidean_norm( ForwardIterator first, ForwardIterator last )
155 {
156  return p_norm( first, last, 2.0 );
157 }
158 
165 inline double euclidean_norm( std::vector<double> const& vec )
166 {
167  return p_norm( vec.begin(), vec.end(), 2.0 );
168 }
169 
177 template <class ForwardIterator>
178 double maximum_norm( ForwardIterator first, ForwardIterator last )
179 {
180  return p_norm( first, last, std::numeric_limits<double>::infinity() );
181 }
182 
190 inline double maximum_norm( std::vector<double> const& vec )
191 {
192  return p_norm( vec.begin(), vec.end(), std::numeric_limits<double>::infinity() );
193 }
194 
215 template <class ForwardIterator>
216 double aitchison_norm( ForwardIterator first, ForwardIterator last )
217 {
218  double sum = 0.0;
219  size_t cnt = 0;
220 
221  // Outer loop.
222  auto it_out = first;
223  while( it_out != last ) {
224  if( std::isfinite( *it_out ) ) {
225 
226  if( *it_out <= 0.0 ) {
227  throw std::invalid_argument(
228  "Cannot calculate Aitchison norm of non-positive values."
229  );
230  }
231 
232  // Inner loop.
233  auto it_in = first;
234  while( it_in != last ) {
235  if( std::isfinite( *it_in ) ) {
236  auto const ln = std::log( *it_out / *it_in );
237  sum += ln * ln;
238  }
239  ++it_in;
240  }
241 
242  ++cnt;
243  }
244  ++it_out;
245  }
246 
247  // If there are no valid elements, return an all-zero result.
248  if( cnt == 0 ) {
249  return 0.0;
250  }
251 
252  // Return the result.
253  assert( cnt > 0 );
254  return std::sqrt( sum / ( 2.0 * static_cast<double>( cnt )));
255 }
256 
262 inline double aitchison_norm( std::vector<double> const& vec )
263 {
264  return aitchison_norm( vec.begin(), vec.end() );
265 }
266 
267 // =================================================================================================
268 // Distances
269 // =================================================================================================
270 
286 template <class ForwardIteratorA, class ForwardIteratorB>
288  ForwardIteratorA first_a, ForwardIteratorA last_a,
289  ForwardIteratorB first_b, ForwardIteratorB last_b,
290  double p = 2.0
291 ) {
292  // Validity. We allow positive inifity.
293  if( p < 1.0 || ( ! std::isfinite( p ) && ! std::isinf( p ))) {
294  throw std::runtime_error( "Cannot calculate p-norm distance with p < 1.0" );
295  }
296  assert( p >= 1.0 );
297  assert( std::isfinite( p ) || std::isinf( p ));
298 
299  // For "normal" p norms, just add up. For maximum norm (p=inf), we need a special case,
300  // as double-precision arithmetics does not work the same as actual math :-)
301  double sum = 0.0;
302  size_t cnt = 0;
303  if( std::isfinite( p )) {
304  assert( p >= 1.0 );
305  for_each_finite_pair( first_a, last_a, first_b, last_b, [&]( double val_a, double val_b ){
306  sum += std::pow( std::abs( val_a - val_b ), p );
307  ++cnt;
308  });
309  } else {
310  assert( std::isinf( p ));
311  for_each_finite_pair( first_a, last_a, first_b, last_b, [&]( double val_a, double val_b ){
312  sum = std::max( sum, std::abs( val_a - val_b ) );
313  ++cnt;
314  });
315  }
316 
317  // If there are no valid elements, return an all-zero result.
318  if( cnt == 0 ) {
319  return 0.0;
320  }
321 
322  // Return the result.
323  assert( cnt > 0 );
324  if( std::isfinite( p )) {
325  return std::pow( sum, 1.0 / p );
326  } else {
327  return sum;
328  }
329 
330  // Make old compilers happy.
331  return 0.0;
332 }
333 
339 inline double p_norm_distance(
340  std::vector<double> const& vec_a, std::vector<double> const& vec_b, double p = 2.0
341 ) {
342  return p_norm_distance( vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(), p );
343 }
344 
352 template <class ForwardIteratorA, class ForwardIteratorB>
354  ForwardIteratorA first_a, ForwardIteratorA last_a,
355  ForwardIteratorB first_b, ForwardIteratorB last_b
356 ) {
357  return p_norm_distance( first_a, last_a, first_b, last_b, 1.0 );
358 }
359 
367 inline double manhattan_distance(
368  std::vector<double> const& vec_a, std::vector<double> const& vec_b
369 ) {
370  return p_norm_distance( vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(), 1.0 );
371 }
372 
380 template <class ForwardIteratorA, class ForwardIteratorB>
382  ForwardIteratorA first_a, ForwardIteratorA last_a,
383  ForwardIteratorB first_b, ForwardIteratorB last_b
384 ) {
385  return p_norm_distance( first_a, last_a, first_b, last_b, 2.0 );
386 }
387 
395 inline double euclidean_distance(
396  std::vector<double> const& vec_a, std::vector<double> const& vec_b
397 ) {
398  return p_norm_distance( vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(), 2.0 );
399 }
400 
409 template <class ForwardIteratorA, class ForwardIteratorB>
411  ForwardIteratorA first_a, ForwardIteratorA last_a,
412  ForwardIteratorB first_b, ForwardIteratorB last_b
413 ) {
414  return p_norm_distance(
415  first_a, last_a, first_b, last_b, std::numeric_limits<double>::infinity()
416  );
417 }
418 
428 inline double maximum_distance(
429  std::vector<double> const& vec_a, std::vector<double> const& vec_b
430 ) {
431  return p_norm_distance(
432  vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(),
433  std::numeric_limits<double>::infinity()
434  );
435 }
436 
437 // =================================================================================================
438 // Distances Matrices
439 // =================================================================================================
440 
452 Matrix<double> p_norm_distance_matrix( Matrix<double> const& data, double p = 2.0 );
453 
460 Matrix<double> manhattan_distance_matrix( Matrix<double> const& data );
461 
468 Matrix<double> euclidean_distance_matrix( Matrix<double> const& data );
469 
477 Matrix<double> maximum_distance_matrix( Matrix<double> const& data );
478 
479 } // namespace utils
480 } // namespace genesis
481 
482 #endif // include guard
algorithm.hpp
Provides some valuable algorithms that are not part of the C++ 11 STL.
genesis::utils::sum
double sum(const Histogram &h)
Definition: utils/math/histogram/stats.cpp:140
ranking.hpp
common.hpp
genesis::utils::maximum_distance
double maximum_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b)
Calculate the Maximum norm (infinity norm) distance between two (mathematical) vectors.
Definition: distance.hpp:410
genesis::utils::euclidean_distance
double euclidean_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b)
Calculate the Euclidean norm (L2 norm) distance between two (mathematical) vectors.
Definition: distance.hpp:381
genesis::utils::manhattan_distance_matrix
Matrix< double > manhattan_distance_matrix(Matrix< double > const &data)
Calculate the pairwise manhatten distance matrix between the rows of a given matrix.
Definition: distance.cpp:105
genesis::utils::manhattan_distance
double manhattan_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b)
Calculate the Manhattan norm (L1 norm) distance between two (mathematical) vectors.
Definition: distance.hpp:353
matrix.hpp
genesis::utils::maximum_distance_matrix
Matrix< double > maximum_distance_matrix(Matrix< double > const &data)
Calculate the pairwise maximum distance matrix between the rows of a given matrix.
Definition: distance.cpp:115
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::maximum_norm
double maximum_norm(ForwardIterator first, ForwardIterator last)
Calculate the Maximum norm (infinity norm) of a range of numbers.
Definition: distance.hpp:178
genesis::utils::manhattan_norm
double manhattan_norm(ForwardIterator first, ForwardIterator last)
Calculate the Manhattan norm (L1 norm) of a range of numbers.
Definition: distance.hpp:131
genesis::utils::aitchison_norm
double aitchison_norm(ForwardIterator first, ForwardIterator last)
Calculate the Aitchison norm of a range of positive numbers.
Definition: distance.hpp:216
genesis::utils::p_norm_distance_matrix
Matrix< double > p_norm_distance_matrix(Matrix< double > const &data, double p)
Calculate the pairwise distance matrix between the rows of a given matrix.
Definition: distance.cpp:48
genesis::utils::p_norm
double p_norm(ForwardIterator first, ForwardIterator last, double p=2.0)
Calculate the p-norm of a range of numbers.
Definition: distance.hpp:70
genesis::utils::euclidean_norm
double euclidean_norm(ForwardIterator first, ForwardIterator last)
Calculate the Euclidean norm (L2 norm) of a range of numbers.
Definition: distance.hpp:154
genesis::utils::p_norm_distance
double p_norm_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b, double p=2.0)
Calculate the p-norm distance between two (mathematical) vectors.
Definition: distance.hpp:287
genesis::utils::for_each_finite_pair
void for_each_finite_pair(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b, std::function< void(double value_a, double value_b)> execute)
Iterate two ranges of double values in parallel, and execute a function for each pair of values from ...
Definition: common.hpp:286
genesis::utils::euclidean_distance_matrix
Matrix< double > euclidean_distance_matrix(Matrix< double > const &data)
Calculate the pairwise euclidean distance matrix between the rows of a given matrix.
Definition: distance.cpp:110