A toolkit for working with phylogenetic data.
v0.24.0
distance.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_DISTANCE_H_
2 #define GENESIS_UTILS_MATH_DISTANCE_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2018 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
38 
39 #include <algorithm>
40 #include <cassert>
41 #include <cmath>
42 #include <cstddef>
43 #include <functional>
44 #include <limits>
45 #include <stdexcept>
46 #include <utility>
47 #include <vector>
48 
49 namespace genesis {
50 namespace utils {
51 
52 // =================================================================================================
53 // Norms
54 // =================================================================================================
55 
69 template <class ForwardIterator>
70 double p_norm( ForwardIterator first, ForwardIterator last, double p = 2.0 )
71 {
72  // Validity. We allow positive inifity.
73  if( p < 1.0 || ( ! std::isfinite( p ) && ! std::isinf( p ))) {
74  throw std::runtime_error( "Cannot calculate p-norm with p < 1.0" );
75  }
76  assert( p >= 1.0 );
77  assert( std::isfinite( p ) || std::isinf( p ));
78 
79  double sum = 0.0;
80  size_t cnt = 0;
81 
82  // Add vector elements.
83  auto it = first;
84  while( it != last ) {
85  if( std::isfinite( *it ) ) {
86  if( std::isfinite( p )) {
87  sum += std::pow( std::abs( *it ), p );
88  } else {
89  sum = std::max( sum, std::abs( *it ));
90  }
91  ++cnt;
92  }
93  ++it;
94  }
95 
96  // If there are no valid elements, return an all-zero result.
97  if( cnt == 0 ) {
98  return 0.0;
99  }
100 
101  // Return the result.
102  assert( cnt > 0 );
103  if( std::isfinite( p )) {
104  return std::pow( sum, 1.0 / p );
105  } else {
106  return sum;
107  }
108 
109  // Make old compilers happy.
110  return 0.0;
111 }
112 
119 inline double p_norm( std::vector<double> const& vec, double p = 2.0 )
120 {
121  return p_norm( vec.begin(), vec.end(), p );
122 }
123 
130 template <class ForwardIterator>
131 double manhattan_norm( ForwardIterator first, ForwardIterator last )
132 {
133  return p_norm( first, last, 1.0 );
134 }
135 
142 inline double manhattan_norm( std::vector<double> const& vec )
143 {
144  return p_norm( vec.begin(), vec.end(), 1.0 );
145 }
146 
153 template <class ForwardIterator>
154 double euclidean_norm( ForwardIterator first, ForwardIterator last )
155 {
156  return p_norm( first, last, 2.0 );
157 }
158 
165 inline double euclidean_norm( std::vector<double> const& vec )
166 {
167  return p_norm( vec.begin(), vec.end(), 2.0 );
168 }
169 
177 template <class ForwardIterator>
178 double maximum_norm( ForwardIterator first, ForwardIterator last )
179 {
180  return p_norm( first, last, std::numeric_limits<double>::infinity() );
181 }
182 
190 inline double maximum_norm( std::vector<double> const& vec )
191 {
192  return p_norm( vec.begin(), vec.end(), std::numeric_limits<double>::infinity() );
193 }
194 
215 template <class ForwardIterator>
216 double aitchison_norm( ForwardIterator first, ForwardIterator last )
217 {
218  double sum = 0.0;
219  size_t cnt = 0;
220 
221  // Outer loop.
222  auto it_out = first;
223  while( it_out != last ) {
224  if( std::isfinite( *it_out ) ) {
225 
226  if( *it_out <= 0.0 ) {
227  throw std::invalid_argument(
228  "Cannot calculate Aitchison norm of non-positive values."
229  );
230  }
231 
232  // Inner loop.
233  auto it_in = first;
234  while( it_in != last ) {
235  if( std::isfinite( *it_in ) ) {
236  auto const ln = std::log( *it_out / *it_in );
237  sum += ln * ln;
238  }
239  ++it_in;
240  }
241 
242  ++cnt;
243  }
244  ++it_out;
245  }
246 
247  // If there are no valid elements, return an all-zero result.
248  if( cnt == 0 ) {
249  return 0.0;
250  }
251 
252  // Return the result.
253  assert( cnt > 0 );
254  return std::sqrt( sum / ( 2.0 * static_cast<double>( cnt )));
255 }
256 
262 inline double aitchison_norm( std::vector<double> const& vec )
263 {
264  return aitchison_norm( vec.begin(), vec.end() );
265 }
266 
267 // =================================================================================================
268 // Distances
269 // =================================================================================================
270 
286 template <class ForwardIteratorA, class ForwardIteratorB>
288  ForwardIteratorA first_a, ForwardIteratorA last_a,
289  ForwardIteratorB first_b, ForwardIteratorB last_b,
290  double p = 2.0
291 ) {
292  // Validity. We allow positive inifity.
293  if( p < 1.0 || ( ! std::isfinite( p ) && ! std::isinf( p ))) {
294  throw std::runtime_error( "Cannot calculate p-norm distance with p < 1.0" );
295  }
296  assert( p >= 1.0 );
297  assert( std::isfinite( p ) || std::isinf( p ));
298 
299  double sum = 0.0;
300  size_t cnt = 0;
301 
302  for_each_finite_pair( first_a, last_a, first_b, last_b, [&]( double val_a, double val_b ){
303  if( std::isfinite( p )) {
304  sum += std::pow( std::abs( val_a - val_b ), p );
305  } else {
306  sum = std::max( sum, std::abs( val_a - val_b ) );
307  }
308  ++cnt;
309  });
310 
311  // If there are no valid elements, return an all-zero result.
312  if( cnt == 0 ) {
313  return 0.0;
314  }
315 
316  // Return the result.
317  assert( cnt > 0 );
318  if( std::isfinite( p )) {
319  return std::pow( sum, 1.0 / p );
320  } else {
321  return sum;
322  }
323 
324  // Make old compilers happy.
325  return 0.0;
326 }
327 
333 inline double p_norm_distance(
334  std::vector<double> const& vec_a, std::vector<double> const& vec_b, double p = 2.0
335 ) {
336  return p_norm_distance( vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(), p );
337 }
338 
346 template <class ForwardIteratorA, class ForwardIteratorB>
348  ForwardIteratorA first_a, ForwardIteratorA last_a,
349  ForwardIteratorB first_b, ForwardIteratorB last_b
350 ) {
351  return p_norm_distance( first_a, last_a, first_b, last_b, 1.0 );
352 }
353 
361 inline double manhattan_distance(
362  std::vector<double> const& vec_a, std::vector<double> const& vec_b
363 ) {
364  return p_norm_distance( vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(), 1.0 );
365 }
366 
374 template <class ForwardIteratorA, class ForwardIteratorB>
376  ForwardIteratorA first_a, ForwardIteratorA last_a,
377  ForwardIteratorB first_b, ForwardIteratorB last_b
378 ) {
379  return p_norm_distance( first_a, last_a, first_b, last_b, 2.0 );
380 }
381 
389 inline double euclidean_distance(
390  std::vector<double> const& vec_a, std::vector<double> const& vec_b
391 ) {
392  return p_norm_distance( vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(), 2.0 );
393 }
394 
403 template <class ForwardIteratorA, class ForwardIteratorB>
405  ForwardIteratorA first_a, ForwardIteratorA last_a,
406  ForwardIteratorB first_b, ForwardIteratorB last_b
407 ) {
408  return p_norm_distance(
409  first_a, last_a, first_b, last_b, std::numeric_limits<double>::infinity()
410  );
411 }
412 
422 inline double maximum_distance(
423  std::vector<double> const& vec_a, std::vector<double> const& vec_b
424 ) {
425  return p_norm_distance(
426  vec_a.begin(), vec_a.end(), vec_b.begin(), vec_b.end(),
427  std::numeric_limits<double>::infinity()
428  );
429 }
430 
431 // =================================================================================================
432 // Distances Matrices
433 // =================================================================================================
434 
446 Matrix<double> p_norm_distance_matrix( Matrix<double> const& data, double p = 2.0 );
447 
455 
463 
472 
473 } // namespace utils
474 } // namespace genesis
475 
476 #endif // include guard
double euclidean_norm(ForwardIterator first, ForwardIterator last)
Calculate the Euclidean norm (L2 norm) of a range of numbers.
Definition: distance.hpp:154
double p_norm(ForwardIterator first, ForwardIterator last, double p=2.0)
Calculate the p-norm of a range of numbers.
Definition: distance.hpp:70
double maximum_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b)
Calculate the Maximum norm (infinity norm) distance between two (mathematical) vectors.
Definition: distance.hpp:404
Provides some valuable algorithms that are not part of the C++ 11 STL.
double manhattan_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b)
Calculate the Manhattan norm (L1 norm) distance between two (mathematical) vectors.
Definition: distance.hpp:347
double manhattan_norm(ForwardIterator first, ForwardIterator last)
Calculate the Manhattan norm (L1 norm) of a range of numbers.
Definition: distance.hpp:131
double aitchison_norm(ForwardIterator first, ForwardIterator last)
Calculate the Aitchison norm of a range of positive numbers.
Definition: distance.hpp:216
double euclidean_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b)
Calculate the Euclidean norm (L2 norm) distance between two (mathematical) vectors.
Definition: distance.hpp:375
double sum(const Histogram &h)
Container namespace for all symbols of genesis in order to keep them separate when used as a library...
double p_norm_distance(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b, double p=2.0)
Calculate the p-norm distance between two (mathematical) vectors.
Definition: distance.hpp:287
Matrix< double > maximum_distance_matrix(Matrix< double > const &data)
Calculate the pairwise maximum distance matrix between the rows of a given matrix.
Definition: distance.cpp:115
Matrix< double > euclidean_distance_matrix(Matrix< double > const &data)
Calculate the pairwise euclidean distance matrix between the rows of a given matrix.
Definition: distance.cpp:110
Matrix< double > p_norm_distance_matrix(Matrix< double > const &data, double p)
Calculate the pairwise distance matrix between the rows of a given matrix.
Definition: distance.cpp:48
double maximum_norm(ForwardIterator first, ForwardIterator last)
Calculate the Maximum norm (infinity norm) of a range of numbers.
Definition: distance.hpp:178
Matrix< double > manhattan_distance_matrix(Matrix< double > const &data)
Calculate the pairwise manhatten distance matrix between the rows of a given matrix.
Definition: distance.cpp:105
void for_each_finite_pair(ForwardIteratorA first_a, ForwardIteratorA last_a, ForwardIteratorB first_b, ForwardIteratorB last_b, std::function< void(double value_a, double value_b)> execute)
Iterate two ranges of double values in parallel, and execute a function for each pair of values from ...
Definition: common.hpp:223