41 #include <unordered_map>
56 std::vector<double>
const& x,
57 std::vector<double>
const& y
61 std::unordered_map<double, size_t> unique_x;
62 std::unordered_map<double, size_t> unique_y;
63 assert( x.size() == y.size() );
64 for(
size_t i = 0; i < x.size(); ++i ) {
65 if( std::isfinite(x[i]) && std::isfinite(y[i]) ) {
70 return std::min( unique_x.size(), unique_y.size() );
81 std::vector<double>
const& x, std::vector<double>
const& y,
82 size_t const concordant,
size_t const discordant,
83 size_t const n,
size_t const n0,
size_t const n1,
size_t const n2,
size_t const n3,
86 double tau = std::numeric_limits<double>::quiet_NaN();
87 assert( x.size() == y.size() );
91 assert( n0 == n * (n - 1) / 2 );
92 assert( n1 <= n0 && n2 <= n0 );
93 assert( n3 <= n1 && n3 <= n2 );
94 assert( concordant <= n0 );
95 assert( discordant <= n0 );
96 assert( n0 == concordant + discordant + n1 + n2 - n3 );
102 double const num =
static_cast<int64_t
>(concordant) -
static_cast<int64_t
>(discordant);
106 double const den = concordant + discordant;
116 double const den = std::sqrt(
117 static_cast<double>(n0 - n1) *
static_cast<double>(n0 - n2)
119 if( std::isfinite(den) && den != 0.0 ) {
128 double const den =
squared(n) * ( m - 1 ) / m;
129 if( std::isfinite(den) && den != 0.0 && m > 0 ) {
130 tau = 2.0 * num / den;
136 throw std::invalid_argument(
"Invalid value for KendallsTauMethod" );
140 assert( !std::isfinite(tau) || ( -1.0 <= tau && tau <= 1.0 ));
153 std::vector<double>& data, std::vector<double>& temp,
154 size_t left,
size_t mid,
size_t right
159 size_t inversions = 0;
162 while( i < mid && j <= right ) {
163 if( data[i] <= data[j] ) {
164 temp[k++] = data[i++];
166 temp[k++] = data[j++];
167 inversions += mid - i;
173 temp[k++] = data[i++];
175 while( j <= right ) {
176 temp[k++] = data[j++];
180 for( i = left; i <= right; ++i ) {
192 std::vector<double>& data, std::vector<double>& temp,
size_t left,
size_t right
194 if( left >= right ) {
199 size_t const mid = left + (right - left) / 2;
204 return inv_l + inv_r + inv_m;
210 struct kendalls_tau_pair_hash_ {
213 std::size_t operator () ( T
const& value )
const {
214 return std::hash<T>{}(value);
218 template <
class T1,
class T2>
219 std::size_t operator () ( std::pair<T1,T2>
const& value )
const {
220 auto h1 = std::hash<T1>{}(value.first);
221 auto h2 = std::hash<T2>{}(value.second);
231 std::vector<T>
const& values
234 std::unordered_map<T, size_t, kendalls_tau_pair_hash_> unique_counts;
235 for(
auto const& val : values ) {
236 ++unique_counts[val];
242 for(
auto const& pair : unique_counts ) {
243 if( pair.second > 1 ) {
244 tie_sum += pair.second * (pair.second - 1) / 2;
255 std::vector<double>
const& values
257 assert( values.size() > 1 );
261 double cur_val = values[0];
263 for(
size_t i = 0; i < values.size(); ++i ) {
264 assert( std::isfinite( values[i] ));
265 if( values[i] != cur_val ) {
266 assert( values[i] > cur_val );
271 tie_sum += cur_cnt * (cur_cnt - 1) / 2;
283 tie_sum += cur_cnt * (cur_cnt - 1) / 2;
292 std::vector<double>
const& values,
293 std::vector<size_t>
const& ranks
295 assert( values.size() == ranks.size() );
296 assert( values.size() > 1 );
304 double cur_val = values[ranks[0]];
306 for(
size_t i = 0; i < values.size(); ++i ) {
307 assert( std::isfinite( values[ranks[i]] ));
308 if( values[ranks[i]] != cur_val ) {
309 assert( values[ranks[i]] > cur_val );\
310 tie_sum += cur_cnt * (cur_cnt - 1) / 2;
311 cur_val = values[ranks[i]];
317 tie_sum += cur_cnt * (cur_cnt - 1) / 2;
326 std::vector<double>
const& x,
327 std::vector<double>
const& y,
328 std::vector<size_t>
const& ranks
330 assert( x.size() == y.size() );
331 assert( x.size() == ranks.size() );
332 assert( x.size() > 1 );
336 double cur_val_x = x[ranks[0]];
337 double cur_val_y = y[ranks[0]];
339 for(
size_t i = 0; i < x.size(); ++i ) {
340 assert( std::isfinite( x[ranks[i]] ));
341 assert( std::isfinite( y[ranks[i]] ));
342 if( x[ranks[i]] != cur_val_x || y[ranks[i]] != cur_val_y ) {
343 assert( x[ranks[i]] > cur_val_x || ( x[ranks[i]] == cur_val_x && y[ranks[i]] > cur_val_y ));
344 tie_sum += cur_cnt * (cur_cnt - 1) / 2;
345 cur_val_x = x[ranks[i]];
346 cur_val_y = y[ranks[i]];
352 tie_sum += cur_cnt * (cur_cnt - 1) / 2;
360 std::vector<double>
const& x,
361 std::vector<double>
const& y,
365 assert( x.size() == y.size() );
367 return std::numeric_limits<double>::quiet_NaN();
378 size_t const n = x.size();
379 std::vector<size_t> rank_x(n);
380 std::iota( rank_x.begin(), rank_x.end(), 0 );
381 std::sort( rank_x.begin(), rank_x.end(),
382 [&](
size_t i,
size_t j ) {
383 return x[i] == x[j] ? y[i] < y[j] : x[i] < x[j];
393 std::vector<double> sorted_y(n);
394 for(
size_t i = 0; i < n; ++i ) {
395 sorted_y[i] = y[rank_x[i]];
401 std::vector<double> temp(n);
403 assert( std::is_sorted( sorted_y.begin(), sorted_y.end() ));
411 size_t const n0 = n * (n - 1) / 2;
412 assert( n0 >= n1 && n0 >= n2 );
415 size_t const concordant = n0 - n1 - n2 + n3 - discordant;
416 assert( concordant <= n0 );
417 assert( discordant <= n0 );
424 std::vector<double>
const& x,
425 std::vector<double>
const& y,
429 if( x.size() != y.size() ) {
430 throw std::invalid_argument(
431 "kendalls_tau_correlation_coefficient: Input with differing numbers of elements."
439 size_t nan_count = 0;
440 for(
size_t i = 0; i < x.size(); ++i ) {
441 if( !std::isfinite(x[i]) || !std::isfinite(y[i]) ) {
445 if( nan_count > 0 ) {
446 assert( nan_count <= x.size() );
447 std::vector<double> x_clean;
448 std::vector<double> y_clean;
449 x_clean.reserve( x.size() - nan_count );
450 y_clean.reserve( x.size() - nan_count );
451 for(
size_t i = 0; i < x.size(); ++i ) {
452 if( std::isfinite(x[i]) && std::isfinite(y[i]) ) {
453 x_clean.push_back(x[i]);
454 y_clean.push_back(y[i]);
469 std::vector<double>
const& x,
470 std::vector<double>
const& y,
474 if( x.size() != y.size() ) {
475 throw std::invalid_argument(
476 "kendalls_tau_correlation_coefficient: Input with differing numbers of elements."
481 size_t concordant = 0;
482 size_t discordant = 0;
494 for(
size_t i = 0; i < x.size(); ++i ) {
495 if( std::isfinite(x[i]) && std::isfinite(y[i]) ) {
501 for(
size_t j = i + 1; j < x.size(); ++j ) {
503 if( !std::isfinite(x[j]) || !std::isfinite(y[j]) ) {
508 double const dx = x[i] - x[j];
509 double const dy = y[i] - y[j];
510 assert( std::isfinite(dx) && std::isfinite(dy) );
514 if( dx == 0 && dy == 0 ) {
519 }
else if( dx == 0 ) {
521 }
else if( dy == 0 ) {
523 }
else if( dx * dy > 0 ) {
525 }
else if( dx * dy < 0 ) {