A library for working with phylogenetic and population genetic data.
v0.32.0
ranking.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_RANKING_H_
2 #define GENESIS_UTILS_MATH_RANKING_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
35 
36 #include <algorithm>
37 #include <cassert>
38 #include <cmath>
39 #include <cstdlib>
40 #include <queue>
41 #include <utility>
42 #include <vector>
43 
44 namespace genesis {
45 namespace utils {
46 
47 // =================================================================================================
48 // Ranking Standard
49 // =================================================================================================
50 
60 template <class RandomAccessIterator>
61 std::vector<size_t> ranking_standard( RandomAccessIterator first, RandomAccessIterator last )
62 {
63  // Prepare result, and get the sorting order of the vector.
64  auto const size = static_cast<size_t>( std::distance( first, last ));
65  auto result = std::vector<size_t>( size, 1 );
66  auto const order = stable_sort_indices( first, last );
67 
68  // Shortcuts for better readability.
69  auto ordered_value = [&]( size_t i ){
70  return *( first + order[i] );
71  };
72  auto ordered_result = [&]( size_t i ) -> size_t& {
73  return result[ order[i] ];
74  };
75 
76  // Calculate ranks.
77  for( size_t i = 1; i < size; ++i ) {
78 
79  // Same values get the same rank. The next bigger one continues at the current i.
80  if( ordered_value( i ) == ordered_value( i - 1 ) ) {
81  ordered_result( i ) = ordered_result( i - 1 );
82  } else {
83  ordered_result( i ) = i + 1;
84  }
85  }
86 
87  return result;
88 }
89 
93 inline std::vector<size_t> ranking_standard( std::vector<double> const& vec )
94 {
95  return ranking_standard( vec.begin(), vec.end() );
96 }
97 
98 // =================================================================================================
99 // Ranking Modified
100 // =================================================================================================
101 
111 template <class RandomAccessIterator>
112 std::vector<size_t> ranking_modified( RandomAccessIterator first, RandomAccessIterator last )
113 {
114  // Prepare result, and get the sorting order of the vector.
115  auto const size = static_cast<size_t>( std::distance( first, last ));
116  auto result = std::vector<size_t>( size, 1 );
117  auto const order = stable_sort_indices( first, last );
118 
119  // Shortcuts for better readability.
120  auto ordered_value = [&]( size_t i ){
121  return *( first + order[i] );
122  };
123  auto ordered_result = [&]( size_t i ) -> size_t& {
124  return result[ order[i] ];
125  };
126 
127  // Calculate ranks. The loop variable is incremented at the end.
128  for( size_t i = 0; i < size; ) {
129 
130  // Look ahead: How often does the value occur?
131  size_t j = 1;
132  while( i+j < size && ordered_value(i+j) == ordered_value(i) ) {
133  ++j;
134  }
135 
136  // Set the j-next entries.
137  for( size_t k = 0; k < j; ++k ) {
138  ordered_result( i + k ) = i + j;
139  }
140 
141  // We can skip the j-next loop iterations, as we just set their values
142  i += j;
143  }
144 
145  return result;
146 }
147 
151 inline std::vector<size_t> ranking_modified( std::vector<double> const& vec )
152 {
153  return ranking_modified( vec.begin(), vec.end() );
154 }
155 
156 // =================================================================================================
157 // Ranking Dense
158 // =================================================================================================
159 
168 template <class RandomAccessIterator>
169 std::vector<size_t> ranking_dense( RandomAccessIterator first, RandomAccessIterator last )
170 {
171  // Prepare result, and get the sorting order of the vector.
172  auto const size = static_cast<size_t>( std::distance( first, last ));
173  auto result = std::vector<size_t>( size, 1 );
174  auto const order = stable_sort_indices( first, last );
175 
176  // Shortcuts for better readability.
177  auto ordered_value = [&]( size_t i ){
178  return *( first + order[i] );
179  };
180  auto ordered_result = [&]( size_t i ) -> size_t& {
181  return result[ order[i] ];
182  };
183 
184  // Calculate ranks.
185  for( size_t i = 1; i < size; ++i ) {
186 
187  // Same values get the same rank. The next bigger one continues by incrementing.
188  if( ordered_value( i ) == ordered_value( i - 1 ) ) {
189  ordered_result( i ) = ordered_result( i - 1 );
190  } else {
191  ordered_result( i ) = ordered_result( i - 1 ) + 1;
192  }
193  }
194 
195  return result;
196 }
197 
201 inline std::vector<size_t> ranking_dense( std::vector<double> const& vec )
202 {
203  return ranking_dense( vec.begin(), vec.end() );
204 }
205 
206 // =================================================================================================
207 // Ranking Ordinal
208 // =================================================================================================
209 
218 template <class RandomAccessIterator>
219 std::vector<size_t> ranking_ordinal( RandomAccessIterator first, RandomAccessIterator last )
220 {
221  // Prepare result, and get the sorting order of the vector.
222  auto const size = static_cast<size_t>( std::distance( first, last ));
223  auto result = std::vector<size_t>( size, 1 );
224  auto const order = stable_sort_indices( first, last );
225 
226  // Shortcuts for better readability.
227  auto ordered_result = [&]( size_t i ) -> size_t& {
228  return result[ order[i] ];
229  };
230 
231  // Calculate ranks. This is simply the order plus 1 (as ranks are 1-based).
232  for( size_t i = 0; i < size; ++i ) {
233  ordered_result( i ) = i + 1;
234  }
235 
236  return result;
237 }
238 
242 inline std::vector<size_t> ranking_ordinal( std::vector<double> const& vec )
243 {
244  return ranking_ordinal( vec.begin(), vec.end() );
245 }
246 
247 // =================================================================================================
248 // Ranking Fractional
249 // =================================================================================================
250 
261 template <class RandomAccessIterator>
262 std::vector<double> ranking_fractional( RandomAccessIterator first, RandomAccessIterator last )
263 {
264  // Prepare result, and get the sorting order of the vector.
265  auto const size = static_cast<size_t>( std::distance( first, last ));
266  auto result = std::vector<double>( size, 1 );
267  auto const order = stable_sort_indices( first, last );
268 
269  // Shortcuts for better readability.
270  auto ordered_value = [&]( size_t i ){
271  return *( first + order[i] );
272  };
273  auto ordered_result = [&]( size_t i ) -> double& {
274  return result[ order[i] ];
275  };
276 
277  // Calculate the average of the sum of numbers in the given inclusive range.
278  auto sum_avg = []( size_t l, size_t r )
279  {
280  assert( l > 0 );
281  assert( r > 0 );
282  assert( l <= r );
283 
284  // Example: l == 7, r == 9
285  // We want: (7 + 8 + 9) / 3 = 8.0
286  // Upper: 1+2+3+4+5+6+7+8+9 = 45
287  // Lower: 1+2+3+4+5+6 = 21
288  // Diff: 45 - 21 = 24
289  // Count: 9 - 7 + 1 = 3
290  // Result: 24 / 3 = 8
291  auto const upper = r * ( r + 1 ) / 2;
292  auto const lower = ( l - 1 ) * l / 2;
293  return static_cast<double>( upper - lower ) / static_cast<double>( r - l + 1 );
294  };
295 
296  // Calculate ranks. The loop variable is incremented at the end.
297  for( size_t i = 0; i < size; ) {
298 
299  // Look ahead: How often does the value occur?
300  size_t j = 1;
301  while( i+j < size && ordered_value(i+j) == ordered_value(i) ) {
302  ++j;
303  }
304 
305  // Set the j-next entries.
306  auto entry = sum_avg( i + 1, i + j );
307  for( size_t k = 0; k < j; ++k ) {
308  ordered_result( i + k ) = entry;
309  }
310 
311  // We can skip the j-next loop iterations, as we just set their values
312  i += j;
313  }
314 
315  return result;
316 }
317 
321 inline std::vector<double> ranking_fractional( std::vector<double> const& vec )
322 {
323  return ranking_fractional( vec.begin(), vec.end() );
324 }
325 
326 // =================================================================================================
327 // N First Elements
328 // =================================================================================================
329 
337 template<
338  typename ForwardIterator,
339  typename T = typename ForwardIterator::value_type,
340  typename Comp = std::less<T>
341 >
342 std::vector<T> n_first_elements(
343  ForwardIterator first, ForwardIterator last, size_t n, Comp comp = std::less<T>{}
344 ) {
345  // Edge case that we need to catch
346  if( n == 0 ) {
347  return {};
348  }
349 
350  // Use a priority queue so that we do not need to sort the whole vector to get the n largest.
351  // We set it up so that top() is the last element we want, and only ever keep n elements.
352  // Then, while iterating the input, we compare each element to that top() element,
353  // and if the iterated element needs to go before the top (according to @p comp),
354  // we replace the top one with the current iterated element.
355  // See https://en.cppreference.com/w/cpp/container/priority_queue for details.
356  std::priority_queue<T, std::vector<T>, decltype(comp)> queue( comp );
357  size_t val_cnt = 0;
358  while( first != last ) {
359  auto v = *first;
360  if( queue.size() < n ) {
361  queue.push( v );
362  } else if( comp( v, queue.top() )) {
363  queue.pop();
364  queue.push( v );
365  }
366  ++first;
367  ++val_cnt;
368  }
369  assert(( val_cnt < n && queue.size() == val_cnt ) || queue.size() == n );
370  (void) val_cnt;
371 
372  // Convert to vector in sorting order. The top() element is actually the _last_ we want
373  // in the returned vector, so we fill it backwards... bit cumbersome, but efficient.
374  std::vector<T> result;
375  result.resize( queue.size() );
376  size_t res_idx = result.size();
377  while( ! queue.empty() ) {
378  assert( res_idx > 0 );
379  --res_idx;
380  result[ res_idx ] = queue.top();
381  queue.pop();
382  }
383  assert( res_idx == 0 );
384  assert(( val_cnt < n && result.size() == val_cnt ) || result.size() == n );
385  return result;
386 }
387 
388 } // namespace utils
389 } // namespace genesis
390 
391 #endif // include guard
algorithm.hpp
Provides some valuable algorithms that are not part of the C++ 11 STL.
genesis::utils::ranking_modified
std::vector< size_t > ranking_modified(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Modified competition ranking ("1334" ranki...
Definition: ranking.hpp:112
genesis::utils::ranking_fractional
std::vector< double > ranking_fractional(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Fractional ranking ("1 2....
Definition: ranking.hpp:262
genesis::utils::ranking_ordinal
std::vector< size_t > ranking_ordinal(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Ordinal ranking ("1234" ranking).
Definition: ranking.hpp:219
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::ranking_dense
std::vector< size_t > ranking_dense(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Dense ranking ("1223" ranking).
Definition: ranking.hpp:169
genesis::utils::stable_sort_indices
std::vector< size_t > stable_sort_indices(RandomAccessIterator first, RandomAccessIterator last, Comparator comparator)
Get the indices to the stable sorted order of the given range.
Definition: algorithm.hpp:239
genesis::utils::ranking_standard
std::vector< size_t > ranking_standard(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Standard competition ranking ("1224" ranki...
Definition: ranking.hpp:61
genesis::utils::n_first_elements
std::vector< T > n_first_elements(ForwardIterator first, ForwardIterator last, size_t n, Comp comp=std::less< T >{})
Return the n first elements of a given input range in sorting order.
Definition: ranking.hpp:342