A library for working with phylogenetic and population genetic data.
v0.27.0
ranking.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_MATH_RANKING_H_
2 #define GENESIS_UTILS_MATH_RANKING_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
35 
36 #include <algorithm>
37 #include <cassert>
38 #include <cmath>
39 #include <utility>
40 #include <vector>
41 
42 namespace genesis {
43 namespace utils {
44 
45 // =================================================================================================
46 // Ranking Standard
47 // =================================================================================================
48 
58 template <class RandomAccessIterator>
59 std::vector<size_t> ranking_standard( RandomAccessIterator first, RandomAccessIterator last )
60 {
61  // Prepare result, and get the sorting order of the vector.
62  auto const size = static_cast<size_t>( std::distance( first, last ));
63  auto result = std::vector<size_t>( size, 1 );
64  auto const order = stable_sort_indices( first, last );
65 
66  // Shortcuts for better readability.
67  auto ordered_value = [&]( size_t i ){
68  return *( first + order[i] );
69  };
70  auto ordered_result = [&]( size_t i ) -> size_t& {
71  return result[ order[i] ];
72  };
73 
74  // Calculate ranks.
75  for( size_t i = 1; i < size; ++i ) {
76 
77  // Same values get the same rank. The next bigger one continues at the current i.
78  if( ordered_value( i ) == ordered_value( i - 1 ) ) {
79  ordered_result( i ) = ordered_result( i - 1 );
80  } else {
81  ordered_result( i ) = i + 1;
82  }
83  }
84 
85  return result;
86 }
87 
91 inline std::vector<size_t> ranking_standard( std::vector<double> const& vec )
92 {
93  return ranking_standard( vec.begin(), vec.end() );
94 }
95 
96 // =================================================================================================
97 // Ranking Modified
98 // =================================================================================================
99 
109 template <class RandomAccessIterator>
110 std::vector<size_t> ranking_modified( RandomAccessIterator first, RandomAccessIterator last )
111 {
112  // Prepare result, and get the sorting order of the vector.
113  auto const size = static_cast<size_t>( std::distance( first, last ));
114  auto result = std::vector<size_t>( size, 1 );
115  auto const order = stable_sort_indices( first, last );
116 
117  // Shortcuts for better readability.
118  auto ordered_value = [&]( size_t i ){
119  return *( first + order[i] );
120  };
121  auto ordered_result = [&]( size_t i ) -> size_t& {
122  return result[ order[i] ];
123  };
124 
125  // Calculate ranks. The loop variable is incremented at the end.
126  for( size_t i = 0; i < size; ) {
127 
128  // Look ahead: How often does the value occur?
129  size_t j = 1;
130  while( i+j < size && ordered_value(i+j) == ordered_value(i) ) {
131  ++j;
132  }
133 
134  // Set the j-next entries.
135  for( size_t k = 0; k < j; ++k ) {
136  ordered_result( i + k ) = i + j;
137  }
138 
139  // We can skip the j-next loop iterations, as we just set their values
140  i += j;
141  }
142 
143  return result;
144 }
145 
149 inline std::vector<size_t> ranking_modified( std::vector<double> const& vec )
150 {
151  return ranking_modified( vec.begin(), vec.end() );
152 }
153 
154 // =================================================================================================
155 // Ranking Dense
156 // =================================================================================================
157 
166 template <class RandomAccessIterator>
167 std::vector<size_t> ranking_dense( RandomAccessIterator first, RandomAccessIterator last )
168 {
169  // Prepare result, and get the sorting order of the vector.
170  auto const size = static_cast<size_t>( std::distance( first, last ));
171  auto result = std::vector<size_t>( size, 1 );
172  auto const order = stable_sort_indices( first, last );
173 
174  // Shortcuts for better readability.
175  auto ordered_value = [&]( size_t i ){
176  return *( first + order[i] );
177  };
178  auto ordered_result = [&]( size_t i ) -> size_t& {
179  return result[ order[i] ];
180  };
181 
182  // Calculate ranks.
183  for( size_t i = 1; i < size; ++i ) {
184 
185  // Same values get the same rank. The next bigger one continues by incrementing.
186  if( ordered_value( i ) == ordered_value( i - 1 ) ) {
187  ordered_result( i ) = ordered_result( i - 1 );
188  } else {
189  ordered_result( i ) = ordered_result( i - 1 ) + 1;
190  }
191  }
192 
193  return result;
194 }
195 
199 inline std::vector<size_t> ranking_dense( std::vector<double> const& vec )
200 {
201  return ranking_dense( vec.begin(), vec.end() );
202 }
203 
204 // =================================================================================================
205 // Ranking Ordinal
206 // =================================================================================================
207 
216 template <class RandomAccessIterator>
217 std::vector<size_t> ranking_ordinal( RandomAccessIterator first, RandomAccessIterator last )
218 {
219  // Prepare result, and get the sorting order of the vector.
220  auto const size = static_cast<size_t>( std::distance( first, last ));
221  auto result = std::vector<size_t>( size, 1 );
222  auto const order = stable_sort_indices( first, last );
223 
224  // Shortcuts for better readability.
225  auto ordered_result = [&]( size_t i ) -> size_t& {
226  return result[ order[i] ];
227  };
228 
229  // Calculate ranks. This is simply the order plus 1 (as ranks are 1-based).
230  for( size_t i = 0; i < size; ++i ) {
231  ordered_result( i ) = i + 1;
232  }
233 
234  return result;
235 }
236 
240 inline std::vector<size_t> ranking_ordinal( std::vector<double> const& vec )
241 {
242  return ranking_ordinal( vec.begin(), vec.end() );
243 }
244 
245 // =================================================================================================
246 // Ranking Fractional
247 // =================================================================================================
248 
259 template <class RandomAccessIterator>
260 std::vector<double> ranking_fractional( RandomAccessIterator first, RandomAccessIterator last )
261 {
262  // Prepare result, and get the sorting order of the vector.
263  auto const size = static_cast<size_t>( std::distance( first, last ));
264  auto result = std::vector<double>( size, 1 );
265  auto const order = stable_sort_indices( first, last );
266 
267  // Shortcuts for better readability.
268  auto ordered_value = [&]( size_t i ){
269  return *( first + order[i] );
270  };
271  auto ordered_result = [&]( size_t i ) -> double& {
272  return result[ order[i] ];
273  };
274 
275  // Calculate the average of the sum of numbers in the given inclusive range.
276  auto sum_avg = []( size_t l, size_t r )
277  {
278  assert( l > 0 );
279  assert( r > 0 );
280  assert( l <= r );
281 
282  // Example: l == 7, r == 9
283  // We want: (7 + 8 + 9) / 3 = 8.0
284  // Upper: 1+2+3+4+5+6+7+8+9 = 45
285  // Lower: 1+2+3+4+5+6 = 21
286  // Diff: 45 - 21 = 24
287  // Count: 9 - 7 + 1 = 3
288  // Result: 24 / 3 = 8
289  auto const upper = r * ( r + 1 ) / 2;
290  auto const lower = ( l - 1 ) * l / 2;
291  return static_cast<double>( upper - lower ) / static_cast<double>( r - l + 1 );
292  };
293 
294  // Calculate ranks. The loop variable is incremented at the end.
295  for( size_t i = 0; i < size; ) {
296 
297  // Look ahead: How often does the value occur?
298  size_t j = 1;
299  while( i+j < size && ordered_value(i+j) == ordered_value(i) ) {
300  ++j;
301  }
302 
303  // Set the j-next entries.
304  auto entry = sum_avg( i + 1, i + j );
305  for( size_t k = 0; k < j; ++k ) {
306  ordered_result( i + k ) = entry;
307  }
308 
309  // We can skip the j-next loop iterations, as we just set their values
310  i += j;
311  }
312 
313  return result;
314 }
315 
319 inline std::vector<double> ranking_fractional( std::vector<double> const& vec )
320 {
321  return ranking_fractional( vec.begin(), vec.end() );
322 }
323 
324 } // namespace utils
325 } // namespace genesis
326 
327 #endif // include guard
algorithm.hpp
Provides some valuable algorithms that are not part of the C++ 11 STL.
genesis::utils::ranking_modified
std::vector< size_t > ranking_modified(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Modified competition ranking ("1334" ranki...
Definition: ranking.hpp:110
genesis::utils::ranking_fractional
std::vector< double > ranking_fractional(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Fractional ranking ("1 2....
Definition: ranking.hpp:260
genesis::utils::ranking_ordinal
std::vector< size_t > ranking_ordinal(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Ordinal ranking ("1234" ranking).
Definition: ranking.hpp:217
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::ranking_dense
std::vector< size_t > ranking_dense(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Dense ranking ("1223" ranking).
Definition: ranking.hpp:167
genesis::utils::stable_sort_indices
std::vector< size_t > stable_sort_indices(RandomAccessIterator first, RandomAccessIterator last, Comparator comparator)
Get the indices to the stable sorted order of the given range.
Definition: algorithm.hpp:239
genesis::utils::ranking_standard
std::vector< size_t > ranking_standard(RandomAccessIterator first, RandomAccessIterator last)
Return the ranking of the values in the given range, using Standard competition ranking ("1224" ranki...
Definition: ranking.hpp:59