A library for working with phylogenetic and population genetic data.
v0.32.0
histogram.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2020 Lucas Czech and HITS gGmbH
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <algorithm>
34 #include <cassert>
35 #include <cmath>
36 #include <iterator>
37 #include <numeric>
38 #include <stdexcept>
39 
40 namespace genesis {
41 namespace utils {
42 
43 // =================================================================================================
44 // Friends
45 // =================================================================================================
46 
47 void swap( Histogram& lhs, Histogram& rhs )
48 {
49  using std::swap;
50  swap( lhs.bins_, rhs.bins_);
51  swap( lhs.ranges_, rhs.ranges_);
52  swap( lhs.out_of_range_behaviour_, rhs.out_of_range_behaviour_);
53 }
54 
55 bool equal_ranges( Histogram const& lhs, Histogram const& rhs )
56 {
57  return lhs.ranges_ == rhs.ranges_;
58 }
59 
60 // =================================================================================================
61 // Constructors and Rule of Five
62 // =================================================================================================
63 
65  const size_t num_bins
66 )
67  : Histogram(num_bins, 0.0, 1.0)
68 {
69  // Nothing to do here.
70 }
71 
73  const size_t num_bins,
74  const double range_min,
75  const double range_max
76 )
77  : bins_ (num_bins)
78  , ranges_ (num_bins + 1)
79  , out_of_range_behaviour_ (OutOfRangeBehaviour::kSqueeze)
80 {
81  if (num_bins == 0) {
82  throw std::domain_error("__FUNCTION__: domain_error");
83  }
84 
86 }
87 
89  const std::vector<double>& ranges
90 )
91  : bins_ (ranges.size() - 1, 0.0)
92  , ranges_ (ranges)
93  , out_of_range_behaviour_ (OutOfRangeBehaviour::kSqueeze)
94 {
95  if (ranges.size() < 2) {
96  throw std::domain_error("__FUNCTION__: domain_error");
97  }
98  if (!std::is_sorted(ranges.begin(), ranges.end())) {
99  throw std::invalid_argument("__FUNCTION__: invalid_argument");
100  }
101 }
102 
103 // =================================================================================================
104 // General Methods
105 // =================================================================================================
106 
107 void Histogram::set_ranges( const std::vector<double>& ranges )
108 {
109  if (ranges.size() != ranges_.size() || !std::is_sorted(ranges.begin(), ranges.end())) {
110  throw std::invalid_argument("__FUNCTION__: invalid_argument");
111  }
112 
113  ranges_ = ranges;
114  clear();
115 }
116 
120 void Histogram::set_uniform_ranges( const double min, const double max )
121 {
122  if (min >= max) {
123  throw std::invalid_argument("__FUNCTION__: invalid_argument");
124  }
125 
126  // const size_t num_bins = bins();
127  // const double bin_width = (max - min) / num_bins;
128  //
129  // for (size_t i = 0; i < num_bins + 1; ++i) {
130  // ranges_[i] = min + static_cast<double>(i) * bin_width;
131  // }
132 
133  // More stable version from the GNU Scientific Library.
134  const double n = static_cast<double>(bins());
135  for (size_t i = 0; i < bins() + 1; ++i) {
136  const double p = static_cast<double>(i);
137 
138  const double f1 = (n - p) / n;
139  const double f2 = p / n;
140 
141  ranges_[i] = f1 * min + f2 * max;
142  }
143 
144  clear();
145 }
146 
151 {
152  for (auto& b : bins_) {
153  b = 0.0;
154  }
155 }
156 
158 {
159  return out_of_range_behaviour_;
160 }
161 
163 {
164  out_of_range_behaviour_ = v;
165 }
166 
167 // =================================================================================================
168 // Comparison
169 // =================================================================================================
170 
172 {
173  return bins_ == rhs.bins_ && ranges_ == rhs.ranges_;
174 }
175 
176 // =================================================================================================
177 // Bin Access
178 // =================================================================================================
179 
180 double& Histogram::at ( size_t bin_num )
181 {
182  if (bin_num >= bins_.size()) {
183  throw std::out_of_range("__FUNCTION__: out_of_range");
184  }
185  return bins_.at(bin_num);
186 }
187 
188 double Histogram::at ( size_t bin_num ) const
189 {
190  if (bin_num >= bins_.size()) {
191  throw std::out_of_range("__FUNCTION__: out_of_range");
192  }
193  return bins_.at(bin_num);
194 }
195 
196 double& Histogram::operator [] ( size_t bin_num )
197 {
198  return bins_[bin_num];
199 }
200 
201 double Histogram::operator [] ( size_t bin_num ) const
202 {
203  return bins_[bin_num];
204 }
205 
206 // =================================================================================================
207 // Bin Iterators
208 // =================================================================================================
209 
211 {
212  return bins_.begin();
213 }
214 
216 {
217  return bins_.end();
218 }
219 
221 {
222  return bins_.cbegin();
223 }
224 
226 {
227  return bins_.cend();
228 }
229 
231 {
232  return bins_.cbegin();
233 }
234 
236 {
237  return bins_.cend();
238 }
239 
240 // =================================================================================================
241 // Properties
242 // =================================================================================================
243 
244 size_t Histogram::bins() const
245 {
246  return bins_.size();
247 }
248 
249 std::pair<double, double> Histogram::bin_range( size_t bin_num ) const
250 {
251  return std::make_pair(ranges_[bin_num], ranges_[bin_num + 1]);
252 }
253 
254 double Histogram::bin_midpoint( size_t bin_num ) const
255 {
256  return (ranges_[bin_num] + ranges_[bin_num + 1]) / 2;
257 }
258 
259 double Histogram::bin_width( size_t bin_num ) const
260 {
261  return ranges_[bin_num + 1] - ranges_[bin_num];
262 }
263 
264 long Histogram::find_bin( double x ) const
265 {
266  const auto r_min = ranges_.front();
267  const auto r_max = ranges_.back();
268 
269  // Check boundary cases. Inf is handled correctly by this.
270  // NaN is either considered to be out of bounds, our in case of OutOfRangeBehaviour::kSqueeze,
271  // it is taken to be 0.
272  if( x < r_min || std::isnan( x )) {
273  return -1;
274  }
275  if( x >= r_max ) {
276  return bins_.size();
277  }
278 
279  // Get the bin for the uniform ranges case.
280  const double bin_width = (r_max - r_min) / static_cast<double>( bins_.size() );
281  const long bin = static_cast<long>( std::floor( (x - r_min) / bin_width ) );
282 
283  // With the calculation above, we always end up within the boundaries.
284  assert(r_min <= x && x < r_max);
285 
286  // Now check if we actually found the correct bin. If so, simply return the bin number.
287  if (ranges_[bin] <= x && x < ranges_[bin + 1]) {
288  return bin;
289  }
290 
291  // If not, do a binary search.
292  const auto it = std::upper_bound(ranges_.begin(), ranges_.end(), x);
293  return std::distance(ranges_.begin(), it) - 1;
294 }
295 
296 double Histogram::range_min() const
297 {
298  return ranges_.front();
299 }
300 
301 double Histogram::range_max() const
302 {
303  return ranges_.back();
304 }
305 
306 bool Histogram::check_range(double x) const
307 {
308  return range_min() <= x && x < range_max();
309 }
310 
311 // =================================================================================================
312 // Modifiers
313 // =================================================================================================
314 
315 long Histogram::increment( double x )
316 {
317  return accumulate(x, 1.0);
318 }
319 
320 long Histogram::accumulate( double x, double weight )
321 {
322  long bin = find_bin(x);
323 
324  // Do boundary check on the bin.
325  // The cast is only done if we already know that bin >= 0, so it is always valid.
326  if (bin < 0 || static_cast <size_t>(bin) >= bins()) {
327  switch (out_of_range_behaviour()) {
329  return bin;
330 
332  if (bin < 0) {
333  bin = 0;
334  } else {
335  bin = bins() - 1;
336  }
337  break;
338 
340  throw std::out_of_range("__FUNCTION__: out_of_range");
341  }
342  }
343 
344  // Set the new bin value and return its index.
345  bins_[bin] += weight;
346  return bin;
347 }
348 
349 void Histogram::increment_bin( size_t bin )
350 {
351  accumulate_bin(bin, 1.0);
352 }
353 
354 void Histogram::accumulate_bin( size_t bin, double weight )
355 {
356  if( bin >= bins_.size() ) {
357  throw std::out_of_range("__FUNCTION__: out_of_range");
358  }
359 
360  bins_[bin] += weight;
361 }
362 
363 } // namespace utils
364 } // namespace genesis
genesis::utils::Histogram::bin_midpoint
double bin_midpoint(size_t bin_num) const
Definition: histogram.cpp:254
genesis::utils::Histogram::cend
const_iterator cend() const
Definition: histogram.cpp:235
genesis::utils::Histogram::const_iterator
std::vector< double >::const_iterator const_iterator
Definition: histogram.hpp:83
genesis::utils::equal_ranges
bool equal_ranges(Histogram const &lhs, Histogram const &rhs)
Definition: histogram.cpp:55
genesis::utils::Histogram::out_of_range_behaviour
OutOfRangeBehaviour out_of_range_behaviour() const
Definition: histogram.cpp:157
genesis::utils::Histogram::increment_bin
void increment_bin(size_t bin)
Definition: histogram.cpp:349
genesis::utils::Histogram::check_range
bool check_range(double x) const
Definition: histogram.cpp:306
genesis::utils::Histogram::OutOfRangeBehaviour::kThrow
@ kThrow
genesis::utils::Histogram::range_max
double range_max() const
Definition: histogram.cpp:301
histogram.hpp
Header of Histogram class.
genesis::utils::Histogram::operator==
bool operator==(Histogram const &rhs)
Definition: histogram.cpp:171
genesis::utils::Histogram::end
iterator end()
Definition: histogram.cpp:215
genesis::utils::Histogram::operator[]
double & operator[](size_t bin_num)
Definition: histogram.cpp:196
genesis::utils::Histogram::bin_width
double bin_width(size_t bin_num) const
Definition: histogram.cpp:259
genesis::utils::Histogram::OutOfRangeBehaviour::kSqueeze
@ kSqueeze
genesis::utils::Histogram::set_uniform_ranges
void set_uniform_ranges(const double min, const double max)
Definition: histogram.cpp:120
genesis::utils::swap
void swap(Color &lhs, Color &rhs)
Definition: color.hpp:207
genesis::utils::Histogram::cbegin
const_iterator cbegin() const
Definition: histogram.cpp:230
genesis::utils::Histogram::OutOfRangeBehaviour
OutOfRangeBehaviour
Definition: histogram.hpp:76
genesis::utils::Histogram::accumulate_bin
void accumulate_bin(size_t bin, double weight)
Definition: histogram.cpp:354
genesis::utils::Histogram::bin_range
std::pair< double, double > bin_range(size_t bin_num) const
Definition: histogram.cpp:249
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::Histogram::bins
size_t bins() const
Definition: histogram.cpp:244
genesis::utils::Histogram::iterator
std::vector< double >::iterator iterator
Definition: histogram.hpp:82
genesis::utils::swap
void swap(Histogram &lhs, Histogram &rhs)
Definition: histogram.cpp:47
genesis::utils::Histogram::begin
iterator begin()
Definition: histogram.cpp:210
genesis::utils::Histogram::OutOfRangeBehaviour::kIgnore
@ kIgnore
genesis::utils::Histogram::at
double & at(size_t bin_num)
Definition: histogram.cpp:180
genesis::utils::Histogram::find_bin
long find_bin(double x) const
Definition: histogram.cpp:264
genesis::utils::Histogram::Histogram
Histogram()=default
genesis::utils::Histogram::range_min
double range_min() const
Definition: histogram.cpp:296
genesis::utils::Histogram::accumulate
long accumulate(double x, double weight)
Definition: histogram.cpp:320
genesis::utils::Histogram::clear
void clear()
Definition: histogram.cpp:150
genesis::utils::Histogram::increment
long increment(double x)
Definition: histogram.cpp:315
genesis::utils::Histogram::set_ranges
void set_ranges(const std::vector< double > &ranges)
Definition: histogram.cpp:107
genesis::utils::Histogram
Histogram class for accumulating and summarizing data.
Definition: histogram.hpp:68