A toolkit for working with phylogenetic data.
v0.18.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
histogram.cpp
Go to the documentation of this file.
1 /*
2  Genesis - A toolkit for working with phylogenetic data.
3  Copyright (C) 2014-2017 Lucas Czech
4 
5  This program is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program. If not, see <http://www.gnu.org/licenses/>.
17 
18  Contact:
19  Lucas Czech <lucas.czech@h-its.org>
20  Exelixis Lab, Heidelberg Institute for Theoretical Studies
21  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
22 */
23 
32 
33 #include <algorithm>
34 #include <assert.h>
35 #include <cmath>
36 #include <iterator>
37 #include <numeric>
38 #include <stdexcept>
39 
40 namespace genesis {
41 namespace utils {
42 
43 // =================================================================================================
44 // Friends
45 // =================================================================================================
46 
47 void swap( Histogram& lhs, Histogram& rhs ) noexcept
48 {
49  using std::swap;
50  swap( lhs.bins_, rhs.bins_);
51  swap( lhs.ranges_, rhs.ranges_);
52  swap( lhs.out_of_range_behaviour_, rhs.out_of_range_behaviour_);
53 }
54 
55 bool equal_ranges( Histogram const& lhs, Histogram const& rhs )
56 {
57  return lhs.ranges_ == rhs.ranges_;
58 }
59 
60 // =================================================================================================
61 // Constructors and Rule of Five
62 // =================================================================================================
63 
65  const size_t num_bins
66 )
67  : Histogram(num_bins, 0.0, 1.0)
68 {
69  // Nothing to do here.
70 }
71 
73  const size_t num_bins,
74  const double range_min,
75  const double range_max
76 )
77  : bins_ (num_bins)
78  , ranges_ (num_bins + 1)
79  , out_of_range_behaviour_ (OutOfRangeBehaviour::kSqueeze)
80 {
81  if (num_bins == 0) {
82  throw std::domain_error("__FUNCTION__: domain_error");
83  }
84 
85  set_uniform_ranges(range_min, range_max);
86 }
87 
89  const std::vector<double>& ranges
90 )
91  : bins_ (ranges.size() - 1, 0.0)
92  , ranges_ (ranges)
93  , out_of_range_behaviour_ (OutOfRangeBehaviour::kSqueeze)
94 {
95  if (ranges.size() < 2) {
96  throw std::domain_error("__FUNCTION__: domain_error");
97  }
98  if (!std::is_sorted(ranges.begin(), ranges.end())) {
99  throw std::invalid_argument("__FUNCTION__: invalid_argument");
100  }
101 }
102 
103 // =================================================================================================
104 // General Methods
105 // =================================================================================================
106 
107 void Histogram::set_ranges( const std::vector<double>& ranges )
108 {
109  if (ranges.size() != ranges_.size() || !std::is_sorted(ranges.begin(), ranges.end())) {
110  throw std::invalid_argument("__FUNCTION__: invalid_argument");
111  }
112 
113  ranges_ = ranges;
114  clear();
115 }
116 
120 void Histogram::set_uniform_ranges( const double min, const double max )
121 {
122  if (min >= max) {
123  throw std::invalid_argument("__FUNCTION__: invalid_argument");
124  }
125 
126  // const size_t num_bins = bins();
127  // const double bin_width = (max - min) / num_bins;
128  //
129  // for (size_t i = 0; i < num_bins + 1; ++i) {
130  // ranges_[i] = min + static_cast<double>(i) * bin_width;
131  // }
132 
133  // More stable version from the GNU Scientific Library.
134  const double n = static_cast<double>(bins());
135  for (size_t i = 0; i < bins() + 1; ++i) {
136  const double p = static_cast<double>(i);
137 
138  const double f1 = (n - p) / n;
139  const double f2 = p / n;
140 
141  ranges_[i] = f1 * min + f2 * max;
142  }
143 
144  clear();
145 }
146 
151 {
152  for (auto& b : bins_) {
153  b = 0.0;
154  }
155 }
156 
158 {
159  return out_of_range_behaviour_;
160 }
161 
163 {
164  out_of_range_behaviour_ = v;
165 }
166 
167 // =================================================================================================
168 // Comparison
169 // =================================================================================================
170 
172 {
173  return bins_ == rhs.bins_ && ranges_ == rhs.ranges_;
174 }
175 
176 // =================================================================================================
177 // Bin Access
178 // =================================================================================================
179 
180 double& Histogram::at ( size_t bin_num )
181 {
182  if (bin_num >= bins_.size()) {
183  throw std::out_of_range("__FUNCTION__: out_of_range");
184  }
185  return bins_.at(bin_num);
186 }
187 
188 double Histogram::at ( size_t bin_num ) const
189 {
190  if (bin_num >= bins_.size()) {
191  throw std::out_of_range("__FUNCTION__: out_of_range");
192  }
193  return bins_.at(bin_num);
194 }
195 
196 double& Histogram::operator [] ( size_t bin_num )
197 {
198  return bins_[bin_num];
199 }
200 
201 double Histogram::operator [] ( size_t bin_num ) const
202 {
203  return bins_[bin_num];
204 }
205 
206 // =================================================================================================
207 // Bin Iterators
208 // =================================================================================================
209 
211 {
212  return bins_.begin();
213 }
214 
216 {
217  return bins_.end();
218 }
219 
221 {
222  return bins_.cbegin();
223 }
224 
226 {
227  return bins_.cend();
228 }
229 
231 {
232  return bins_.cbegin();
233 }
234 
236 {
237  return bins_.cend();
238 }
239 
240 // =================================================================================================
241 // Properties
242 // =================================================================================================
243 
244 size_t Histogram::bins() const
245 {
246  return bins_.size();
247 }
248 
249 std::pair<double, double> Histogram::bin_range( size_t bin_num ) const
250 {
251  return std::make_pair(ranges_[bin_num], ranges_[bin_num + 1]);
252 }
253 
254 double Histogram::bin_midpoint( size_t bin_num ) const
255 {
256  return (ranges_[bin_num] + ranges_[bin_num + 1]) / 2;
257 }
258 
259 double Histogram::bin_width( size_t bin_num ) const
260 {
261  return ranges_[bin_num + 1] - ranges_[bin_num];
262 }
263 
264 int Histogram::find_bin( double x ) const
265 {
266  const auto r_min = range_min();
267  const auto r_max = range_max();
268 
269  if (x < r_min) {
270  return -1;
271  }
272  if (x >= r_max) {
273  return bins();
274  }
275 
276  // Get the bin for the uniform ranges case.
277  const double bin_width = (r_max - r_min) / static_cast<double>( bins() );
278  const int bin = static_cast<int>( std::floor( (x - r_min) / bin_width ) );
279 
280  // With the calculation above, we always end up within the boundaries.
281  assert(r_min <= x && x < r_max);
282 
283  // Now check if we actually found the correct bin. If so, simply return the bin number.
284  if (ranges_[bin] <= x && x < ranges_[bin + 1]) {
285  return bin;
286  }
287 
288  // If not, do a binary search.
289  const auto it = std::upper_bound(ranges_.begin(), ranges_.end(), x);
290  return std::distance(ranges_.begin(), it) - 1;
291 }
292 
293 double Histogram::range_min() const
294 {
295  return ranges_.front();
296 }
297 
298 double Histogram::range_max() const
299 {
300  return ranges_.back();
301 }
302 
303 bool Histogram::check_range(double x) const
304 {
305  return range_min() <= x && x < range_max();
306 }
307 
308 // =================================================================================================
309 // Modifiers
310 // =================================================================================================
311 
312 int Histogram::increment( double x )
313 {
314  return accumulate(x, 1.0);
315 }
316 
317 int Histogram::accumulate( double x, double weight )
318 {
319  int bin = find_bin(x);
320 
321  // Do boundary check on the bin.
322  // The cast is only done if we already know that bin >= 0, so it is always valid.
323  if (bin < 0 || static_cast <size_t>(bin) >= bins()) {
324  switch (out_of_range_behaviour()) {
326  return bin;
327 
329  if (bin < 0) {
330  bin = 0;
331  } else {
332  bin = bins() - 1;
333  }
334  break;
335 
337  throw std::out_of_range("__FUNCTION__: out_of_range");
338  }
339  }
340 
341  // Set the new bin value and return its index.
342  bins_[bin] += weight;
343  return bin;
344 }
345 
346 void Histogram::increment_bin( size_t bin )
347 {
348  accumulate_bin(bin, 1.0);
349 }
350 
351 void Histogram::accumulate_bin( size_t bin, double weight )
352 {
353  if (bin >= bins()) {
354  throw std::out_of_range("__FUNCTION__: out_of_range");
355  }
356 
357  bins_[bin] += weight;
358 }
359 
360 } // namespace utils
361 } // namespace genesis
size_t bins() const
Definition: histogram.cpp:244
Header of Histogram class.
std::vector< double >::iterator iterator
Definition: histogram.hpp:82
std::vector< double >::const_iterator const_iterator
Definition: histogram.hpp:83
void increment_bin(size_t bin)
Definition: histogram.cpp:346
double range_max() const
Definition: histogram.cpp:298
bool operator==(Histogram const &rhs)
Definition: histogram.cpp:171
Histogram class for accumulating and summarizing data.
Definition: histogram.hpp:68
int find_bin(double x) const
Definition: histogram.cpp:264
bool equal_ranges(Histogram const &lhs, Histogram const &rhs)
Definition: histogram.cpp:55
double bin_width(size_t bin_num) const
Definition: histogram.cpp:259
double & operator[](size_t bin_num)
Definition: histogram.cpp:196
double bin_midpoint(size_t bin_num) const
Definition: histogram.cpp:254
bool check_range(double x) const
Definition: histogram.cpp:303
void swap(NexusTaxa &lhs, NexusTaxa &rhs)
Definition: taxa.hpp:207
void swap(Histogram &lhs, Histogram &rhs) noexcept
Definition: histogram.cpp:47
void accumulate_bin(size_t bin, double weight)
Definition: histogram.cpp:351
OutOfRangeBehaviour out_of_range_behaviour() const
Definition: histogram.cpp:157
int accumulate(double x, double weight)
Definition: histogram.cpp:317
std::pair< double, double > bin_range(size_t bin_num) const
Definition: histogram.cpp:249
double & at(size_t bin_num)
Definition: histogram.cpp:180
Histogram(size_t num_bins)
Definition: histogram.cpp:64
const_iterator cbegin() const
Definition: histogram.cpp:230
void set_uniform_ranges(const double min, const double max)
Definition: histogram.cpp:120
double range_min() const
Definition: histogram.cpp:293
void set_ranges(const std::vector< double > &ranges)
Definition: histogram.cpp:107
const_iterator cend() const
Definition: histogram.cpp:235