A library for working with phylogenetic and population genetic data.
v0.27.0
function_cache.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_FUNCTION_CACHE_H_
2 #define GENESIS_UTILS_CONTAINERS_FUNCTION_CACHE_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2021 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
36 
37 #include <cassert>
38 #include <functional>
39 #include <memory>
40 #include <mutex>
41 #include <stdexcept>
42 #include <tuple>
43 #include <unordered_map>
44 #include <vector>
45 
46 namespace genesis {
47 namespace utils {
48 
49 // =================================================================================================
50 // Simple Cache
51 // =================================================================================================
52 
80 template <typename R, typename... A>
82 {
83  // -------------------------------------------------------------------------
84  // Public Member Types
85  // -------------------------------------------------------------------------
86 
87 public:
88 
89  using size_type = size_t;
90  using key_type = std::tuple<A...>;
91  using value_type = R;
92 
93  // We need a special hash function here that takes care of tuples.
94  // See genesis/utils/containers/hash_tuple.hpp for details.
95  using container_type = std::unordered_map<key_type, value_type, genesis::utils::hash<key_type>>;
96 
97  using const_iterator = typename container_type::const_iterator;
98 
99  // -------------------------------------------------------------------------
100  // Private Member Types
101  // -------------------------------------------------------------------------
102 
103 private:
104 
109  struct Shard
110  {
111  container_type cache_;
112  std::mutex guard_;
113  };
114 
115  // -------------------------------------------------------------------------
116  // Constructor and Rule of Five
117  // -------------------------------------------------------------------------
118 
119 public:
120 
127  FunctionCache( std::function<R(A...)> load_function, size_t shards = 256 )
128  : load_function_( load_function )
129  {
130  if( shards == 0 ) {
131  throw std::invalid_argument( "Cannot initialize FunctionCache with shards==0" );
132  }
133  shards_.reserve( shards );
134  for( size_t i = 0; i < shards; ++i ) {
135  // Although we are in untils namespace here, we specify the namespace full,
136  // in order to avoid ambiguous overload when compiled with C++17.
137  shards_.push_back( genesis::utils::make_unique<Shard>() );
138  }
139  }
140 
141  ~FunctionCache() = default;
142 
143  FunctionCache( FunctionCache const& ) = delete;
144  FunctionCache( FunctionCache&& ) = default;
145 
146  FunctionCache& operator= ( FunctionCache const& ) = delete;
147  FunctionCache& operator= ( FunctionCache&& ) = default;
148 
149  // -------------------------------------------------------------------------
150  // Properties
151  // -------------------------------------------------------------------------
152 
156  size_type size() const
157  {
158  size_t result = 0;
159  for( auto const& shard : shards_ ) {
160  std::lock_guard<std::mutex> lock( shard->guard_ );
161  result += shard->cache_.size();
162  }
163  return result;
164  }
165 
171  std::vector<size_t> shard_sizes() const
172  {
173  auto result = std::vector<size_t>( shards_.size(), 0 );
174  for( size_t i = 0; i < shards_.size(); ++i ) {
175  auto const& shard = shards_[i];
176  std::lock_guard<std::mutex> lock( shard->guard_ );
177  result[i] = shard->cache_.size();
178  }
179  return result;
180  }
181 
185  bool empty() const
186  {
187  // We need to traverse all shards to make sure all of them are empty.
188  return size() == 0;
189  }
190 
197  void clear()
198  {
199  for( auto& shard : shards_ ) {
200  std::lock_guard<std::mutex> lock( shard->guard_ );
201  shard->cache_.clear();
202  }
203  }
204 
210  void enabled( bool value )
211  {
212  enabled_ = value;
213  }
214 
218  bool enabled() const
219  {
220  return enabled_;
221  }
222 
227  size_t hit_count() const
228  {
229  return hit_count_;
230  }
231 
236  size_t miss_count() const
237  {
238  return miss_count_;
239  }
240 
245  {
246  hit_count_ = 0;
247  miss_count_ = 0;
248  }
249 
250  // -------------------------------------------------------------------------
251  // Element Access
252  // -------------------------------------------------------------------------
253 
259  R const& operator()( A const&... arguments )
260  {
261  // Build the key. Can be done without mutex locking.
262  std::tuple<A...> const key( arguments... );
263 
264  // Get the shard that this key belongs to.
265  assert( shard_index_( key ) < shards_.size() );
266  assert( shards_[ shard_index_( key ) ] );
267  auto& shard = *shards_[ shard_index_( key ) ];
268 
269  // Lock access to the shard to look up the key. Released at the end of the scope.
270  std::lock_guard<std::mutex> lock( shard.guard_ );
271 
272  // Allow to disable the caching completely, for example for speed testing.
273  // We still need to store it, in order to be able to return a reference...
274  if( ! enabled_ ) {
275  return shard.cache_[key] = load_function_( arguments... );
276  }
277 
278  // Now try to find it in the cache.
279  auto search = shard.cache_.find( key );
280  if( search != shard.cache_.end() ) {
281  ++hit_count_;
282  return search->second;
283  }
284 
285  // If not present, compute, store, and return it.
286  ++miss_count_;
287  return shard.cache_[key] = load_function_( arguments... );
288  }
289 
293  bool contains( A const&... arguments ) const
294  {
295  // Simply follow the steps of the above operator(), but only check the presence of the key,
296  // without computing the function.
297  std::tuple<A...> const key( arguments... );
298  auto& shard = *shards_[ shard_index_( key ) ];
299  std::lock_guard<std::mutex> lock( shard.guard_ );
300  auto search = shard.cache_.find( key );
301  return search != shard.cache_.end();
302  }
303 
304  // -------------------------------------------------------------------------
305  // Private Members
306  // -------------------------------------------------------------------------
307 
308 private:
309 
310  size_t shard_index_( key_type const& key ) const
311  {
312  return genesis::utils::hash<key_type>{}( key ) % shards_.size();
313  }
314 
315  // -------------------------------------------------------------------------
316  // Data Members
317  // -------------------------------------------------------------------------
318 
319 private:
320 
321  // The functor that computes/loads the value for a given key. This needs to be a pure function,
322  // that is, a function without any side effects that for a given key always returns the same
323  // result. In fact, it does not really need to be pure, but the cache would just return
324  // its cached value instead of calling the function again, and hence, its side effects would
325  // not happen... So, let's say, we only want pure functions.
326  std::function<R(A...)> load_function_;
327 
328  // Set of shards to distribute the locking, in order to allow for fast parallel access
329  // from many threads. Unfortunately, we need pointers here, as the mutexes in the shards
330  // are not copyable.
331  std::vector<std::unique_ptr<Shard>> shards_;
332 
333  // For speed testing, we offer to deactivate the cache completely.
334  bool enabled_ = true;
335 
336  // We also count how often we have cache hits (key already present), and cache
337  // misses (key not present and load function needs to be computed).
338  size_t hit_count_ = 0;
339  size_t miss_count_ = 0;
340 
341 };
342 
343 } // namespace utils
344 } // namespace genesis
345 
346 #endif // include guard
genesis::utils::FunctionCache::empty
bool empty() const
Return whether the cache is currently empty.
Definition: function_cache.hpp:185
genesis::utils::FunctionCache::enabled
void enabled(bool value)
Enable or disable the caching.
Definition: function_cache.hpp:210
genesis::utils::FunctionCache
Simple cache, for example for function return values.
Definition: function_cache.hpp:81
genesis::utils::FunctionCache::miss_count
size_t miss_count() const
Return how often in total there was a cache miss, that is, a key requested that was not yet present i...
Definition: function_cache.hpp:236
genesis::utils::FunctionCache::contains
bool contains(A const &... arguments) const
Return whether a certain key is already in the cache.
Definition: function_cache.hpp:293
std.hpp
Provides some valuable additions to STD.
genesis::utils::FunctionCache::operator()
R const & operator()(A const &... arguments)
Access operator for retrieving a value given the key (arguments to the cached function).
Definition: function_cache.hpp:259
genesis::utils::FunctionCache::shard_sizes
std::vector< size_t > shard_sizes() const
Get a tally of how many cached values are stored in each of the shards.
Definition: function_cache.hpp:171
genesis::utils::hash
Definition: hash_tuple.hpp:76
genesis::utils::FunctionCache::clear_counts
void clear_counts()
Clear the hit_count() and miss_count() counters.
Definition: function_cache.hpp:244
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::FunctionCache::hit_count
size_t hit_count() const
Return how often in total there was a cache hit, that is, a key requested that was already present in...
Definition: function_cache.hpp:227
genesis::utils::FunctionCache::FunctionCache
FunctionCache(std::function< R(A...)> load_function, size_t shards=256)
Constructor, takes the function that is needed to compute values for the cache.
Definition: function_cache.hpp:127
genesis::utils::FunctionCache::operator=
FunctionCache & operator=(FunctionCache const &)=delete
genesis::utils::FunctionCache::~FunctionCache
~FunctionCache()=default
genesis::utils::FunctionCache::enabled
bool enabled() const
Return whether the cache is currently enabled or not.
Definition: function_cache.hpp:218
genesis::utils::FunctionCache::value_type
R value_type
Definition: function_cache.hpp:91
genesis::utils::FunctionCache::container_type
std::unordered_map< key_type, value_type, genesis::utils::hash< key_type > > container_type
Definition: function_cache.hpp:95
hash_tuple.hpp
genesis::utils::FunctionCache::const_iterator
typename container_type::const_iterator const_iterator
Definition: function_cache.hpp:97
genesis::utils::FunctionCache::clear
void clear()
Clear the cache.
Definition: function_cache.hpp:197
genesis::utils::FunctionCache::size
size_type size() const
Get the current count of elements in the cache.
Definition: function_cache.hpp:156
genesis::utils::FunctionCache::key_type
std::tuple< A... > key_type
Definition: function_cache.hpp:90
genesis::utils::FunctionCache::size_type
size_t size_type
Definition: function_cache.hpp:89