A library for working with phylogenetic data.
v0.25.0
function_cache.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_FUNCTION_CACHE_H_
2 #define GENESIS_UTILS_CONTAINERS_FUNCTION_CACHE_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2020 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lucas.czech@h-its.org>
23  Exelixis Lab, Heidelberg Institute for Theoretical Studies
24  Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
25 */
26 
36 
37 #include <cassert>
38 #include <functional>
39 #include <memory>
40 #include <mutex>
41 #include <stdexcept>
42 #include <tuple>
43 #include <unordered_map>
44 #include <vector>
45 
46 namespace genesis {
47 namespace utils {
48 
49 // =================================================================================================
50 // Simple Cache
51 // =================================================================================================
52 
80 template <typename R, typename... A>
82 {
83  // -------------------------------------------------------------------------
84  // Public Member Types
85  // -------------------------------------------------------------------------
86 
87 public:
88 
89  using size_type = size_t;
90  using key_type = std::tuple<A...>;
91  using value_type = R;
92 
93  // We need a special hash function here that takes care of tuples.
94  // See genesis/utils/containers/hash_tuple.hpp for details.
95  using container_type = std::unordered_map<key_type, value_type, genesis::utils::hash<key_type>>;
96 
97  using const_iterator = typename container_type::const_iterator;
98 
99  // -------------------------------------------------------------------------
100  // Private Member Types
101  // -------------------------------------------------------------------------
102 
103 private:
104 
109  struct Shard
110  {
111  container_type cache_;
112  std::mutex guard_;
113  };
114 
115  // -------------------------------------------------------------------------
116  // Constructor and Rule of Five
117  // -------------------------------------------------------------------------
118 
119 public:
120 
127  FunctionCache( std::function<R(A...)> load_function, size_t shards = 256 )
128  : load_function_( load_function )
129  {
130  if( shards == 0 ) {
131  throw std::invalid_argument( "Cannot initialize FunctionCache with shards==0" );
132  }
133  shards_.reserve( shards );
134  for( size_t i = 0; i < shards; ++i ) {
135  shards_.push_back( make_unique<Shard>() );
136  }
137  }
138 
139  ~FunctionCache() = default;
140 
141  FunctionCache( FunctionCache const& ) = delete;
142  FunctionCache( FunctionCache&& ) = default;
143 
144  FunctionCache& operator= ( FunctionCache const& ) = delete;
145  FunctionCache& operator= ( FunctionCache&& ) = default;
146 
147  // -------------------------------------------------------------------------
148  // Properties
149  // -------------------------------------------------------------------------
150 
154  size_type size() const
155  {
156  size_t result = 0;
157  for( auto const& shard : shards_ ) {
158  std::lock_guard<std::mutex> lock( shard->guard_ );
159  result += shard->cache_.size();
160  }
161  return result;
162  }
163 
169  std::vector<size_t> shard_sizes() const
170  {
171  auto result = std::vector<size_t>( shards_.size(), 0 );
172  for( size_t i = 0; i < shards_.size(); ++i ) {
173  auto const& shard = shards_[i];
174  std::lock_guard<std::mutex> lock( shard->guard_ );
175  result[i] = shard->cache_.size();
176  }
177  return result;
178  }
179 
183  bool empty() const
184  {
185  // We need to traverse all shards to make sure all of them are empty.
186  return size() == 0;
187  }
188 
195  void clear()
196  {
197  for( auto& shard : shards_ ) {
198  std::lock_guard<std::mutex> lock( shard->guard_ );
199  shard->cache_.clear();
200  }
201  }
202 
208  void enabled( bool value )
209  {
210  enabled_ = value;
211  }
212 
216  bool enabled() const
217  {
218  return enabled_;
219  }
220 
225  size_t hit_count() const
226  {
227  return hit_count_;
228  }
229 
234  size_t miss_count() const
235  {
236  return miss_count_;
237  }
238 
243  {
244  hit_count_ = 0;
245  miss_count_ = 0;
246  }
247 
248  // -------------------------------------------------------------------------
249  // Element Access
250  // -------------------------------------------------------------------------
251 
257  R const& operator()( A const&... arguments )
258  {
259  // Build the key. Can be done without mutex locking.
260  std::tuple<A...> const key( arguments... );
261 
262  // Get the shard that this key belongs to.
263  assert( shard_index_( key ) < shards_.size() );
264  assert( shards_[ shard_index_( key ) ] );
265  auto& shard = *shards_[ shard_index_( key ) ];
266 
267  // Lock access to the shard to look up the key. Released at the end of the scope.
268  std::lock_guard<std::mutex> lock( shard.guard_ );
269 
270  // Allow to disable the caching completely, for example for speed testing.
271  // We still need to store it, in order to be able to return a reference...
272  if( ! enabled_ ) {
273  return shard.cache_[key] = load_function_( arguments... );
274  }
275 
276  // Now try to find it in the cache.
277  auto search = shard.cache_.find( key );
278  if( search != shard.cache_.end() ) {
279  ++hit_count_;
280  return search->second;
281  }
282 
283  // If not present, compute, store, and return it.
284  ++miss_count_;
285  return shard.cache_[key] = load_function_( arguments... );
286  }
287 
291  bool contains( A const&... arguments ) const
292  {
293  // Simply follow the steps of the above operator(), but only check the presence of the key,
294  // without computing the function.
295  std::tuple<A...> const key( arguments... );
296  auto& shard = *shards_[ shard_index_( key ) ];
297  std::lock_guard<std::mutex> lock( shard.guard_ );
298  auto search = shard.cache_.find( key );
299  return search != shard.cache_.end();
300  }
301 
302  // -------------------------------------------------------------------------
303  // Private Members
304  // -------------------------------------------------------------------------
305 
306 private:
307 
308  size_t shard_index_( key_type const& key ) const
309  {
310  return genesis::utils::hash<key_type>{}( key ) % shards_.size();
311  }
312 
313  // -------------------------------------------------------------------------
314  // Data Members
315  // -------------------------------------------------------------------------
316 
317 private:
318 
319  // The functor that computes/loads the value for a given key. This needs to be a pure function,
320  // that is, a function without any side effects that for a given key always returns the same
321  // result. In fact, it does not really need to be pure, but the cache would just return
322  // its cached value instead of calling the function again, and hence, its side effects would
323  // not happen... So, let's say, we only want pure functions.
324  std::function<R(A...)> load_function_;
325 
326  // Set of shards to distribute the locking, in order to allow for fast parallel access
327  // from many threads. Unfortunately, we need pointers here, as the mutexes in the shards
328  // are not copyable.
329  std::vector<std::unique_ptr<Shard>> shards_;
330 
331  // For speed testing, we offer to deactivate the cache completely.
332  bool enabled_ = true;
333 
334  // We also count how often we have cache hits (key already present), and cache
335  // misses (key not present and load function needs to be computed).
336  size_t hit_count_ = 0;
337  size_t miss_count_ = 0;
338 
339 };
340 
341 } // namespace utils
342 } // namespace genesis
343 
344 #endif // include guard
genesis::utils::FunctionCache::empty
bool empty() const
Return whether the cache is currently empty.
Definition: function_cache.hpp:183
genesis::utils::FunctionCache::enabled
void enabled(bool value)
Enable or disable the caching.
Definition: function_cache.hpp:208
genesis::utils::FunctionCache
Simple cache, for example for function return values.
Definition: function_cache.hpp:81
genesis::utils::FunctionCache::miss_count
size_t miss_count() const
Return how often in total there was a cache miss, that is, a key requested that was not yet present i...
Definition: function_cache.hpp:234
genesis::utils::FunctionCache::contains
bool contains(A const &... arguments) const
Return whether a certain key is already in the cache.
Definition: function_cache.hpp:291
std.hpp
Provides some valuable additions to STD.
genesis::utils::FunctionCache::operator()
R const & operator()(A const &... arguments)
Access operator for retrieving a value given the key (arguments to the cached function).
Definition: function_cache.hpp:257
genesis::utils::FunctionCache::shard_sizes
std::vector< size_t > shard_sizes() const
Get a tally of how many cached values are stored in each of the shards.
Definition: function_cache.hpp:169
genesis::utils::hash
Definition: hash_tuple.hpp:76
genesis::utils::FunctionCache::clear_counts
void clear_counts()
Clear the hit_count() and miss_count() counters.
Definition: function_cache.hpp:242
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::FunctionCache::hit_count
size_t hit_count() const
Return how often in total there was a cache hit, that is, a key requested that was already present in...
Definition: function_cache.hpp:225
genesis::utils::FunctionCache::FunctionCache
FunctionCache(std::function< R(A...)> load_function, size_t shards=256)
Constructor, takes the function that is needed to compute values for the cache.
Definition: function_cache.hpp:127
genesis::utils::FunctionCache::operator=
FunctionCache & operator=(FunctionCache const &)=delete
genesis::utils::FunctionCache::~FunctionCache
~FunctionCache()=default
genesis::utils::FunctionCache::enabled
bool enabled() const
Return whether the cache is currently enabled or not.
Definition: function_cache.hpp:216
genesis::utils::FunctionCache::value_type
R value_type
Definition: function_cache.hpp:91
genesis::utils::FunctionCache::container_type
std::unordered_map< key_type, value_type, genesis::utils::hash< key_type > > container_type
Definition: function_cache.hpp:95
hash_tuple.hpp
genesis::utils::FunctionCache::const_iterator
typename container_type::const_iterator const_iterator
Definition: function_cache.hpp:97
genesis::utils::FunctionCache::clear
void clear()
Clear the cache.
Definition: function_cache.hpp:195
genesis::utils::FunctionCache::size
size_type size() const
Get the current count of elements in the cache.
Definition: function_cache.hpp:154
genesis::utils::FunctionCache::key_type
std::tuple< A... > key_type
Definition: function_cache.hpp:90
genesis::utils::FunctionCache::size_type
size_t size_type
Definition: function_cache.hpp:89