A library for working with phylogenetic and population genetic data.
v0.32.0
mru_cache.hpp
Go to the documentation of this file.
1 #ifndef GENESIS_UTILS_CONTAINERS_MRU_CACHE_H_
2 #define GENESIS_UTILS_CONTAINERS_MRU_CACHE_H_
3 
4 /*
5  Genesis - A toolkit for working with phylogenetic data.
6  Copyright (C) 2014-2024 Lucas Czech
7 
8  This program is free software: you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation, either version 3 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 
21  Contact:
22  Lucas Czech <lczech@carnegiescience.edu>
23  Department of Plant Biology, Carnegie Institution For Science
24  260 Panama Street, Stanford, CA 94305, USA
25 */
26 
34 #include <cassert>
35 #include <functional>
36 #include <list>
37 #include <mutex>
38 #include <stdexcept>
39 #include <unordered_map>
40 #include <utility>
41 
42 namespace genesis {
43 namespace utils {
44 
45 // =================================================================================================
46 // MRU Cache
47 // =================================================================================================
48 
99 template< typename Key, typename T >
100 class MruCache
101 {
102 public:
103 
104  // -------------------------------------------------------------------------
105  // Member Types
106  // -------------------------------------------------------------------------
107 
108  using key_type = Key;
109  using mapped_type = T;
110  using value_type = std::pair<const key_type, mapped_type>;
111 
112  using iterator = typename std::list< value_type >::iterator;
113  using const_iterator = typename std::list< value_type >::const_iterator;
114 
115  using size_type = size_t;
116 
117  // -------------------------------------------------------------------------
118  // Constructor and Rule of Five
119  // -------------------------------------------------------------------------
120 
127  MruCache() = default;
128 
135  explicit MruCache( size_t capacity )
136  : capacity_( capacity )
137  {}
138 
140  {
141  // Need to call the release_function. Make it easy, just call clear.
142  clear();
143  }
144 
145  MruCache( MruCache const& ) = delete;
146  MruCache( MruCache&& ) = default;
147 
148  MruCache& operator= ( MruCache const& ) = delete;
149  MruCache& operator= ( MruCache&& ) = default;
150 
151  // -------------------------------------------------------------------------
152  // Iterators
153  // -------------------------------------------------------------------------
154 
159  {
160  return cache_.begin();
161  }
162 
167  {
168  return cache_.cbegin();
169  }
170 
176  {
177  return cache_.end();
178  }
179 
185  {
186  return cache_.cend();
187  }
188 
193  {
194  return cache_.cbegin();
195  }
196 
201  {
202  return cache_.cend();
203  }
204 
205  // -------------------------------------------------------------------------
206  // Properties
207  // -------------------------------------------------------------------------
208 
212  size_type size() const
213  {
214  assert( lookup_.size() == cache_.size() );
215  return lookup_.size();
216  }
217 
224  {
225  assert( capacity_ == 0 || lookup_.size() <= capacity_ );
226  return capacity_;
227  }
228 
232  bool empty() const
233  {
234  return lookup_.empty();
235  }
236 
243  size_t cache_hits() const
244  {
245  return cache_hits_;
246  }
247 
255  size_t cache_misses() const
256  {
257  return cache_misses_;
258  }
259 
260  // -------------------------------------------------------------------------
261  // Element Access
262  // -------------------------------------------------------------------------
263 
286  mapped_type& fetch( key_type const& key )
287  {
288  // Use the lookup map to check whether the element is in the cache.
289  auto const lit = lookup_.find( key );
290 
291  // Found it! Move it to the front and return its mapped element.
292  if( lit != lookup_.end() ) {
293  ++cache_hits_;
294  return promote_and_get_( lit->second );
295  }
296 
297  // If we are here, the element was not found.
298  // Add the element to the cache and lookup. Throws if the load function is empty.
299  // Also, if loading fails with an exception, nothing happens to the container.
300  assert( lookup_.count( key ) == 0 );
301  cache_.push_front({ key, load_function( key ) });
302  lookup_[ key ] = cache_.begin();
303  assert( cache_.size() == lookup_.size() );
304  ++cache_misses_;
305 
306  // Make sure that we stay within capacity.
307  shrink_();
308 
309  // Finally, return the element.
310  assert( ! cache_.empty() );
311  return cache_.begin()->second;
312  }
313 
353  {
354  // First, check if the elemt is there. If so, simply return it.
355  {
356  // Lock access to everything.
357  std::lock_guard<std::mutex> lock( mutex_ );
358 
359  // Use the lookup map to check whether the element is in the cache.
360  auto const lit = lookup_.find( key );
361 
362  // Found it! Move it to the front and return its mapped element.
363  if( lit != lookup_.end() ) {
364  // LOG_DBG << "Found.";
365  ++cache_hits_;
366  return promote_and_get_( lit->second );
367  }
368  }
369 
370  // If we are here, the element is not in the cache. Load it into a dummy list,
371  // without locking, so that loading can happen in parallel.
372  std::list< value_type > tmp_list;
373  tmp_list.push_front({ key, load_function( key ) });
374 
375  // Now, store it if needed.
376  {
377  // Lock access to everything.
378  // We increment the cache misses here, to have the lock active on the variable.
379  std::lock_guard<std::mutex> lock( mutex_ );
380  ++cache_misses_;
381 
382  // Check whether the element was put into the cache in the meantime (by another thread).
383  auto const lit = lookup_.find( key );
384 
385  // Found it! Move it to the front and return its mapped element.
386  // In that case, we do not use tmp_list, but discard the element in there.
387  if( lit != lookup_.end() ) {
388  // LOG_DBG << "Wasted.";
389  return promote_and_get_( lit->second );
390  }
391  // LOG_DBG << "Loaded.";
392 
393  // If we are here, the element was not found.
394  // Add the element to the cache and lookup.
395  assert( lookup_.count( key ) == 0 );
396  assert( tmp_list.size() == 1 );
397  cache_.splice( cache_.begin(), tmp_list, tmp_list.begin() );
398  lookup_[ key ] = cache_.begin();
399  assert( cache_.size() == lookup_.size() );
400 
401  // Make sure that we stay within capacity.
402  shrink_();
403 
404  // Finally, return the element.
405  assert( ! cache_.empty() );
406  return cache_.begin()->second;
407  }
408  }
409 
415  bool contains( key_type const& key )
416  {
417  return lookup_.count( key ) > 0;
418  }
419 
431  void touch( key_type const& key )
432  {
433  (void) fetch_copy( key );
434  }
435 
436  // -------------------------------------------------------------------------
437  // Modifiers
438  // -------------------------------------------------------------------------
439 
445  void capacity( size_t value )
446  {
447  // Lock access to everything. It would be a weird use case where this function is called
448  // while also fetching copies of elements in a different thread, but still,
449  // we need to protect this.
450  std::lock_guard<std::mutex> lock( mutex_ );
451 
452  capacity_ = value;
453  shrink_();
454  assert( capacity_ == 0 || size() <= capacity_ );
455  }
456 
460  void clear()
461  {
462  // Lock access to everything.
463  std::lock_guard<std::mutex> lock( mutex_ );
464 
465  if( release_function ) {
466  for( auto& elem : cache_ ) {
467  release_function( elem.first, elem.second );
468  }
469  }
470  cache_.clear();
471  lookup_.clear();
472  }
473 
474  // -------------------------------------------------------------------------
475  // Implementation Functions
476  // -------------------------------------------------------------------------
477 
478 private:
479 
483  void shrink_()
484  {
485  // Capacity/target size of 0 means limitless. No shrinking.
486  if( capacity_ == 0 ) {
487  return;
488  }
489 
490  // Remove last elements from cache and lookup if needed.
491  while( size() > capacity_ ) {
492 
493  // This needs to be maintained.
494  assert( lookup_.size() == cache_.size() );
495 
496  // If we are here, size > capacity > 0. Hence, size > 1.
497  assert( cache_.size() > 1 );
498 
499  auto& last = cache_.back();
500  if( release_function ) {
501  release_function( last.first, last.second );
502  }
503  lookup_.erase( last.first );
504  cache_.pop_back();
505  }
506 
507  // Check invariant and result.
508  assert( lookup_.size() == cache_.size() );
509  assert( lookup_.size() <= capacity() );
510  }
511 
516  mapped_type& promote_and_get_( iterator const& cache_it )
517  {
518  // The function is internally called only if we found an iterator.
519  // This can of course only happen if there are elements in the cache.
520  assert( ! cache_.empty() );
521 
522  // Only move if it is not already at the front.
523  if( cache_it != cache_.begin() ) {
524  cache_.splice( cache_.begin(), cache_, cache_it );
525  }
526 
527  // Return the element.
528  return cache_it->second;
529  }
530 
531  // -------------------------------------------------------------------------
532  // Data Members
533  // -------------------------------------------------------------------------
534 
535 public:
536 
543  std::function< mapped_type( key_type const& )> load_function;
544 
553  std::function< void( key_type const&, mapped_type& )> release_function;
554 
555 private:
556 
563  size_type capacity_ = 0;
564 
570  std::list< value_type > cache_;
571 
578  std::unordered_map< key_type, iterator > lookup_;
579 
583  std::mutex mutex_;
584 
588  size_t cache_hits_ = 0;
589 
593  size_t cache_misses_ = 0;
594 
595 };
596 
597 } // namespace utils
598 } // namespace genesis
599 
600 #endif // include guard
genesis::utils::MruCache::load_function
std::function< mapped_type(key_type const &)> load_function
Function to load an element into the cache if it is being fetched but not there yet.
Definition: mru_cache.hpp:543
genesis::utils::MruCache::begin
const_iterator begin() const
Get a const iterator to the begin of the cache, that is, the most recently used element.
Definition: mru_cache.hpp:166
genesis::utils::MruCache::operator=
MruCache & operator=(MruCache const &)=delete
genesis::utils::MruCache
Most Recently Used Cache.
Definition: mru_cache.hpp:100
genesis::utils::MruCache::mapped_type
T mapped_type
Definition: mru_cache.hpp:109
genesis::utils::MruCache::end
const_iterator end() const
Get a const iterator to past-the-end of the cache, that is, one element after the least recently used...
Definition: mru_cache.hpp:184
genesis::utils::MruCache::size
size_type size() const
Get the current count of elements being loaded in the cache.
Definition: mru_cache.hpp:212
genesis::utils::MruCache::~MruCache
~MruCache()
Definition: mru_cache.hpp:139
genesis::utils::MruCache::MruCache
MruCache()=default
Default constructor. Uses a capacity() of 0, that is, limitless.
genesis::utils::MruCache::capacity
void capacity(size_t value)
Set the capacity of the cache, i.e., its maximal size.
Definition: mru_cache.hpp:445
genesis::utils::MruCache::size_type
size_t size_type
Definition: mru_cache.hpp:115
genesis::utils::MruCache::fetch_copy
mapped_type fetch_copy(key_type const &key)
Get an element by copy.
Definition: mru_cache.hpp:352
genesis::utils::MruCache::MruCache
MruCache(size_t capacity)
Construct a cache with a given capacity.
Definition: mru_cache.hpp:135
genesis::utils::MruCache::cbegin
const_iterator cbegin()
Get a const iterator to the begin of the cache, that is, the most recently used element.
Definition: mru_cache.hpp:192
genesis
Container namespace for all symbols of genesis in order to keep them separate when used as a library.
Definition: placement/formats/edge_color.cpp:42
genesis::utils::MruCache::capacity
size_type capacity() const
Return the currently set capacity of the cache, i.e., its maximal size.
Definition: mru_cache.hpp:223
genesis::utils::MruCache::release_function
std::function< void(key_type const &, mapped_type &)> release_function
Function to be called when an element is removed from the cache.
Definition: mru_cache.hpp:553
genesis::utils::MruCache::fetch
mapped_type & fetch(key_type const &key)
Get an element.
Definition: mru_cache.hpp:286
genesis::utils::MruCache::begin
iterator begin()
Get an iterator to the begin of the cache, that is, the most recently used element.
Definition: mru_cache.hpp:158
genesis::utils::MruCache::key_type
Key key_type
Definition: mru_cache.hpp:108
genesis::utils::MruCache::touch
void touch(key_type const &key)
Bring an element to the front, and load it if needed.
Definition: mru_cache.hpp:431
genesis::utils::MruCache::value_type
std::pair< const key_type, mapped_type > value_type
Definition: mru_cache.hpp:110
genesis::utils::MruCache::end
iterator end()
Get an iterator to past-the-end of the cache, that is, one element after the least recently used one.
Definition: mru_cache.hpp:175
genesis::utils::MruCache::cache_misses
size_t cache_misses() const
Number of cache misses.
Definition: mru_cache.hpp:255
genesis::utils::MruCache::clear
void clear()
Clear the cache.
Definition: mru_cache.hpp:460
genesis::utils::MruCache::cend
const_iterator cend()
Get a const iterator to past-the-end of the cache, that is, one element after the least recently used...
Definition: mru_cache.hpp:200
genesis::utils::MruCache::const_iterator
typename std::list< value_type >::const_iterator const_iterator
Definition: mru_cache.hpp:113
genesis::utils::MruCache::empty
bool empty() const
Return whether the cache is currently empty, that is, has no elements loaded.
Definition: mru_cache.hpp:232
genesis::utils::MruCache::contains
bool contains(key_type const &key)
Return whether an element is currently in the cache.
Definition: mru_cache.hpp:415
genesis::utils::MruCache::cache_hits
size_t cache_hits() const
Number of cache hits.
Definition: mru_cache.hpp:243
genesis::utils::MruCache::iterator
typename std::list< value_type >::iterator iterator
Definition: mru_cache.hpp:112