// Source listing of thread_pool.hpp (extracted from thread__pool_8hpp_source.html)

#ifndef GENESIS_UTILS_THREADING_THREAD_POOL_H_

#define GENESIS_UTILS_THREADING_THREAD_POOL_H_


/*

    Genesis - A toolkit for working with phylogenetic data.

    Copyright (C) 2014-2024 Lucas Czech


    This program is free software: you can redistribute it and/or modify

    it under the terms of the GNU General Public License as published by

    the Free Software Foundation, either version 3 of the License, or

    (at your option) any later version.


    This program is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY; without even the implied warranty of

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    GNU General Public License for more details.


    You should have received a copy of the GNU General Public License

    along with this program.  If not, see <http://www.gnu.org/licenses/>.


    Contact:

    Lucas Czech <lucas.czech@sund.ku.dk>

    University of Copenhagen, Globe Institute, Section for GeoGenetics

    Oster Voldgade 5-7, 1350 Copenhagen K, Denmark

*/


#include "genesis/utils/threading/blocking_concurrent_queue.hpp"


#include <atomic>

#include <cassert>

#include <chrono>

#include <iterator>

#include <functional>

#include <future>

#include <memory>

#include <queue>

#include <stdexcept>

#include <thread>

#include <type_traits>

#include <utility>

#include <vector>


namespace genesis {

namespace utils {


// =================================================================================================

//     Forward Declarations

// =================================================================================================


class ThreadPool;


// =================================================================================================

//     Proactive Future

// =================================================================================================


/**
 * @brief Move-only wrapper around a `std::future` that is handed out by a ThreadPool.
 *
 * The class forwards the usual future interface to the wrapped `std::future<T>`. The difference
 * is in wait(), and hence in get(): instead of blocking, the calling thread processes pending
 * tasks of the ThreadPool that created the future until the result is ready.
 *
 * Instances that refer to a shared state can only be created by ThreadPool, via the private
 * constructor. Default constructed instances have no shared state, so valid() is `false`.
 */
template<class T>
class ProactiveFuture
{
public:

    // -------------------------------------------------------------
    //     Constructors and Rule of Five
    // -------------------------------------------------------------

    /**
     * @brief Construct an instance without shared state and without an associated ThreadPool.
     */
    ProactiveFuture() noexcept = default;

private:

    /**
     * @brief Construct from a future and the ThreadPool used for doing work while waiting.
     *
     * Private, and only accessible to the befriended ThreadPool.
     */
    ProactiveFuture( std::future<T> future_result, ThreadPool& thread_pool )
        : future_( std::move( future_result ))
        , thread_pool_( &thread_pool )
    {}

    friend ThreadPool;

public:

    ~ProactiveFuture() noexcept = default;

    ProactiveFuture( ProactiveFuture&& ) noexcept = default;
    ProactiveFuture( const ProactiveFuture& ) = delete;

    ProactiveFuture& operator=( ProactiveFuture&& ) noexcept = default;
    ProactiveFuture& operator=( const ProactiveFuture& ) = delete;

    // -------------------------------------------------------------
    //     Forwarded members
    // -------------------------------------------------------------

    /**
     * @brief Return the result of the task, after calling wait().
     *
     * The result is retrieved via `std::future::get()`, which also re-throws an exception
     * that was stored in the shared state.
     *
     * @throws std::future_error with `std::future_errc::no_state` if valid() is `false`.
     */
    T get()
    {
        // Use our busy waiting first, until we are ready.
        wait();
        assert( ready() );
        return future_.get();
    }

    /**
     * @brief Overload of get() that is only enabled for non-void types, returning `U&`.
     *
     * A plain call `get()` resolves to the non-template overload above, as non-template
     * functions are preferred in overload resolution. This template is hence only selected
     * when called with an explicit template argument list.
     */
    template<typename U = T>
    typename std::enable_if<!std::is_void<U>::value, U&>::type get()
    {
        // Enable this method only if T is not void (non-void types).
        static_assert( ! std::is_same<T, void>::value, "ProactiveFuture::get() intended for T != void" );

        // Use our busy waiting first, until we are ready.
        wait();
        assert( ready() );
        return future_.get();
    }

    /**
     * @brief Overload of get() that is only enabled for `void`.
     *
     * As with the other template overload, a plain call `get()` resolves to the non-template
     * overload; this template needs an explicit template argument list to be selected.
     */
    template<typename U = T>
    typename std::enable_if<std::is_void<U>::value>::type get()
    {
        // Enable this method only if T is void
        static_assert( std::is_same<T, void>::value, "ProactiveFuture::get() intended for T == void" );

        // Use our busy waiting first, until we are ready.
        wait();
        assert( ready() );
        return future_.get();
    }

    /**
     * @brief Check if the wrapped future refers to a shared state.
     */
    bool valid() const noexcept
    {
        return future_.valid();
    }

    /**
     * @brief Wait for the result to become available, processing pending tasks of the
     * ThreadPool in the meantime. Defined after the ThreadPool class.
     */
    void wait() const;

    /**
     * @brief Wait for the result for at most the given duration.
     *
     * This is forwarded to `std::future::wait_for()`, and does not process tasks while waiting.
     *
     * @throws std::future_error with `std::future_errc::no_state` if valid() is `false`.
     */
    template< class Rep, class Period >
    std::future_status wait_for( std::chrono::duration<Rep,Period> const& timeout_duration ) const
    {
        // Calling wait_for() on a future without shared state is undefined behaviour,
        // so we check first, in line with ready(), deferred(), and wait().
        throw_if_invalid_();

        // If the user specifies a time to wait for, we just forward that to the future.
        return future_.wait_for( timeout_duration );
    }

    /**
     * @brief Wait for the result until at most the given time point.
     *
     * This is forwarded to `std::future::wait_until()`, and does not process tasks while waiting.
     *
     * @throws std::future_error with `std::future_errc::no_state` if valid() is `false`.
     */
    template< class Clock, class Duration >
    std::future_status wait_until( std::chrono::time_point<Clock,Duration> const& timeout_time ) const
    {
        // Same validity check as in wait_for(), for the same reason.
        throw_if_invalid_();

        // If the user specifies a time to wait until, we just forward that to the future.
        return future_.wait_until( timeout_time );
    }

    // -------------------------------------------------------------
    //     Additional members
    // -------------------------------------------------------------

    /**
     * @brief Check, without waiting, if the result is available.
     *
     * @throws std::future_error with `std::future_errc::no_state` if valid() is `false`.
     */
    bool ready() const
    {
        throw_if_invalid_();
        return future_.wait_for( std::chrono::seconds(0) ) == std::future_status::ready;
    }

    /**
     * @brief Check, without waiting, if the wrapped future reports a deferred status.
     *
     * @throws std::future_error with `std::future_errc::no_state` if valid() is `false`.
     */
    bool deferred() const
    {
        throw_if_invalid_();
        return future_.wait_for( std::chrono::seconds(0) ) == std::future_status::deferred;
    }

    // -------------------------------------------------------------
    //     Internal members
    // -------------------------------------------------------------

private:

    /**
     * @brief Throw a `std::future_error` with `no_state` if the wrapped future is not valid.
     */
    void throw_if_invalid_() const
    {
        // From: https://en.cppreference.com/w/cpp/thread/future/wait
        // The implementations are encouraged to detect the case when valid() == false before the
        // call and throw a std::future_error with an error condition of std::future_errc::no_state.
        if( !future_.valid() ) {
            throw std::future_error( std::future_errc::no_state );
        }
    }

    // -------------------------------------------------------------
    //     Data members
    // -------------------------------------------------------------

private:

    // The wrapped future that all forwarded members operate on.
    std::future<T> future_;

    // Pool whose tasks are processed in wait(). Initialized to null, so that default constructed
    // instances do not carry an indeterminate pointer around, which would be read when moving.
    ThreadPool* thread_pool_ = nullptr;

};


// =================================================================================================

//     Thread Pool

// =================================================================================================


/**
 * @brief Thread pool that processes submitted tasks on a fixed set of worker threads.
 *
 * Tasks are submitted via enqueue_and_retrieve(), which returns a ProactiveFuture for the result,
 * or via enqueue_detached(), which does not return anything. Threads that wait, be it on such
 * a future, in wait_for_all_pending_tasks(), or on a full queue when enqueueing, process pending
 * tasks themselves via try_run_pending_task() instead of idling.
 *
 * The pool is neither copyable nor movable, as the submitted tasks capture a pointer to it.
 */
class ThreadPool
{
public:

    // -------------------------------------------------------------
    //     Constructors and Rule of Five
    // -------------------------------------------------------------

    /**
     * @brief Construct a pool with @p num_threads worker threads.
     *
     * @param num_threads    Number of worker threads to start.
     * @param max_queue_size Maximum number of unfinished tasks before enqueueing threads start
     *                       processing tasks themselves. A value of 0 means no limit.
     * @throws std::runtime_error if @p max_queue_size is non-zero, but smaller than half
     *                       of @p num_threads.
     */
    explicit ThreadPool( size_t num_threads, size_t max_queue_size = 0 )
        : max_queue_size_( max_queue_size )
    {
        // We disallow a max queue size smaller than half the number of threads.
        // That would be slow and inefficient, and just not really what we want.
        // The check has to use half the thread count, in line with the error message.
        if( max_queue_size_ > 0 && max_queue_size_ < num_threads / 2 ) {
            throw std::runtime_error(
                "Cannot use ThreadPool with max queue size less than "
                "half the number of threads, for efficiency"
            );
        }

        // Create the threads.
        init_( num_threads );
        assert( worker_pool_.size() == num_threads );
    }

    ThreadPool( ThreadPool const& ) = delete;
    ThreadPool( ThreadPool&& )      = delete;

    ThreadPool& operator= ( ThreadPool const& ) = delete;
    ThreadPool& operator= ( ThreadPool&& )      = delete;

    /**
     * @brief Finish all pending tasks, then stop and join all worker threads.
     */
    ~ThreadPool()
    {
        // Just in case, we wait for any unfinished work to be done, to avoid terminating
        // when tasks are still doing work that needs to be finished.
        wait_for_all_pending_tasks();
        assert( unfinished_tasks_.load() == 0 );

        // Send the special stop task to the pool, once for each worker.
        // As each worker stops upon receiving the task, this stops all workers.
        for( size_t i = 0; i < worker_pool_.size(); ++i ) {
            task_queue_.enqueue( WrappedTask( true ));
        }

        // Join them back into the main thread, after which there is no unfinished work.
        for( std::thread& worker : worker_pool_ ) {
            if( worker.joinable() ) {
                worker.join();
            }
        }
        assert( unfinished_tasks_.load() == 0 );
    }

    // -------------------------------------------------------------
    //     Accessor Members
    // -------------------------------------------------------------

    /**
     * @brief Return the number of worker threads of the pool.
     */
    size_t size() const
    {
        return worker_pool_.size();
    }

    /**
     * @brief Return the number of tasks that have been enqueued, but are not yet finished.
     *
     * This is the value of the unfinished tasks counter, which is incremented when enqueueing
     * a task, and decremented once the task function has been run.
     */
    size_t pending_tasks_count() const
    {
        return unfinished_tasks_.load();
        // return task_queue_.size_approx();
    }

    // -------------------------------------------------------------
    //     Pool Functionality
    // -------------------------------------------------------------

    /**
     * @brief Enqueue a task, and return a ProactiveFuture for retrieving its result.
     *
     * The function @p f is bound to the given @p args and called without further arguments
     * when the task is run. An exception thrown by the task is stored in the future.
     * If a max queue size is set and reached, the calling thread first processes pending tasks.
     *
     * @param f    Callable to run.
     * @param args Arguments to bind to the callable.
     * @return     Future for the result of calling `f( args... )`.
     */
    template<class F, class... Args>
    auto enqueue_and_retrieve( F&& f, Args&&... args )
    -> ProactiveFuture<typename std::result_of<F(Args...)>::type>
    {
        using result_type = typename std::result_of<F(Args...)>::type;

        // Make sure that we do not enqueue more tasks than the max size.
        run_tasks_until_below_max_queue_size_();

        // Prepare a promise and associated future of the task; the latter is our return value.
        // Using a promise ensures that any exception thrown in the task function
        // will be caught by the future, and re-thrown when its get() function is called,
        // see e.g., https://stackoverflow.com/a/16345305/4184258
        auto task_promise = std::make_shared<std::promise<result_type>>();
        auto future_result = ProactiveFuture<result_type>( task_promise->get_future(), *this );

        // To make our lives easier for the helper functions used below, we just wrap
        // the task in a function that can be called without arguments.
        std::function<result_type()> task_function = std::bind(
            std::forward<F>(f), std::forward<Args>(args)...
        );

        // Prepare the task that we want to submit.
        // All this wrapping should be completely transparent to the compiler, and removed.
        // The task captures the package including the promise that is needed for the future.
        WrappedTask wrapped_task;
        wrapped_task.function = make_wrapped_task_with_promise_( task_promise, task_function );

        // We first increment the unfinished counter, and only decrement it once the task has
        // been fully processed. Thus, the counter always tells us if there is still work going on.
        ++unfinished_tasks_;
        task_queue_.enqueue( std::move( wrapped_task ));

        // The task is submitted. Return its future for the caller to be able to wait for it.
        return future_result;
    }

    /**
     * @brief Enqueue a task without returning a future for it.
     *
     * The function @p f is bound to the given @p args and called without further arguments
     * when the task is run. If a max queue size is set and reached, the calling thread first
     * processes pending tasks.
     *
     * Note that an exception thrown by the task is not caught here: it propagates out of
     * whichever thread runs the task, and the unfinished tasks counter is then not decremented.
     *
     * @param f    Callable to run.
     * @param args Arguments to bind to the callable.
     */
    template<class F, class... Args>
    void enqueue_detached( F&& f, Args&&... args )
    {
        // Make sure that we do not enqueue more tasks than the max size.
        run_tasks_until_below_max_queue_size_();

        // Prepare the task that we want to submit, by wrapping the function to be called.
        // All this wrapping should be completely transparent to the compiler, and removed.
        WrappedTask wrapped_task;
        auto task_function = std::bind( std::forward<F>(f), std::forward<Args>(args)... );
        wrapped_task.function = [task_function, this]()
        {
            // Run the actual work task here. Once done, we can signal this to the unfinished list.
            task_function();
            assert( this->unfinished_tasks_.load() > 0 );
            --this->unfinished_tasks_;
        };

        // We add the task, incrementing the unfinished counter, and only decrementing it once the
        // task has been fully processed. That way, the counter always tells us if there is still
        // work going on. We capture a reference to `this` in the task above, which could be
        // dangerous if the threads survive the lifetime of the pool, but given that their exit
        // condition is only called from the pool destructor, this should never be able to happen.
        ++unfinished_tasks_;
        task_queue_.enqueue( std::move( wrapped_task ));
    }

    /**
     * @brief Try to dequeue a task and run it in the calling thread, without blocking.
     *
     * @return `true` if a task was dequeued and run, `false` if no task could be dequeued.
     */
    bool try_run_pending_task()
    {
        // Similar to the worker function, but without the blocking wait, as we might not ever
        // have any tasks in the queue, and would be waiting for the condition indefinitely.
        // Instead, we here just want to process a task if there is one, or return otherwise.
        WrappedTask task;
        if( task_queue_.try_dequeue( task )) {
            task.function();
            return true;
        }
        return false;
    }

    /**
     * @brief Wait until the unfinished tasks counter is zero, processing tasks while waiting.
     */
    void wait_for_all_pending_tasks()
    {
        // Wait for all pending tasks to be processed. While we wait, we can also help
        // processing tasks! The loop stops once there are no more unfinished tasks.
        while( unfinished_tasks_.load() > 0 ) {
            while( try_run_pending_task() );
            std::this_thread::yield();
        }
        assert( unfinished_tasks_.load() == 0 );
    }

    // -------------------------------------------------------------
    //     Wrapped Task
    // -------------------------------------------------------------

private:

    /**
     * @brief Move-only element type of the task queue.
     *
     * Holds the function to run, as well as a flag that marks the special "stop" task
     * which makes a worker thread leave its loop.
     */
    struct WrappedTask
    {
        explicit WrappedTask( bool stop = false )
            : stop(stop)
        {}

        ~WrappedTask() = default;

        WrappedTask( WrappedTask const& ) = delete;
        WrappedTask( WrappedTask&& )      = default;

        WrappedTask& operator= ( WrappedTask const& ) = delete;
        WrappedTask& operator= ( WrappedTask&& )      = default;

        // Function to run for the task. Not set for the stop task created in the destructor.
        std::function<void()> function;

        // If set, the worker that receives the task stops instead of running the function.
        bool stop;
    };

    // -------------------------------------------------------------
    //     Internal Members
    // -------------------------------------------------------------

    /**
     * @brief Start the given number of worker threads, each running worker_().
     */
    void init_( size_t num_threads )
    {
        // Create the desired number of workers.
        worker_pool_.reserve( num_threads );
        for( size_t i = 0; i < num_threads; ++i ) {
            worker_pool_.emplace_back(
                &worker_, this
            );
        }
    }

    /**
     * @brief Function run by each worker thread: dequeue and run tasks until a stop task arrives.
     */
    static void worker_( ThreadPool* pool )
    {
        // Using a token for the consumer speeds it up. This is created once per worker thread
        // when the function is called from the thread constructor upon emplacing the worker
        // in the pool in init_()
        ConsumerToken consumer_token( pool->task_queue_ );

        // The worker runs an infinite loop of waiting for tasks,
        // only stopping once a special "stop" task is received.
        WrappedTask task;
        while( true ) {
            pool->task_queue_.wait_dequeue( consumer_token, task );
            if( task.stop ) {
                break;
            }
            task.function();
        }
    }

    /**
     * @brief If a max queue size is set, process tasks in the calling thread until the number
     * of unfinished tasks is below that size.
     */
    inline void run_tasks_until_below_max_queue_size_()
    {
        // Check that we can enqueue a task at the moment, or if we need to wait and do work first.
        // In a high-contention situation, this of course could fail, so that once the loop condition
        // is checked, some other task already has finished the work. But that doesn't matter, the
        // call to try_run_pending_task will catch that and just do nothing. Also, the other way round
        // could happen, and the queue could in theory be overloaded if many threads try to enqueue
        // at exactly the same time. But we probably never have enough threads for that to be a real
        // issue - worst case, we exceed the max queue size by the number of threads, which is fine.
        // All we want to avoid is to have an infinitely growing queue.
        while( max_queue_size_ > 0 && pending_tasks_count() >= max_queue_size_ ) {
            try_run_pending_task();
        }
    }

    /**
     * @brief Create the function object for a task whose result is delivered via a promise.
     *
     * The returned function runs @p task_function, decrements the unfinished tasks counter,
     * and fulfills @p task_promise with either the result or the exception that was thrown.
     */
    template<typename T>
    inline std::function<void()> make_wrapped_task_with_promise_(
        std::shared_ptr<std::promise<T>> task_promise,
        std::function<T()> task_function
    ) {
        // We capture a reference to `this` in the below lambda, which could be dangerous
        // if the threads survive the lifetime of the pool, but given that the pool destructor
        // waits for all of them to finish, this should never be able to happen.
        return [this, task_promise, task_function]()
        {
            // Run the work task, and set the value of the associated promise.
            // We need to delegate this here, as the std::promise::set_value() function
            // differs for void and non-void return types. That is unfortunate.
            // Also, as either the task function or setting the value of the promise can throw
            // an exception, but in between we need to decrement the unfinished tasks counter,
            // we need a way to figure out if we already did the decrement in case of an error.
            bool decremented_unfinished_tasks = false;
            try {
                run_task_and_fulfill_promise_<T>(
                    task_promise, task_function, decremented_unfinished_tasks
                );
            } catch (...) {
                if( !decremented_unfinished_tasks ) {
                    --unfinished_tasks_;
                    decremented_unfinished_tasks = true;
                }
                task_promise->set_exception( std::current_exception() );
            }
            assert( decremented_unfinished_tasks );
        };
    }

    /**
     * @brief Run the task, decrement the unfinished tasks counter, and set the promise value.
     * Overload for non-void result types.
     *
     * @param decremented_unfinished_tasks Set to `true` once the counter has been decremented,
     *        so that the caller knows whether it still has to do that in case of an exception.
     */
    template<typename T>
    typename std::enable_if<!std::is_void<T>::value>::type
    inline run_task_and_fulfill_promise_(
        std::shared_ptr<std::promise<T>> task_promise,
        std::function<T()> task_function,
        bool& decremented_unfinished_tasks
    ) {
        // Run the actual work task here. Once done, we can signal this to the unfinished list.
        // This bit is the only reason why the whole wrapping exists: We need to first decrement
        // the unfinished tasks count, before setting the promise value, as otherwise, outside
        // threads might deduce that there are more pending tasks, when in fact we are already done.
        auto result = task_function();
        assert( unfinished_tasks_.load() > 0 );
        --unfinished_tasks_;
        decremented_unfinished_tasks = true;
        task_promise->set_value( std::move( result ));
    }

    /**
     * @brief Run the task, decrement the unfinished tasks counter, and fulfill the promise.
     * Overload for `void` result types.
     *
     * @param decremented_unfinished_tasks Set to `true` once the counter has been decremented,
     *        so that the caller knows whether it still has to do that in case of an exception.
     */
    template<typename T>
    typename std::enable_if<std::is_void<T>::value>::type
    inline run_task_and_fulfill_promise_(
        std::shared_ptr<std::promise<T>> task_promise,
        std::function<void()> task_function,
        bool& decremented_unfinished_tasks
    ) {
        // Same as above, but for void functions, i.e., without setting a value for the promise.
        task_function();
        assert( unfinished_tasks_.load() > 0 );
        --unfinished_tasks_;
        decremented_unfinished_tasks = true;
        task_promise->set_value();
    }

    // -------------------------------------------------------------
    //     Internal Members
    // -------------------------------------------------------------

private:

    // Worker threads
    std::vector<std::thread> worker_pool_;

    // WrappedTask queue and its counters
    BlockingConcurrentQueue<WrappedTask> task_queue_;
    std::atomic<size_t> unfinished_tasks_{ 0 };
    size_t max_queue_size_;
};


// =================================================================================================

//      Deferred Definitions

// =================================================================================================


// Implemented here, as it needs ThreadPool to be defined first.

template<class T>
void ProactiveFuture<T>::wait() const
{
    // The standard encourages checking for validity before waiting, so we do that first.
    // This throws if the future does not refer to a shared state.
    throw_if_invalid_();

    // A valid future is expected to come with the thread pool that created it.
    // This is only checked via assertion.
    assert( thread_pool_ );

    // A deferred future cannot have been created by the thread pool, and would never become
    // ready by processing tasks of the pool. This is also only checked via assertion.
    assert( !deferred() );

    // Until the result is available, we spend the waiting time on processing other tasks
    // of the thread pool that created this future. We cannot simply block here instead:
    // The task that we are waiting for might itself submit more tasks later on, and the pool
    // could deadlock if all threads were blocked waiting. Avoiding this is the whole purpose
    // of this class. A condition variable is of no use here either, as we do not know
    // whether any tasks will show up in the queue at all before our result is ready.
    for(;;) {
        if( ready() ) {
            return;
        }
        assert( thread_pool_ );

        // Try to do some work. If a task was run, check immediately again if we are done.
        bool const ran_task = thread_pool_->try_run_pending_task();
        if( ran_task ) {
            continue;
        }

        // No task was available in the pool, so there is nothing to do for us at the moment.
        // Yield the thread, and check again afterwards.
        std::this_thread::yield();
    }
}


} // namespace utils

} // namespace genesis


#endif // include guard