gzip__block__ostream_8cpp_source.html

/*

    Genesis - A toolkit for working with phylogenetic data.

    Copyright (C) 2014-2020 Lucas Czech


    This program is free software: you can redistribute it and/or modify

    it under the terms of the GNU General Public License as published by

    the Free Software Foundation,  either version 3 of the License,  or

    (at your option) any later version.


    This program is distributed in the hope that it will be useful,

    but WITHOUT ANY WARRANTY; without even the implied warranty of

    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

    GNU General Public License for more details.


    You should have received a copy of the GNU General Public License

    along with this program.  If not,  see <http://www.gnu.org/licenses/>.


    Contact:

    Lucas Czech <lucas.czech@h-its.org>

    Exelixis Lab,  Heidelberg Institute for Theoretical Studies

    Schloss-Wolfsbrunnenweg 35,  D-69118 Heidelberg,  Germany

*/


#include "genesis/utils/io/gzip_block_ostream.hpp"


#include "genesis/utils/core/thread_pool.hpp"


#include <cassert>

#include <fstream>

#include <future>

#include <memory>

#include <sstream>

#include <stdexcept>

#include <string>

#include <utility>

#include <vector>


#ifdef GENESIS_ZLIB


#    include "zlib.h"


#    if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)

#       include <fcntl.h>

#       include <io.h>

#   endif


#endif // GENESIS_ZLIB


#ifdef GENESIS_OPENMP

#   include <omp.h>

#endif // GENESIS_OPENMP


namespace genesis {

namespace utils {


// We only include all the class definitions if we actually use zlib.

// If not, we later also provide dummy implementations that throw if instantiated.

#ifdef GENESIS_ZLIB


// ================================================================================================

//     Gzip Block

// ================================================================================================


class GzipBlockCompressor

{

public:


    // -------------------------------------------------------------

    //     Constructors and Rule of Five

    // -------------------------------------------------------------


    GzipBlockCompressor(

        std::size_t block_size,

        int compression_level = Z_DEFAULT_COMPRESSION

    )

        : in_len_( block_size )

        , out_len_( 2 * block_size )

    {


        // Check compression level validity

        if(

            compression_level < static_cast<int>( GzipCompressionLevel::kDefaultCompression ) ||

            compression_level > static_cast<int>( GzipCompressionLevel::kBestCompression )

        ) {

            throw std::invalid_argument(

                "Compression level " + std::to_string( static_cast<int>( compression_level )) +

                " is invalid for usage in gzip output stream. Valid range is [ -1, 9 ]."

            );

        }


        // Prepare z_stream object

        zstream_ = new z_stream;

        zstream_->next_in = Z_NULL;

        zstream_->zalloc = Z_NULL;

        zstream_->zfree = Z_NULL;

        zstream_->opaque = Z_NULL;

        int ret = deflateInit2(

            zstream_, compression_level, Z_DEFLATED, 15+16, 8, Z_DEFAULT_STRATEGY

        );

        if( ret != Z_OK ) {

            throw except::GzipError( zstream_->msg, ret );

        }


        // Prepare buffers. We use two times the block size for the output, in the hope that this

        // always suffices for compressing one block of data. If not, we throw some cryptic message...

        in_buff_ = new char [in_len_];

        out_buff_ = new char [out_len_];

    }


    GzipBlockCompressor( GzipBlockCompressor const& ) = delete;

    GzipBlockCompressor( GzipBlockCompressor && )     = default;

    GzipBlockCompressor& operator = ( GzipBlockCompressor const& ) = delete;

    GzipBlockCompressor& operator = ( GzipBlockCompressor && )     = default;


    ~GzipBlockCompressor()

    {

        deflateEnd( zstream_ );

        delete [] in_buff_;

        delete [] out_buff_;

        delete zstream_;

    }


    // -------------------------------------------------------------

    //     Worker Functions

    // -------------------------------------------------------------


    std::pair<char*, size_t> get_input_buffer() const

    {

        return { in_buff_, in_len_ };

    }


    std::pair<char*, size_t> get_output_buffer() const

    {

        return { out_buff_, out_pos_ };

    }


    void compress( size_t avail_in )

    {

        // Start writing to the beginning of the output buffer. Only set that here, where we begin

        // a new block compression. This is then updated automatically from within the deflation loop.

        // We do set it here, so that it is 0 even if we do not compress any data (see next

        // condition).

        out_pos_ = 0;


        // If there is no input, do not write anything, in order to avoid compressing an empty

        // string by accident, which would result in unneccesary gzip headers without content.

        if( avail_in == 0 ) {

            return;

        }


        // Check that we are not asked to compress more data than the input buffer can hold.

        // This is an assertion, because we only use that class and function ourselves locally,

        // so we know what we are doing. If ever moved to the outside, make this an exception.

        assert( avail_in <= in_len_ );


        // Set zstream input buffer pointers. We only process as many bytes as given.

        // This is because the compress function might be called before the full in_len_ buffer

        // is filled, so we only compress what we are told to from the outside.

        zstream_->next_in = reinterpret_cast<decltype( zstream_->next_in )>( in_buff_ );

        zstream_->avail_in = avail_in;


        // Loop until all input is processed

        while( zstream_->avail_in > 0 ) {

            deflate_loop_( Z_NO_FLUSH );

        }


        // All data is done by now.

        assert( zstream_->avail_in == 0 );


        // Then, call deflate again asking to finish the zlib stream

        zstream_->next_in = nullptr;

        zstream_->avail_in = 0;

        deflate_loop_( Z_FINISH );


        // Now reset everything, so that the block can be used again

        deflateReset( zstream_ );

    }


    // -------------------------------------------------------------

    //     Internal Members

    // -------------------------------------------------------------


private:


    void deflate_loop_( int flush )

    {

        while( true ) {

            // When we get here, out_pos_ is already set from the caller to either be 0 for the

            // start of the compression, or left at the current output postion from some earlier

            // deflate loop. So, no need to change it.


            // Set zstream output buffer. It has twice the size, so should fit, but we later still

            // check and throw if not. Ugly, but everything else is just too complicated for now.

            assert( out_len_ >= out_pos_ );

            zstream_->next_out = reinterpret_cast<decltype( zstream_->next_out )>( out_buff_ + out_pos_ );

            zstream_->avail_out = out_len_ - out_pos_;


            // Run the deflate algorithm, and check the result

            int ret = deflate( zstream_, flush );

            if( ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR ) {

                throw except::GzipError( zstream_->msg, ret );

            }


            // Store the resulting end position in the output buffer after the deflation.

            // If this was too much, throw. Also, we check if nothing was written to the buffer;

            // in that case, we are done.

            auto const old_pos = out_pos_;

            out_pos_ = reinterpret_cast<decltype( out_buff_ )>( zstream_->next_out ) - out_buff_;

            if( out_pos_ >= out_len_ ) {

                throw except::GzipError( "Block compression ran out of buffer.", Z_MEM_ERROR );

            }


            // If we are done with the input, get out of here. The Z_BUF_ERROR error is not fatal,

            // but indicates that we are done with the input, and can continue.

            if( ret == Z_STREAM_END || ret == Z_BUF_ERROR || old_pos == out_pos_ ) {

                break;

            }

        }

    }


private:


    // Compression object

    z_stream* zstream_;


    // Store the input, and how many bytes are reserved for it.

    char*  in_buff_;

    size_t in_len_;


    // Store the compressed output, how many bytes are reserved,

    // and how many were used by the compression.

    char*  out_buff_;

    size_t out_len_;

    size_t out_pos_ = 0;

};


// ================================================================================================

//     Gzip Output Stream Buffer

// ================================================================================================


class GzipBlockOStreambuf

    : public std::streambuf

{


    // -------------------------------------------------------------

    //     Structs and Enums

    // -------------------------------------------------------------


private:


    struct BlockTask

    {

        BlockTask( std::size_t block_size, int compression_level )

            : block( block_size, compression_level )

        {}


        GzipBlockCompressor block;

        std::future<void> future;

    };


    // -------------------------------------------------------------

    //     Constructors and Rule of Five

    // -------------------------------------------------------------


public:


    GzipBlockOStreambuf(

        std::streambuf* sbuf_p,

        std::size_t block_size = GzipBlockOStream::GZIP_DEFAULT_BLOCK_SIZE,

        int compression_level = Z_DEFAULT_COMPRESSION,

        size_t num_threads = 1,

        size_t num_blocks = 0

    )

        : sbuf_p_( sbuf_p )

        , thread_pool_( num_threads )

    {

        // Basic setup. We take the number of threads as provided, and if given a number of blocks,

        // also use that. If not, we aim to use twice as many blocks as threads, so that there is

        // enough buffer keeping all worker threads busy. We want at least 2 blocks, so that we

        // have one for current writing operations of the stream, and one that can be compressed

        // at the same time.

        assert( sbuf_p_ );

        if( num_threads == 0 ) {

            throw std::invalid_argument(

                "Cannot create Gzip Block Output Stream with 0 worker threads."

            );

        }

        if( num_blocks == 0 ) {

            num_blocks = 2 * num_threads;

        }

        if( num_blocks < 2 ) {

            num_blocks = 2;

        }

        assert( num_threads >= 1 );

        assert( num_blocks >= 2 );


        // Create as many empty working blocks as needed.

        block_queue_.reserve( num_blocks );

        for( size_t i = 0; i < num_blocks; ++i ) {

            block_queue_.emplace_back( block_size, compression_level );

        }

        assert( block_queue_.size() > 0 );

        assert( block_queue_.size() == num_blocks );

        assert( current_block_ == 0 );


        // Use the first worker block as the current stream target buffer.

        auto block_in = block_queue_[ current_block_ ].block.get_input_buffer();

        setp( block_in.first, block_in.first + block_in.second );

    }


    GzipBlockOStreambuf( GzipBlockOStreambuf const& ) = delete;

    GzipBlockOStreambuf( GzipBlockOStreambuf &&) = delete;

    GzipBlockOStreambuf& operator = ( GzipBlockOStreambuf const& ) = delete;

    GzipBlockOStreambuf& operator = ( GzipBlockOStreambuf &&) = delete;


    virtual ~GzipBlockOStreambuf()

    {

        // Flush the stream

        //

        // NOTE: Errors here (sync() return value not 0) are ignored, because we

        // cannot throw in a destructor. This mirrors the behaviour of

        // std::basic_filebuf::~basic_filebuf(). To see an exception on error,

        // close the ofstream with an explicit call to close(), and do not rely

        // on the implicit call in the destructor.

        sync();

    }


    // -------------------------------------------------------------

    //     Internal and Virtual Functions

    // -------------------------------------------------------------


    virtual std::streambuf::int_type overflow(std::streambuf::int_type c = traits_type::eof()) override

    {

        // As fas as I understand the usage of the overflow() function, it is only called from the

        // std::streambuf functions (that we inherit from) when there is no more room in the buffer

        // to put the next byte to the stream. As we use blocks in the ring buffer as our (ever

        // changing) output buffer, we should only get here if such a block is fully used.

        // Assert this. If this assertion fails, our assumption is wrong that the overflow() is

        // only called from std::streambuf when there is an actual overflow. In that case, we need

        // to investigate what other std::streambuf functions call overflow, and why.

        // The assertion checks that the difference between the current write pointer of the stream

        // buffer and the beginning of the buffer is the same as the total length of the buffer.

        assert( pptr() >= pbase() );

        assert(

            static_cast<size_t>( pptr() - pbase() ) ==

            block_queue_[ current_block_ % block_queue_.size() ].block.get_input_buffer().second

        );


        // Also, assert that the buffer pointers are correct. In particular, the current

        // write pointer pptr needs to be at the same position as the buffer end pointer epptr.

        // This is a variation of the check above.

        // At the same time, the buffer start pointer pbase shoudl still be at the start of the block.

        assert( pptr() == epptr() );

        assert(

            pbase() ==

            block_queue_[ current_block_ % block_queue_.size() ].block.get_input_buffer().first

        );


        // We have an overflow, so the buffer of the current block is full. We can send it to

        // a worker thread for compression, and move on to the next block in the ring, which we

        // then use as the new buffer for storing our input data.

        // If the ring is full, we wait for the next block in order to finish being

        // compressed, and then write it to the underlying stream.

        // All of this is done in the function call here.

        auto ret = compress_current_block_and_move_to_next_block_();

        if( ret != 0 ) {

            setp( nullptr, nullptr );

            return traits_type::eof();

        }


        return traits_type::eq_int_type(c, traits_type::eof()) ? traits_type::eof() : sputc(c);

    }


    virtual int sync() override

    {

        // No pointer to be used. That is an error.

        if( !pptr() ) {

            return -1;

        }


        // First, send all remaining buffered input of the current block to a compression worker.

        // Return early if there was an issue writing any previously processed compressed blocks

        // to the output sink stream.

        auto ret = compress_current_block_and_move_to_next_block_();

        if( ret != 0 ) {

            return ret;

        }


        // Then, write all blocks that are still in the queue. We need to do a full round, because

        // otherwise we have no way of knowing which blocks were used so far - for very short files,

        // we will not even yet have filled the queue completely.

        size_t cur = current_block_ % block_queue_.size();

        do {


            // Write the compressed block, if it has data, potentially waiting for its compression

            // to be finished by some worker thread first.

            if( block_queue_[ cur ].future.valid() ) {

                ret = write_compressed_block_( cur );


                // Return early if there was an issue writing to the output sink stream.

                if( ret != 0 ) {

                    return ret;

                }

            }


            // Process next, wrapping around the ring.

            ++cur;

            cur %= block_queue_.size();

        } while( cur != current_block_ % block_queue_.size() );

        assert( cur == current_block_ % block_queue_.size() );


        // Assert that we flushed all blocks, that is, we waited for all their compression to be

        // done and all their data to be written to our underlying output sink stream.

        // In that case, none of them should have a valid future, which we check here via a lambda

        // that we immediately call.

        assert(

            [this](){

                for( auto const& block : block_queue_ ) {

                    if( block.future.valid() ) {

                        return false;

                    }

                }

                return true;

            }()

        );


        // Also, the queue of the thread pool must be empty, because we just waited

        // for all jobs to finish.

        assert( thread_pool_.load() == 0 );


        // If we got here, all previous checks of `ret` were okay. So it still should be okay now.

        assert( ret == 0 );

        return ret;

    }


private:


    int compress_current_block_and_move_to_next_block_()

    {

        // Get th current block. We were busy filling it with new input data, so it cannot have

        // been compressed already, meaning it cannot have a valid future.

        auto& cur_block = block_queue_[ current_block_ % block_queue_.size() ];

        assert( ! cur_block.future.valid() );


        // Assert that all pointers are where they should be

        assert( pbase() == cur_block.block.get_input_buffer().first );

        assert( epptr() == cur_block.block.get_input_buffer().first + cur_block.block.get_input_buffer().second );


        // Send block to a compression worker thread, using all bytes that have been written to it.

        // The thread pool will pick up the task once a thread is available.

        auto const avail_in = pptr() - pbase();

        cur_block.future = thread_pool_.enqueue(

            [&]( size_t av_in ){

                cur_block.block.compress( av_in );

            },

            avail_in

        );


        // Move to next block in the ring buffer queue

        ++current_block_;

        auto& next_block = block_queue_[ current_block_ % block_queue_.size() ];


        // If the block has a future, that means that we sent it to compression before.

        // Because we use a ring buffer, that hence means that the ring is full. There are

        // currently only full blocks that are either already compressed or under compression

        // by some worker thread, or waiting to be compressed, but no block that we can use

        // as our next input buffer for writing data to.

        // Hence, we have to wait for the block to finish being compressed and then write it to our

        // underlying sink stream, before we can finally re-use the block as our new target buffer

        // for  the incoming data.

        int ret = 0;

        if( next_block.future.valid() ) {


            // If we are here, the ring buffer queue is full. In that case, all blocks have been

            // added to the thread pool for being compressed.

            // Assert that indeed all bocks contain valid futures, that is, they all have been

            // send to be compressed at some point before. We use a lambda that executes itself.

            assert(

                [this](){

                    for( auto const& block : block_queue_ ) {

                        if( ! block.future.valid() ) {

                            return false;

                        }

                    }

                    return true;

                }()

            );


            // Write the compressed block to the underlying stream,

            // potentially waiting until its compression is finished.

            ret = write_compressed_block_( current_block_ % block_queue_.size() );

        }


        // Now, the block is written, and we can re-use it as the new stream buffer.

        auto block_in = next_block.block.get_input_buffer();

        setp( block_in.first, block_in.first + block_in.second );


        // Assert that all pointers are where they should be

        assert( pbase() == block_in.first );

        assert( pptr()  == block_in.first );

        assert( epptr() == block_in.first + block_in.second );


        // Return value: was the writing of the previously compressed blocks successful.

        // If not, there was an error somewhere.

        return ret;

    }


    int write_compressed_block_( size_t block_num )

    {

        // Get the block to write. It has to have a future, as we only call this function

        // when the block was previously sent to a worker to be compressed.

        assert( block_num < block_queue_.size() );

        auto& block = block_queue_[ block_num ];

        assert( block.future.valid() );


        // Make sure that the block compression thread is finished

        block.future.get();


        // Get the block output begin and end, and write it to the underlying stream

        auto const block_out = block.block.get_output_buffer();

        std::streamsize sz = sbuf_p_->sputn( block_out.first, block_out.second );


        // Check if there was an error in the sink stream

        if( sz != static_cast<decltype(sz)>( block_out.second )) {

            return -1;

        }

        return 0;

    }


    // -------------------------------------------------------------

    //     Members

    // -------------------------------------------------------------


private:


    // Target sink stream to write compressed blocks to

    std::streambuf * sbuf_p_;


    // Get a pool of workers that will do the compression of each block

    ThreadPool thread_pool_;


    // Ring-buffer-like usage of compression blocks: we rotate, and wait if the compression is not

    // yet done for the next block to be re-used in the ring. The current_block_ number only ever

    // increases (that is, it counts the total number of blocks that have been processed so far).

    // This is meant as a helper for future extensions that might want to keep track of byte

    // offsets of output blocks (not yet implemented).

    size_t current_block_ = 0;

    std::vector<BlockTask> block_queue_;


};


// ================================================================================================

//     Gzip Output Stream

// ================================================================================================


/**
 * @brief Get the number of threads to use for compression if the user did not specify one.
 *
 * With OpenMP, this is the size of the thread team of a parallel region started here, but at
 * least 1. Without OpenMP, this is always 1.
 */
size_t gzip_block_get_num_threads_()
{
    #ifdef GENESIS_OPENMP
        size_t threads = 0;

        // Start a parallel region. OpenMP should be configured in a way that nested regions
        // only get as many threads as there are free ones, so this will not overestimate the number
        // of available threads.
        #pragma omp parallel
        {
            // All threads of the team see the same team size. Only one of them stores it, as
            // concurrent writes to the shared variable from the whole team would be a data race.
            #pragma omp single
            {
                threads = static_cast<size_t>( omp_get_num_threads() );
            }
        }
        if( threads == 0 ) {
            threads = 1;
        }
        return threads;
    #else
        return 1;
    #endif
}


// We have all the implementation here, so that we do not need to expose the stream buffer.


// Overload for output streams: compress into the stream buffer that `os` uses at this moment.
// All work is done by the stream buffer overload that this delegates to.
GzipBlockOStream::GzipBlockOStream(
    std::ostream&        os,
    std::size_t          block_size,
    GzipCompressionLevel compression_level,
    std::size_t          num_threads
)
    : GzipBlockOStream( os.rdbuf(), block_size, compression_level, num_threads )
{}


// Overload for stream buffers: create the compressing stream buffer that writes to `sbuf_p`,
// and use it as the buffer of this stream. It is freed again in the destructor.
GzipBlockOStream::GzipBlockOStream(
    std::streambuf*      sbuf_p,
    std::size_t          block_size,
    GzipCompressionLevel compression_level,
    std::size_t          num_threads
)
    : std::ostream( new GzipBlockOStreambuf(
        sbuf_p,
        block_size,
        static_cast<int>( compression_level ),

        // A thread count of 0 requests automatic estimation, which uses the number of
        // active OpenMP threads in a parallel region. Any other count is taken as given.
        num_threads != 0
            ? num_threads
            : gzip_block_get_num_threads_()
    ))
{
    // Report failures of the stream (bad bit) by throwing.
    exceptions( std::ios_base::badbit );
}


// Free the stream buffer of this stream. The constructor allocated it with `new`.
// NOTE(review): assumes that rdbuf() has not been replaced in the meantime - confirm.
GzipBlockOStream::~GzipBlockOStream()
{
    std::streambuf* const owned_buffer = rdbuf();
    delete owned_buffer;
}


// Up until here, we defined all classes needed for gzip block output streaming.

// However, if genesis is compiled without zlib support, we instead use dummy implementations

// which throw exceptions when being used.

#else // GENESIS_ZLIB


// ================================================================================================

//     Gzip Output Stream

// ================================================================================================


// Dummy used when compiled without zlib: always throws std::runtime_error.
// The base class is initialized explicitly with a null stream buffer, because the C++ standard
// does not provide a default constructor for std::ostream; relying on one only compiles with
// standard libraries that offer it as an extension.
GzipBlockOStream::GzipBlockOStream(
    std::ostream&, std::size_t, GzipCompressionLevel, std::size_t
)
    : std::ostream( static_cast<std::streambuf*>( nullptr ))
{
    throw std::runtime_error( "zlib: Genesis was not compiled with zlib support." );
}


// Dummy used when compiled without zlib: always throws std::runtime_error.
// The base class is initialized explicitly with a null stream buffer, because the C++ standard
// does not provide a default constructor for std::ostream; relying on one only compiles with
// standard libraries that offer it as an extension.
GzipBlockOStream::GzipBlockOStream(
    std::streambuf*, std::size_t, GzipCompressionLevel, std::size_t
)
    : std::ostream( static_cast<std::streambuf*>( nullptr ))
{
    throw std::runtime_error( "zlib: Genesis was not compiled with zlib support." );
}


// Dummy used when compiled without zlib: there is no stream buffer to free.
GzipBlockOStream::~GzipBlockOStream() = default;


#endif // GENESIS_ZLIB


} // namespace utils

} // namespace genesis