c++11: Is my implementation of a lock-free fixed-sized allocator actually thread safe?

jeudi 24 septembre 2015

Is my implementation of a lock-free fixed-sized allocator actually thread safe?

I've tried implementing a lock-free fixed-size allocator. Here are the related classes.

Convenience aliases

#ifndef OAG_TEMPLATE_UTLITY_H
#define OAG_TEMPLATE_UTLITY_H
namespace oag
{
    template <typename C>
    using Pointer_type = typename C::pointer;

    template <typename C>
    using Size_type = typename C::size_type;
}
#endif // !OAG_TEMPLATE_UTLITY_H

Implementation of the lock-free fixed-size allocator with default memory ordering for all atomic operations for verification of thread safety without orderings:

#ifndef OAG_LOCK_FREE_MEMORY_CHUNK_H
#define OAG_LOCK_FREE_MEMORY_CHUNK_H

#include <atomic>
#include "template_utility.h"

namespace oag
{
    template <typename T, typename SizeT = unsigned char>
    class lock_free_memory_chunk
    {
    public:
        using value_type = T;
        using pointer = value_type*;
        using size_type = SizeT;

    private:
        using byte = unsigned char;

    public:
        explicit lock_free_memory_chunk( size_type const );

        pointer allocate() noexcept;
        void deallocate( pointer ) noexcept;

    private:
        bool dec_avail_blocks();

    private:
        static auto constexpr block_sz = sizeof( value_type ) < sizeof( size_type ) ?
            sizeof( size_type ) : sizeof( value_type );

    private:
        byte* p_bytes_;
        std::atomic<size_type> next_alloc_idx_;
        std::atomic<size_type> num_avail_blocks_;
    };
}

namespace oag
{
    template <typename T, typename SizeT>
    lock_free_memory_chunk<T, SizeT>::lock_free_memory_chunk( size_type const capacity ) :
        p_bytes_{ new byte[ sizeof( value_type ) * capacity ] },
        next_alloc_idx_{ 0 },
        num_avail_blocks_{ capacity }
    {
        static_assert( sizeof( byte ) == 1, "sizeof(unsigned char) != 1" );

        size_type i{ 0 };
        for ( byte* p{ p_bytes_ }; i < capacity; p += block_sz )
        {
            *reinterpret_cast<size_type*>( p ) = ++i;
        }
    }

    template <typename T, typename SizeT>
    inline oag::Pointer_type<lock_free_memory_chunk<T, SizeT>>
    lock_free_memory_chunk<T, SizeT>::allocate() noexcept
    {
        if ( !dec_avail_blocks() )                                                  // 1A
            return nullptr;

        size_type alloc_idx{ next_alloc_idx_.load() };                              // 1B
        while ( !next_alloc_idx_.compare_exchange_weak(                             // 1C
            alloc_idx,
            *reinterpret_cast<size_type*>( p_bytes_ + alloc_idx * block_sz ) ) )
        {
        }

        return reinterpret_cast<pointer>( p_bytes_ + alloc_idx * block_sz );
    }

    template <typename T, typename SizeT>
    inline void
    lock_free_memory_chunk<T, SizeT>::deallocate( pointer p ) noexcept
    {
        auto next_alloc_from_p{ next_alloc_idx_.load() };                           // 2A
        auto new_next_alloc_idx                                                     // 2B
        {
            static_cast<size_type>(
            ( reinterpret_cast<byte*>( p ) - p_bytes_ ) / block_sz )
        };
        while ( !next_alloc_idx_.compare_exchange_weak(                             // 2C
            next_alloc_from_p,
            new_next_alloc_idx ) )
        {
        }
        *reinterpret_cast<size_type*>( p ) = next_alloc_from_p;                     // 2D

        num_avail_blocks_.fetch_add( 1 );
    }

    template<typename T, typename SizeT>
    inline bool
    oag::lock_free_memory_chunk<T, SizeT>::dec_avail_blocks()
    {
        auto curr_num_avail_blocks{ num_avail_blocks_.load() };                     // 3A
        auto dec_num_avail_blocks                                                   // 3B
        {
            curr_num_avail_blocks > 0 ? curr_num_avail_blocks - 1 : 0
        };
        while ( !num_avail_blocks_.compare_exchange_strong(                         // 3C
            curr_num_avail_blocks,
            dec_num_avail_blocks ) )
        {
            dec_num_avail_blocks = curr_num_avail_blocks > 0 ?                      // 3D
                curr_num_avail_blocks - 1 : 0;
        }

        return curr_num_avail_blocks > 0 ? true : false;
    }
}
#endif // !OAG_LOCK_FREE_MEMORY_CHUNK_H

General description

Memory is sizeof(value_type) or size(size_type) multiplied by the desired capacity.

Every memory block stores an offset to the next block at its start.

Location    [0           ]|[1           ]|...|[N - 1       ]
Contents    [1, obj_bytes]|[2, obj_bytes]|...|[N, obj_bytes]

The offset is lost at allocation as the object takes its required space starting from the start of block.

Function description

allocate()

1A. Make sure that there are available blocks and decrease the number of available blocks by 1 before proceeding; true if blocks are available.
1B. Load the next allocation offset.
1C. Make sure that the allocation offset is unique for every thread in allocate().

deallocate(pointer)

2A. Load the offset of the next allocation.
2B. Calculate the offset from p_bytes_ to parameter p.
2C. Make sure that the value of the offset from p is unique.
2D. Set the offset of p to the unique value.

dec_avail_blocks()

3A. Load the current number of available blocks.
3B. Prevent underflow when decrementing the number of available blocks.
3C. Make sure that if the number of available blocks changes, that the new count is updated.
3D. Return whether there are available blocks or not to allocate().

Question

Are there any thread safety issues that can slip through the cracks?

c++11

jeudi 24 septembre 2015

Is my implementation of a lock-free fixed-sized allocator actually thread safe?

Aucun commentaire:

Enregistrer un commentaire