jeudi 24 septembre 2015

Is my implementation of a lock-free fixed-sized allocator actually thread safe?

I've tried implementing a lock-free fixed-size allocator. Here are the related classes.

Convenience aliases

#ifndef OAG_TEMPLATE_UTLITY_H
#define OAG_TEMPLATE_UTLITY_H
namespace oag
{
    template <typename C>
    using Pointer_type = typename C::pointer;

    template <typename C>
    using Size_type = typename C::size_type;
}
#endif // !OAG_TEMPLATE_UTLITY_H   

Implementation of the lock-free fixed-size allocator with default memory ordering for all atomic operations for verification of thread safety without orderings:

#ifndef OAG_LOCK_FREE_MEMORY_CHUNK_H
#define OAG_LOCK_FREE_MEMORY_CHUNK_H

#include <atomic>
#include "template_utility.h"

namespace oag
{
    template <typename T, typename SizeT = unsigned char>
    class lock_free_memory_chunk
    {
    public:
        using value_type = T;
        using pointer = value_type*;
        using size_type = SizeT;

    private:
        using byte = unsigned char;

    public:
        explicit lock_free_memory_chunk( size_type const );

        pointer allocate() noexcept;
        void deallocate( pointer ) noexcept;

    private:
        bool dec_avail_blocks();

    private:
        static auto constexpr block_sz = sizeof( value_type ) < sizeof( size_type ) ?
            sizeof( size_type ) : sizeof( value_type );

    private:
        byte* p_bytes_;
        std::atomic<size_type> next_alloc_idx_;
        std::atomic<size_type> num_avail_blocks_;
    };
}

namespace oag
{
    template <typename T, typename SizeT>
    lock_free_memory_chunk<T, SizeT>::lock_free_memory_chunk( size_type const capacity ) :
        p_bytes_{ new byte[ sizeof( value_type ) * capacity ] },
        next_alloc_idx_{ 0 },
        num_avail_blocks_{ capacity }
    {
        static_assert( sizeof( byte ) == 1, "sizeof(unsigned char) != 1" );

        size_type i{ 0 };
        for ( byte* p{ p_bytes_ }; i < capacity; p += block_sz )
        {
            *reinterpret_cast<size_type*>( p ) = ++i;
        }
    }

    template <typename T, typename SizeT>
    inline oag::Pointer_type<lock_free_memory_chunk<T, SizeT>>
    lock_free_memory_chunk<T, SizeT>::allocate() noexcept
    {
        if ( !dec_avail_blocks() )                                                  // 1A
            return nullptr;

        size_type alloc_idx{ next_alloc_idx_.load() };                              // 1B
        while ( !next_alloc_idx_.compare_exchange_weak(                             // 1C
            alloc_idx,
            *reinterpret_cast<size_type*>( p_bytes_ + alloc_idx * block_sz ) ) )
        {
        }

        return reinterpret_cast<pointer>( p_bytes_ + alloc_idx * block_sz );
    }

    template <typename T, typename SizeT>
    inline void
    lock_free_memory_chunk<T, SizeT>::deallocate( pointer p ) noexcept
    {
        auto next_alloc_from_p{ next_alloc_idx_.load() };                           // 2A
        auto new_next_alloc_idx                                                     // 2B
        {
            static_cast<size_type>(
            ( reinterpret_cast<byte*>( p ) - p_bytes_ ) / block_sz )
        };
        while ( !next_alloc_idx_.compare_exchange_weak(                             // 2C
            next_alloc_from_p,
            new_next_alloc_idx ) )
        {
        }
        *reinterpret_cast<size_type*>( p ) = next_alloc_from_p;                     // 2D

        num_avail_blocks_.fetch_add( 1 );
    }

    template<typename T, typename SizeT>
    inline bool
    oag::lock_free_memory_chunk<T, SizeT>::dec_avail_blocks()
    {
        auto curr_num_avail_blocks{ num_avail_blocks_.load() };                     // 3A
        auto dec_num_avail_blocks                                                   // 3B
        {
            curr_num_avail_blocks > 0 ? curr_num_avail_blocks - 1 : 0
        };
        while ( !num_avail_blocks_.compare_exchange_strong(                         // 3C
            curr_num_avail_blocks,
            dec_num_avail_blocks ) )
        {
            dec_num_avail_blocks = curr_num_avail_blocks > 0 ?                      // 3D
                curr_num_avail_blocks - 1 : 0;
        }

        return curr_num_avail_blocks > 0 ? true : false;
    }
}
#endif // !OAG_LOCK_FREE_MEMORY_CHUNK_H

General description

Memory is sizeof(value_type) or size(size_type) multiplied by the desired capacity.

Every memory block stores an offset to the next block at its start.

Location    [0           ]|[1           ]|...|[N - 1       ]
Contents    [1, obj_bytes]|[2, obj_bytes]|...|[N, obj_bytes]

The offset is lost at allocation as the object takes its required space starting from the start of block.

Function description

allocate()

  • 1A. Make sure that there are available blocks and decrease the number of available blocks by 1 before proceeding; true if blocks are available.

  • 1B. Load the next allocation offset.

  • 1C. Make sure that the allocation offset is unique for every thread in allocate().

deallocate(pointer)

  • 2A. Load the offset of the next allocation.

  • 2B. Calculate the offset from p_bytes_ to parameter p.

  • 2C. Make sure that the value of the offset from p is unique.

  • 2D. Set the offset of p to the unique value.

dec_avail_blocks()

  • 3A. Load the current number of available blocks.
  • 3B. Prevent underflow when decrementing the number of available blocks.
  • 3C. Make sure that if the number of available blocks changes, that the new count is updated.
  • 3D. Return whether there are available blocks or not to allocate().

Question

Are there any thread safety issues that can slip through the cracks?

Aucun commentaire:

Enregistrer un commentaire