jeudi 20 juillet 2023

c++ coroutine runs avx SIMD code, but causes SIGSEGV

c++ coroutine runs avx SIMD code, but causes SIGSEGV for AVX and AVX512


HelloCoroutine hello(int& index, int id, int group_size) {
#if 1
    __mmask8 res=0;
    for(auto i= index++; i< 20; i=index++)
    {

#if 0
// error
        std::cout <<"step 1" <<std::endl;
        __m512i v_offset = _mm512_set1_epi64(int64_t (i));
        std::cout <<"step 2" <<std::endl;
        __m512i v_size = _mm512_set1_epi64(int64_t(group_size));
        std::cout <<"step 3" <<std::endl;
        res = _mm512_cmpgt_epi64_mask(v_offset, v_size);
#elif 1 
// error
        std::cout <<"step 1" <<std::endl;
        __m256i v_offset = _mm256_set1_epi32(int32_t (i));
        std::cout <<"step 2" <<std::endl;
        __m256i v_size = _mm256_set1_epi32(int32_t(group_size));
        std::cout <<"step 3" <<std::endl;
        res = _mm256_cmpgt_epi32_mask(v_offset, v_size);

#else
// OK
        std::cout <<"step 1" <<std::endl;
        __m128i v_offset = _mm_set1_epi32(int32_t (i));
        std::cout <<"step 2" <<std::endl;
        __m128i v_size = _mm_set1_epi32(int32_t(group_size));
        std::cout <<"step 3" <<std::endl;
        res = _mm_cmpgt_epi32_mask(v_offset, v_size);
#endif       
#else
    int res=0;
    for(auto i= index++; i< 20; i=index++)
    {
        res = i > group_size;
#endif
        cout <<i << " > " << group_size <<" ? " << (int)res<<endl;
        co_await std::suspend_always();
    }
}

compile at https://godbolt.org/z/hcP988z8b

-std=c++20 -fcoroutines -mbmi2 -mavx -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl

but result error for avx and avx512, only SSE works OK

Program returned: 139 Program terminated with signal: SIGSEGV step 1

Aucun commentaire:

Enregistrer un commentaire