c++ coroutine runs avx SIMD code, but causes SIGSEGV for AVX and AVX512
HelloCoroutine hello(int& index, int id, int group_size) {
#if 1
__mmask8 res=0;
for(auto i= index++; i< 20; i=index++)
{
#if 0
// error
std::cout <<"step 1" <<std::endl;
__m512i v_offset = _mm512_set1_epi64(int64_t (i));
std::cout <<"step 2" <<std::endl;
__m512i v_size = _mm512_set1_epi64(int64_t(group_size));
std::cout <<"step 3" <<std::endl;
res = _mm512_cmpgt_epi64_mask(v_offset, v_size);
#elif 1
// error
std::cout <<"step 1" <<std::endl;
__m256i v_offset = _mm256_set1_epi32(int32_t (i));
std::cout <<"step 2" <<std::endl;
__m256i v_size = _mm256_set1_epi32(int32_t(group_size));
std::cout <<"step 3" <<std::endl;
res = _mm256_cmpgt_epi32_mask(v_offset, v_size);
#else
// OK
std::cout <<"step 1" <<std::endl;
__m128i v_offset = _mm_set1_epi32(int32_t (i));
std::cout <<"step 2" <<std::endl;
__m128i v_size = _mm_set1_epi32(int32_t(group_size));
std::cout <<"step 3" <<std::endl;
res = _mm_cmpgt_epi32_mask(v_offset, v_size);
#endif
#else
int res=0;
for(auto i= index++; i< 20; i=index++)
{
res = i > group_size;
#endif
cout <<i << " > " << group_size <<" ? " << (int)res<<endl;
co_await std::suspend_always();
}
}
compile at https://godbolt.org/z/hcP988z8b
-std=c++20 -fcoroutines -mbmi2 -mavx -mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl
but result error for avx and avx512, only SSE works OK
Program returned: 139 Program terminated with signal: SIGSEGV step 1
Aucun commentaire:
Enregistrer un commentaire