I'm learning OpenCL. In one of my tests I tried to use work_group_reduce_add to sum the 3 floating-point values of an array (1.5f, 1.5f, 1.5f); the result I expected was 4.5f, but the result I got was 4.0f. When I try an array with 2 values (1.5f, 1.0f), the result is the expected 2.5f, but when I try with more values, from the third element of the array onward the floating-point values seem to be treated as integers. My code is just below.
std::vector<cl::Platform> plataforms;
cl::Platform::get(&plataforms);
std::vector<cl::Device> devices;
plataforms.front().getDevices(CL_DEVICE_TYPE_GPU, &devices);
cl::Context context(devices.front());
const char* code = "\
__kernel void test(__global float* a, __global float* b) {\
b[0] = work_group_reduce_add(a[get_global_id(0)]);\
}\
";
cl::Program::Sources src(1, std::make_pair(code, strlen(code)));
cl::Program program(context, src);
program.build("-cl-std=CL2.0");
cl::CommandQueue queue_default(context, devices.front(), CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE_DEFAULT);
cl::CommandQueue queue(context, devices.front());
cl::Kernel kernel_test(program, "test");
float a[3] = { 1.5f, 1.5f, 1.5f };
float b[1] = { 0.0f };
cl::Buffer _a(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(float) * 3, a);
cl::Buffer _b(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeof(float), b);
kernel_test.setArg(0, _a);
kernel_test.setArg(1, _b);
queue.enqueueNDRangeKernel(kernel_test, cl::NDRange(), cl::NDRange(3), cl::NDRange(3));
queue.finish();
Aucun commentaire:
Enregistrer un commentaire