I'm working on a optimization problem which contains various math functions which resembles in similar form, so I warp them in a FunctionObj
template <typename T>
struct FunctionObj
{
T a;
FunctionObj(): a(1)
{
}
};
And defines a FuncEval
to evaluate
template <typename T>
__host__ __device__ inline T FuncEval(const FunctionObj<T> &f_obj, T x)
{
return f_obj.a+x;
}
What I really want to do is sum {func(x)}
, so I defines a FuncEvalF
functor to make use of thrust::tranform_reduce
template <typename T>
struct FuncEvalF
{
const FunctionObj<T>& f_obj;
__host__ __device__ inline FuncEvalF(const FunctionObj<T>& in_f_obj) :f_obj(in_f_obj)
{
}
__host__ __device__ inline T operator()(T x)
{
return FuncEval(f_obj, x);
}
};
template <typename T>
__host__ __device__ inline T BatchFuncEval(const FunctionObj<T> &f_obj, int size, const T *x_in);
template<>
inline float BatchFuncEval< float>(const FunctionObj<float> &f_obj, int size, const float *x_in)
{
return thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(x_in), thrust::device_pointer_cast(x_in + size), FuncEvalF<float>(f_obj), static_cast<float>(0), thrust::plus<float>());
}
Finally in the main.cu
I call the transform_reduce
auto func = FuncEvalF<float>(FunctionObj<float>());
float result = 0;
try
{
result = thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(dev_a), thrust::device_pointer_cast(dev_a + 10000), func, static_cast<float>(0), thrust::plus<float>());
}
catch (std::exception e)
{
printf("%s in thurst \n ", e.what());
}
Here the exception comes : bulk_kernel_by_value
, even I change the 10000 to 10. Thing only goes better when I change the defintion of FuncEval
to
return x;
The program would output the right but meaningless answer . I can't help asking what's wrong with my code ? Thank you for your attention.
Aucun commentaire:
Enregistrer un commentaire