In a simple test CUDA application, I have a pointer pointing to a list of class instances, and I copy that data to the GPU. I then run a kernel function many times. The kernel function then calls a __device__
member function for each class instance which increments a variable, profitLoss
.
For some reason, profitLoss
is not incrementing. Here is the code I have:
#include <stdio.h>
#include <stdlib.h>
#define N 200000
class Strategy {
private:
double profitLoss;
public:
__device__ __host__ Strategy() {
this->profitLoss = 0;
}
__device__ __host__ void backtest() {
this->profitLoss++;
}
__device__ __host__ double getProfitLoss() {
return this->profitLoss;
}
};
__global__ void backtestStrategies(Strategy *strategies) {
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < N) {
strategies[i].backtest();
}
}
int main() {
int threadsPerBlock = 1024;
int blockCount = 32;
Strategy *devStrategies;
Strategy *strategies = (Strategy*)malloc(N * sizeof(Strategy));
double *data = (double*)malloc(1000 * sizeof(double));
double *devData;
int i = 0;
cudaSetDevice(0);
// Allocate memory for strategies on the GPU.
cudaMalloc((void**)&devStrategies, N * sizeof(Strategy));
cudaMalloc((void**)&devData, 1000 * sizeof(double));
// Initialize strategies on host.
for (i=0; i<N; i++) {
strategies[i] = Strategy();
}
// Copy strategies from host to GPU.
cudaMemcpy(devStrategies, strategies, N * sizeof(Strategy), cudaMemcpyHostToDevice);
for (i=0; i<363598; i++) {
backtestStrategies<<<blockCount, threadsPerBlock>>>(devStrategies);
}
// Copy strategies from the GPU.
cudaMemcpy(strategies, devStrategies, N * sizeof(Strategy), cudaMemcpyDeviceToHost);
cudaMemcpy(data, devData, 1000 * sizeof(double), cudaMemcpyDeviceToHost);
// Display results.
for (i=0; i<N; i++) {
printf("%f\n", strategies[i].getProfitLoss());
}
// Free memory for the strategies on the GPU.
cudaFree(devStrategies);
return 0;
}
The output is as follows:
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
...
I would expect it to be:
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
...
I believe profitLoss
is not incrementing due to the way I have initialized the objects (automatic storage duration), and I'm not sure of a better way to instantiate these objects and cudaMemcpy
them over to the GPU:
strategies[i] = Strategy();
Can anyone offer any suggestions on how to fix this issue or what might be the cause? Thank you in advance!
Aucun commentaire:
Enregistrer un commentaire