In a simple test CUDA application, I have a pointer pointing to a list of class instances, and I copy that data to the GPU. I then run a kernel function many times. The kernel function then calls a __device__ member function for each class instance which increments a variable, profitLoss.
For some reason, profitLoss is not incrementing. Here is the code I have:
#include <stdio.h>
#include <stdlib.h>
#define N 200000
class Strategy {
    private:
        double profitLoss;
    public:
        __device__ __host__ Strategy() {
            this->profitLoss = 0;
        }
        __device__ __host__ void backtest() {
            this->profitLoss++;
        }
        __device__ __host__ double getProfitLoss() {
            return this->profitLoss;
        }
};
__global__ void backtestStrategies(Strategy *strategies) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < N) {
        strategies[i].backtest();
    }
}
int main() {
    int threadsPerBlock = 1024;
    int blockCount = 32;
    Strategy *devStrategies;
    Strategy *strategies = (Strategy*)malloc(N * sizeof(Strategy));
    double *data = (double*)malloc(1000 * sizeof(double));
    double *devData;
    int i = 0;
    cudaSetDevice(0);
    // Allocate memory for strategies on the GPU.
    cudaMalloc((void**)&devStrategies, N * sizeof(Strategy));
    cudaMalloc((void**)&devData, 1000 * sizeof(double));
    // Initialize strategies on host.
    for (i=0; i<N; i++) {
        strategies[i] = Strategy();
    }
    // Copy strategies from host to GPU.
    cudaMemcpy(devStrategies, strategies, N * sizeof(Strategy), cudaMemcpyHostToDevice);
    for (i=0; i<363598; i++) {
        backtestStrategies<<<blockCount, threadsPerBlock>>>(devStrategies);
    }
    // Copy strategies from the GPU.
    cudaMemcpy(strategies, devStrategies, N * sizeof(Strategy), cudaMemcpyDeviceToHost);
    cudaMemcpy(data, devData, 1000 * sizeof(double), cudaMemcpyDeviceToHost);
    // Display results.
    for (i=0; i<N; i++) {
        printf("%f\n", strategies[i].getProfitLoss());
    }
    // Free memory for the strategies on the GPU.
    cudaFree(devStrategies);
    return 0;
}
The output is as follows:
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
0.000000
...
I would expect it to be:
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
363597.000000
...
I believe profitLoss is not incrementing due to the way I have initialized the objects (automatic storage duration), and I'm not sure of a better way to instantiate these objects and cudaMemcpy them over to the GPU:
strategies[i] = Strategy();
Can anyone offer any suggestions on how to fix this issue or what might be the cause? Thank you in advance!
Aucun commentaire:
Enregistrer un commentaire