c++11: C++ CUDA using Halton Sequence for Monte Carlo Simulation

mercredi 27 octobre 2021

C++ CUDA using Halton Sequence for Monte Carlo Simulation

I am quite new to programming in CUDA C++ and I am struggling to get this code to work. In short, it runs a Quasi Monte Carlo Simulation of stock prices. I have mainly three parts: the halton.cpp and halton.h to generate the Quasi random number, the main.cpp to run the process and the kernel.h and kernel.cu for implementing the Quasi monte carlo using CUDA.

However I keep getting errors like:

In file included from main.cpp:9:0:
/usr/include/curand.h:70:1: error: expected initializer before ‘extern’
 extern "C" {
 ^~~~~~
In file included from /usr/include/boost/math/policies/error_handling.hpp:15:0,
                 from /usr/include/boost/math/special_functions/gamma.hpp:23,
                 from /usr/include/boost/math/special_functions/erf.hpp:15,
                 from /usr/include/boost/math/distributions/normal.hpp:19,
                 from main.cpp:11:
/usr/include/c++/6/typeinfo:39:37: error: expected declaration before end of line
 #pragma GCC visibility push(default)

when i compile using

nvcc -o test main.cpp halton.cpp kernel.cu -lcurand

My files are like this and do anyone know what has gone wrong in my code?

main.cpp

#include <chrono>
#include <iostream>
#include <string>
#include <random>
#include <vector>
#include <math.h>
#include <cuda_runtime.h>
#include "kernel.h"
#include <curand.h>

#include <boost/math/distributions/normal.hpp>

#include "halton.hpp"

using namespace std;

void quasi_random_number_1d(double* rand_nums, int size, int skip) {
    boost::math::normal norm;
    const int LIMIT = 1600;
    int dim = 1;
    int count = 0;

    double halton_num;
    float quasi_num;
    double* quasi_seq;

    while (true) {
        quasi_seq = halton(dim, LIMIT);
        for (int i=0; i<LIMIT; i=i+skip){
            halton_num = *(quasi_seq+i);
            rand_nums[count] = (quantile(complement(norm, halton_num)));
            count++;
            if (count >= size) break;
        }
        dim += 1;
    }
    // return rand_nums;
}

int main() {

    // place quasi randnums in d_normal
    double S = 100;
    double sigma = 0.2;
    double rate = 0.02;
    double T = 3;
    double K = 95;
    int n = 10; // num of step
    int m = 10; // num of path

    double Zs [n*m];
    quasi_random_number_1d(Zs, n*m, 2);
    double ST [m];

    run_mc(ST, S, sigma, rate, T, n, m, Zs);

    for (int i=0; i<m; i++) {
        cout << ST[i] << " ";
    }
    cout << endl;

    return 0;
}

kernel.h

#ifndef _KERNEL_CUH_
#define _KERNEL_CUH_


// double S, double sigma, double rate, double T, double K, int n, int m, int seed=1126


// void run_mc(float * d_s,float T, float K, float B,
//  float S0, float sigma, float mu, float r,
//  float dt, float* d_normals, unsigned N_STEPS, unsigned N_PATHS);

void run_mc(double* ST, double S, double sigma, double rate, double T, int n,
    int m, double* Zs)

#endif

kernel.cu

#include "kernel.h"
#include <stdexcept>
#include <algorithm>
#include <math.h>

__global__ void monte_carlo_sim();

void run_mc(double* ST, double S, double sigma, double rate, double T, int n,
    int m, double* Zs) {

    double deltaT = T/n;
    double drift = exp(rate - 0.5*(pow(sigma, 2.0))*deltaT);

    // arrays
    double* d_ST = nullptr
    double* d_Zs = nullptr;
    cudaMalloc((void**)&dev_Zs, size * sizeof(double));
    cudaMalloc((void**)&dev_ST, size * sizeof(double));

    // values
    double d_S, d_sigma, d_deltaT, d_drift;
    int d_n, d_m;
    cudaMalloc(&d_S, sizeof(double));
    cudaMalloc(&d_sigma, sizeof(double));
    cudaMalloc(&d_deltaT, sizeof(double));
    cudaMalloc(&d_drift, sizeof(double));
    cudaMalloc(&d_n, sizeof(int));
    cudaMalloc(&d_m, sizeof(int));

    // copy values from host to device
    cudaMemcpy(d_Zs, Zs, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_ST, ST, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_S, S, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_sigma, sigma, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_deltaT, delta, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_drift, drift, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_n, n, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_m, m, size, cudaMemcpyHostToDevice);

    //test
    int threadsPerBlock,blocksPerGrid;
    if (n_el<1024){
        threadsPerBlock = n_el;
        blocksPerGrid   = 1;
    } else {
        threadsPerBlock = 1024;
        blocksPerGrid   = ceil(double(n_el)/double(threadsPerBlock));
    }

    // invoke the kernel
    monte_carlo_sim<<<blocksPerGrid,threadsPerBlock>>>(d_ST, d_Zs,
        d_S, d_sigma, d_deltaT, d_n, d_m);

    cudaMemcpy(ST, d_ST, size, cudaMemcpyDeviceToHost);

    // free device memory
    cudaFree(d_Zs);
    cudaFree(d_ST);
    cudaFree(d_S);
    cudaFree(d_sigma);
    cudaFree(d_deltaT);
    cudaFree(d_drift);
    cudaFree(d_n);
    cudaFree(d_m);

    // free host memory
    delete[] d_Zs;

}

__global__ void monte_carlo_sim(double* d_ST, const double* d_Zs,
    const double* d_S, const double* d_sigma, const double* d_deltaT,
    const double* d_n, const double* d_m) {

    const unsigned tid = threadIdx.x;
    const unsigned bid = blockIdx.x;
    const unsigned bsz = blockDim.x;

    int s_idx = tid + bid * bsz;
    int n_idx = tid + bid * bsz;

    double S = d_S;

    if (s_idx < d_m) {
        int ni = 0;
        do {
            S = S * d_drift * exp(d_sigma * sqrt(d_deltaT) * d_Zs[n_idx]);
        }
        while (ni < d_n);
        d_ST[s_idx] = S;
    }
}

For reference, the code below is an implementation for Monte Carlo Simulation for CPU/non-GPU/non-cuda:

#include <iostream>
#include <string>
#include <random>
#include <vector>
#include <math.h>

#include <boost/math/distributions/normal.hpp>

#include "halton.hpp"

using namespace std;

/*
Random Number Generator
*/

vector<vector<double>> quasi_random_number(int n, int m, int seed) {

    boost::math::normal norm;

    double* quasi_seq = halton(seed, n*m);
    vector<vector<double>> rand_nums(m, vector<double> (n, 0));

    for (int mi=0; mi<m; mi++){
        for (int ni=0; ni<n; ni++){
            double halton_num = *(quasi_seq+mi+ni);
            rand_nums[mi][ni] = quantile(complement(norm, halton_num));
        }
    }

    return rand_nums;
}

vector<vector<double>> pseudo_random_number(int n, int m, int seed) {
    std::default_random_engine generator(seed);
    std::normal_distribution<double> distribution (0.0,1.0);

    vector<vector<double>> rand_nums(m, vector<double> (n, 0));

    for (int mi=0; mi<m; mi++){
        for (int ni=0; ni<n; ni++){
            rand_nums[mi][ni] = distribution(generator);
        }
    }

    return rand_nums;
}



/*
Monte Carlo Simulation
*/

double compute_growth_factor(const double &drift, const double &sigma, const double &deltaT, const double &Z) {
    return drift * exp(sigma * sqrt(deltaT) * Z);
}

vector<vector<double>> monte_carlo_simulate(double S, double sigma, double rate, double T, double K, int n, int m, int seed=1126) {
    /*
    S - stock price
    sigma - stock variance
    rate - discount rate
    T - time periods
    K - strike price
    n - number of simulation period
    m - number of simulations
    */

    double deltaT = T/n;
    double drift = exp(rate - 0.5*(pow(sigma, 2.0))*deltaT);

    // vector<vector<double>>  Zs = pseudo_random_number(n, m, seed);
    vector<vector<double>>  Zs = quasi_random_number(n, m, seed);

    double growth_factor;
    vector<vector<double>> S_path(m, vector<double> (n, 0));

    for (int mi=0; mi<m; mi++) {
        vector<double> Z = Zs.at(mi);
        double growth_factor = compute_growth_factor(drift, sigma, deltaT, Z.at(0));
        S_path[mi][0] = S * growth_factor;
        for (int ni=1; ni<n; ni++) {
            growth_factor = compute_growth_factor(drift, sigma, deltaT, Z.at(ni));
            S_path[mi][ni] = growth_factor * S_path[mi][ni-1];
        }
    }
    return S_path;
}

int main() {

    vector<vector<double>> stock_paths = monte_carlo_simulate(100, 0.2, 0.02, 3, 95, 10, 10);

    for (int x=0; x<stock_paths.size(); x++) {
        for (int y=0; y<stock_paths[0].size(); y++) {
            std::cout << stock_paths[x][y] << " ";
        }
        std::cout << std::endl;
    }

    return 0;
}

c++11

mercredi 27 octobre 2021

C++ CUDA using Halton Sequence for Monte Carlo Simulation

Aucun commentaire:

Enregistrer un commentaire