I'm writing a cuda library and I need to check the differences in performance between the option CPU and GPU. So I created a simple class called Timer to measure the time required to execute first a GPU function and then the CPU version.
class Timer
{
public:
Timer()
{
_StartTimepoint = std::chrono::steady_clock::now();
}
~Timer() {}
void Stop()
{
_stopped = true;
using namespace std::chrono;
auto endTimepoint = steady_clock::now();
auto start = time_point_cast<milliseconds>(_StartTimepoint).time_since_epoch().count();
auto end = time_point_cast<milliseconds>(endTimepoint).time_since_epoch().count();
auto _ms = end - start;
_secs = _ms / 1000;
_ms -= _secs * 1000;
_mins = _secs / 60;
_secs -= _mins * 60;
_hour = _mins / 60;
_mins -= _hour * 60;
}
double GetTime(){
if(_stopped == true)
return _ms;
else{
Stop();
return _ms;
}
}
private:
std::chrono::time_point< std::chrono::steady_clock> _StartTimepoint;
double _secs,_ms,_mins,_hour;
bool _stopped = false;
};
Since I need to check the performances for different values of a parameter m I just run both the functions inside a for loop as you can see:
for (size_t m = MIN_M; m < MAX_M; m+=M_STEP){
m_array[m_cont] = m;
//simulate
double time_gpu,time_cpu;
Timer timer_gpu;
run_device(prcr_args,seeds,&m_array[m_cont]);
timer_gpu.Stop();
time_gpu = timer_gpu.GetTime();
Timer timer_cpu;
simulate_host(prcr_args,seeds,&m_array[m_cont]);
timer_cpu.Stop();
time_cpu = timer_cpu.GetTime();
double g = time_cpu/time_gpu;
ofs << m //stream to print the results
<< "," << time_cpu
<< "," << time_gpu
<< "," << g << "\n";
m_cont ++;
}
The problem is that the results i obtain are incredibly small and clearly wrong since they all are equal (the execution time should increase with m) and that my code requires a couple of minutes to run.
m,cpu_time,gpu_time,g
10,9.88131e-324,6.90979e-310,1.43004e-14
15,9.88131e-324,6.90979e-310,1.43004e-14
....
90,9.88131e-324,6.90979e-310,1.43004e-14
95,9.88131e-324,6.90979e-310,1.43004e-14
100,9.88131e-324,6.90979e-310,1.43004e-14
My guess is that the CPU doesn't execute the cycle sequentially and therefore starts and stops the clock immediately.
Aucun commentaire:
Enregistrer un commentaire