I've created a simple test to check how much faster std::memory_order_relaxed is than std::memory_order_seq_cst for an atomic increment. However, the performance was the same in both cases. My compiler: gcc version 7.3.0 (Ubuntu 7.3.0-27ubuntu1~18.04). Build arguments: g++ -m64 -O3 main.cpp -std=c++17 -lpthread. CPU: Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz, 4 cores, 2 threads per core. Test code:
#include <vector>
#include <iostream>
#include <thread>
#include <atomic>
#include <chrono>
#include <functional>
// Shared counter incremented by every worker thread in both tests below.
std::atomic<int> cnt{0};
void run_test_order_relaxed()
{
std::vector<std::thread> v;
for (int n = 0; n < 4; ++n) {
v.emplace_back([]() {
for (int n = 0; n < 30000000; ++n) {
cnt.fetch_add(1, std::memory_order_relaxed);
}
});
}
std::cout << "rel: " << cnt.load(std::memory_order_relaxed);
for (auto& t : v)
t.join();
}
void run_test_order_cst()
{
std::vector<std::thread> v;
for (int n = 0; n < 4; ++n) {
v.emplace_back([]() {
for (int n = 0; n < 30000000; ++n) {
cnt.fetch_add(1, std::memory_order_seq_cst);
}
});
}
std::cout << "cst: " << cnt.load(std::memory_order_seq_cst);
for (auto& t : v)
t.join();
}
/// Invokes `func` exactly once and prints the elapsed time to std::cout as
/// " duration: <N>ms", followed by a newline and a stream flush.
///
/// @param func  Callable to time.
void measure_duration(const std::function<void()>& func)
{
    // Intervals are measured with steady_clock because it is monotonic.
    // high_resolution_clock is not guaranteed to be steady and may alias the
    // adjustable system clock, which can skew or even negate a measured span.
    using Clock = std::chrono::steady_clock;

    const Clock::time_point start = Clock::now();
    func();
    const Clock::time_point stop = Clock::now();

    const auto elapsed_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(stop - start).count();

    // std::endl is kept deliberately so each timing appears as soon as its
    // test finishes rather than when the buffer is eventually flushed.
    std::cout << " duration: " << elapsed_ms << "ms" << std::endl;
}
// Entry point: times the relaxed-order test first, then the seq_cst test.
int main()
{
    using TestFn = void (*)();
    const TestFn tests[] = {run_test_order_relaxed, run_test_order_cst};

    for (const TestFn test : tests) {
        measure_duration(test);
    }
    return 0;
}
Why do std::memory_order_relaxed and std::memory_order_seq_cst always produce almost the same timings? Result: rel: 2411 duration: 4440ms cst: 120000164 duration: 4443ms
Aucun commentaire:
Enregistrer un commentaire