I'm using an unordered_map as a sparse 3D-array (128 x 128 x 128) to insert values into a grid, provided the grid cell is still free.
Up until now I always checked with find() if the cell is free and if it is, then I've added an element using insert() or emplace(). Now I've found that I can use the return value of insert and emplace to check if the element has been added or if there was already an element with the same key inside the map. I thought this could improve performance since I could completely remove the usage of find.
As it turns out, rather than improving the performance by inserting without find, the performance actually decreased and I'm not sure why.
I've reduced my application to this example where points are randomly generated and then inserted into the grid.
#include <unordered_map>
#include <random>
#include <chrono>
#include <iostream>
#include <math.h>
#include <algorithm>
#include <string>
using std::cout;
using std::endl;
using std::chrono::high_resolution_clock;
using std::chrono::milliseconds;
using std::chrono::duration_cast;
using std::unordered_map;
int num_elements = 5'000'000;
void findThenInsert(){
cout << endl << "find and emplace" << endl;
auto start = high_resolution_clock::now();
std::mt19937 gen(123);
std::uniform_real_distribution<> dis(0, 128);
unordered_map<int, int> grid;
int count = 0;
for(int i = 0; i < num_elements; i++){
float x = dis(gen);
float y = dis(gen);
float z = (cos(x*0.1) * sin(x*0.1) + 1.0) * 64.0;
int index = int(x) + int(y) * 128 + int(z) * 128 * 128;
auto it = grid.find(index);
if(it == grid.end()){
grid.emplace(index, count);
count++;
}
}
cout << "elements: " << count << endl;
cout << "load factor: " << grid.load_factor() << endl;
auto end = high_resolution_clock::now();
long long duration = duration_cast<milliseconds>(end - start).count();
float seconds = duration / 1000.0f;
cout << seconds << "s" << endl;
}
void insertThenCheckForSuccess(){
cout << endl << "emplace and check success" << endl;
auto start = high_resolution_clock::now();
std::mt19937 gen(123);
std::uniform_real_distribution<> dis(0, 128);
unordered_map<int, int> grid;
int count = 0;
for(int i = 0; i < num_elements; i++){
float x = dis(gen);
float y = dis(gen);
float z = (cos(x*0.1) * sin(x*0.1) + 1.0) * 64.0;
int index = int(x) + int(y) * 128 + int(z) * 128 * 128;
auto it = grid.emplace(index, count);
if(it.second){
count++;
}
}
cout << "elements: " << count << endl;
cout << "load factor: " << grid.load_factor() << endl;
auto end = high_resolution_clock::now();
long long duration = duration_cast<milliseconds>(end - start).count();
float seconds = duration / 1000.0f;
cout << seconds << "s" << endl;
}
int main(){
findThenInsert();
insertThenCheckForSuccess();
}
In both cases the size of the map is 82901 afterwards so I assume the result is exactly the same.
find and emplace: 0.937s emplace then check: 1.268s
Aucun commentaire:
Enregistrer un commentaire