I am currently working through a couple famous problems in C++ and am playing around with writing them in modern, optimized, C++. I am still fairly new to writing modern C++.
#include <iostream>
#include <vector>
#include <algorithm>
#include <unordered_map>
#include <stdlib.h>
#include <queue>
#include <chrono>
#include <utility>
#include <numeric>
#include <time.h>
using namespace std;
typedef std::chrono::high_resolution_clock::time_point TimeVar;
#define duration(a) std::chrono::duration_cast<std::chrono::nanoseconds>(a).count()
#define timeNow() std::chrono::high_resolution_clock::now()
unordered_map <int, int> m;
vector<int> topKFrequentLinear(int k){
vector <int> res;
res.reserve(m.size());
for (auto &p : m) res.push_back(p.first);
nth_element(res.begin(), res.begin() + k, res.end(), [](int l, int r){ return m[l] > m[r];});
// return vector<int> (res.begin(), res.begin() + k);
res.resize(k);
return res;
}
vector<int> topKFrequentHeap(int k) {
priority_queue<pair<int, int>, vector<pair<int, int>>, greater<pair<int, int>>> q;
for(auto & it: m){
if(q.size() == k){
if (q.top().first < it.second){
q.pop();
q.emplace(it.second, it.first);
}
} else {
q.emplace(it.second, it.first);
}
}
vector<int> f;
f.reserve(k);
while (!q.empty()) {
f.emplace_back(q.top().second);
q.pop();
}
return f;
}
vector<int> topKFrequentHeapManual(int k) {
vector<pair<int, int>> q;
q.reserve(k);
for(auto & it: m){
if(q.size() == k){
if (q.front().first < it.second){
pop_heap(q.begin(),q.end());
q.pop_back();
}
}
q.emplace_back(it.second, it.first);
push_heap(q.begin(), q.end());
}
vector <int> v;
v.reserve(k);
while (!q.empty() && v.size() < k) {
v.emplace_back(q.front().second);
pop_heap(q.begin(),q.end());
q.pop_back();
}
return v;
}
int main()
{
vector<int> input (10000000, 0);
srand(time(NULL));
for(auto & i : input){
i = rand() % 5000000;
}
for(auto & n : input) ++m[n];
int times = 1;
int k = 1000000;
TimeVar t1;
long double linear, heapApproach, manualHeapApproach;
t1 = timeNow();
topKFrequentLinear(k);
linear = duration(timeNow()-t1);
cout << "first " << linear << endl;
t1 = timeNow();
topKFrequentHeap(k);
heapApproach = duration(timeNow()-t1);
cout << "second " << heapApproach << endl;
t1 = timeNow();
topKFrequentHeapManual(k);
manualHeapApproach = duration(timeNow()-t1);
cout << "third " << manualHeapApproach << endl;
return 0;
}
compile with g++ -std=c++17 -O2 test.cpp
When I run the output is
first 1.69238e+09
second 6.42668e+08
third 5.70223e+08
My question is why is the first, linear time approach taking so much longer than the heap approaches?
Aucun commentaire:
Enregistrer un commentaire