How to parallelize a complex for loop, for example counting sort, here my try with 5 thread:
void countingSort(const vector<int> &nums, vector<int> &histogram, int start, int end) {
for (int i = start; i < end && i < nums.size(); i++)
histogram[nums[i]]++;
}
int main(int argc, char *argv[]) {
int size = 30;
int NUM_OF_THREADS = 5;
vector<int> nums;
nums.reserve(size);
for(int i = 0; i < size; i++) nums.push_back(rand() % size);
// create "histograms":
vector<vector<int>> histograms(NUM_OF_THREADS);
for (auto &hist: histograms)
hist = vector<int>(size, 0);
int n = nums.size();
std::thread threads[NUM_OF_THREADS];
for (int i = 0, j = 0; i < n; i += n / NUM_OF_THREADS)
threads[j++] = thread(countingSort, ref(nums), ref(histograms[j]), i, i + n / NUM_OF_THREADS);
// join threads:
for (auto &t: threads)
if(t.joinable())
t.join();
for (int i = 0; i < size; i++)
histograms[0][i] += (histograms[1][i] + histograms[2][i] + histograms[3][i] + histograms[4][i]);
vector<int> res;
nums.reserve(size);
for (int i = 0; i < size; i++)
while(histograms[0][i]--) res.push_back(i);
for_each(res.begin(), res.end(), [](int i ) { cout << i << " ";});
return 0;
}
The problem is that it seems too complex to me, and there must be a method to do it in a more elegant and professional way
Aucun commentaire:
Enregistrer un commentaire