I am trying to write a small parallel for loop. However, the output gives me 0 1, 2,3,4,5,6,7,5,4,0,0,0,0,0.. etc I'm testing if it is the way I'm passing iterators, but I'm quite sure. what is a better way to implement a template parallel for loop?
#include <iostream>
#include <iterator>
#include <vector>
#include <thread>
#include <memory>
using namespace std;
template<class InputIterator, class OutputIterator, class Func>
void parallel_for(InputIterator begin, InputIterator end, OutputIterator result, Func func )
{
unsigned int num_elements = std::distance(begin,end);
if( (std::thread::hardware_concurrency() == 1) || (std::thread::hardware_concurrency() > num_elements) )
{
func(begin,end,result);
}
else
{
std::vector<std::unique_ptr<std::thread>> tasks;
const unsigned int num_threads = std::thread::hardware_concurrency();
unsigned int grp = num_elements / num_threads;
while( begin != end)
{
if( num_elements > grp )
{
tasks.push_back(unique_ptr<std::thread>(new std::thread(func, begin, begin + (grp - 1), result)));
begin += grp;
num_elements -= grp;
}
else
{
tasks.push_back(unique_ptr<std::thread>(new std::thread(func, begin, begin + num_elements, result)));
begin += num_elements;
}
}
for(auto & task : tasks)
task->join();
}
}
int main() {
vector <int> a1= {0,1,2,3,4,5,6,7,5,4,6,7,8,8,5,4,5,6,7,4,5,6,7,8,5,4,6,7,8,9,7,5,4,5,6,7,8,6,5,4,3,33,45,67,100};
vector<int> a2(45);
parallel_for(a1.begin(),a1.end(),a2.begin(),[](auto a,auto b, auto c)
{
while(a!=b)
{
*c = *a;
++a;
++c;
}
});
for(auto i : a2)
cout<<i<<"\n";
return 0;
}
Aucun commentaire:
Enregistrer un commentaire