vendredi 22 mars 2019

No speed up by thread parallelization for feature detector

I want to compute SURF features on several images concurrently, but I do not see any speedup with my multithreaded implementation.

Here is the sequential (single-threaded) version of the process.

// Sequential baseline: run the SURF detector on each grayscale image in turn
// and print the total elapsed time in microseconds.
// `prev`, `current`, `duration`, `surf`, `keypoints`, `descriptors`, `images`
// and `bwImages` are declared outside this snippet.
prev = high_resolution_clock::now();
cout<<"Number of Images : "<<images.size()<<endl;

// Iterate over bwImages itself (the container actually being read) rather
// than indexing it with a bound taken from `images`; this cannot run past the
// end of bwImages and avoids the signed/unsigned comparison.
// The same keypoints/descriptors objects are passed on every iteration.
for (const auto &bwImage : bwImages)
{
    surf->detectAndCompute(bwImage, Mat(), keypoints, descriptors);
}

current = high_resolution_clock::now();
duration = duration_cast<microseconds>( current - prev ).count();
cout<<"Time taken for SURF detector "<<duration<<" microseconds"<<endl;

The output is

Time taken for SURF detector 3058657 microseconds

This is my parallelized version

/**
 * Runs SURF detection and description on a single image and stores the
 * results under key `i` in the two output maps.
 *
 * @param bwImage        Image handed to detectAndCompute.
 * @param i              Key under which the results are stored.
 * @param keyPointMap    Receives the detected keypoints at keyPointMap[i].
 * @param descriptorMap  Receives the computed descriptors at descriptorMap[i].
 *
 * The detector is created locally on every call. Both map writes happen while
 * holding the mutex `mut` (declared elsewhere in the file), so the function
 * can be called from several threads sharing the same maps.
 */
void getDescriptor(Mat bwImage,int i, map<int,vector<KeyPoint>> &keyPointMap,map<int,Mat> &descriptorMap)
{
    Mat descriptors;
    std::vector<KeyPoint> keypoints;
    Ptr<Feature2D> surf = SURF::create(SURF_HESSIAN_THRESHOLD);

    // The expensive work is done before taking the lock.
    surf->detectAndCompute(bwImage, Mat(), keypoints, descriptors);

    std::lock_guard<std::mutex> guard(mut);
    // Swap instead of copy-assign: the local vector is discarded right after,
    // so handing over its buffer keeps the critical section short.
    keyPointMap[i].swap(keypoints);
    descriptorMap[i] = descriptors;
}

/**
 * Computes SURF keypoints and descriptors for every image in `bwImages`,
 * launching one thread per image, and blocks until all threads have finished.
 *
 * @param bwImages       Input images; element i is processed with key i.
 * @param keyPointMap    Filled by getDescriptor with keypoints per image index.
 * @param descriptorMap  Filled by getDescriptor with descriptors per image index.
 *
 * The thread container is sized from the input, so any number of images is
 * supported (the previous fixed array of 25 threads overflowed beyond that).
 */
void getDescriptorsParallel(vector < Mat > &bwImages,map<int,vector<KeyPoint>> &keyPointMap,map<int,Mat> &descriptorMap)
{
    const int imageCount = static_cast<int>(bwImages.size());

    std::vector<std::thread> workers;
    workers.reserve(bwImages.size());

    try
    {
        for (int i = 0; i < imageCount; ++i)
            workers.emplace_back(getDescriptor, bwImages[i], i,
                    std::ref(keyPointMap), std::ref(descriptorMap));
    }
    catch (...)
    {
        // A thread failed to start: join the ones already running before
        // propagating, because destroying a joinable std::thread terminates
        // the program.
        for (std::thread &worker : workers)
            worker.join();
        throw;
    }

    for (std::thread &worker : workers)
        worker.join();
}

It takes more time than the previous version. The time taken for getDescriptorsParallel(bwImages,keyPointMap,descriptorMap); is

3435328 microseconds

What am I missing here?

Aucun commentaire:

Enregistrer un commentaire