vendredi 9 avril 2021

Performance difference OpenCV 4.5.1 vs 3.4.2

Having some really weird results with different versions of OpenCV. I was seeing large performance differences with cv::calcHist for a 3-channel (RGB) masked image (this path does not use IPP, as IPP only supports single-channel images). Using 3.4.2 cost an average of 15 ms, while 4.5.1 cost 0.7 ms. You might suspect performance improvements in the histogram.cpp code in OpenCV, but not many changes have been made there.

So I essentially copied the cv::calcHist 4.5.1 code over to my test app below, except for the Mat implementation code (and removed the unnecessary Intel IPP code).

Performance profiling essentially showed that cv::calcHist is faster when the memory is allocated by 4.5.1.

Surely there's no big difference in Mat allocation.

To test this, I essentially build the code below with 4.5.1 and 3.4.2 and compare the performance.

// HistogramBenchmark.cpp : This file contains the 'main' function. Program execution begins and ends there.
//

#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/core.hpp>
#include <iomanip>
#include <chrono>
#include <opencv2/highgui.hpp>



// Number of crop/mask pairs created in main() and processed per timed batch.
#define BATCH_SIZE 30
// Width and height (columns, rows) of every synthetic crop created in main().
#define WIDTH 164
#define HEIGHT 196

// Local histogram entry point mirroring the cv::calcHist parameter list;
// defined further down in this file.
void mycalcHist(const cv::Mat* images, int nimages, const int* channels,
    cv::InputArray _mask, cv::OutputArray _hist, int dims, const int* histSize,
    const float** ranges, bool uniform, bool accumulate);

// 8-bit counting kernel called by mycalcHist(); defined further down in this file.
void
mycalcHist_8u(std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    cv::Size imsize, cv::Mat& hist, int dims, const float** _ranges,
    const double* _uniranges, bool uniform);

// Floating-point milliseconds, used in main() to report the mean latency.
typedef std::chrono::duration<double, std::chrono::milliseconds::period> Ms;

// Compares the histograms of two reference images read from disk; defined below.
void testVersions();

int main()
{
    //testVersions();
    int channels[] = { 0, 1, 2 };
    float histRanges[] = { 0, 256 };
    int histSize[] = { 10, 10, 10 };
    const float* ranges[] = { histRanges, histRanges, histRanges };

    std::vector<cv::Mat> crops;
    std::vector<cv::Mat> masks;
    cv::Mat histogram;

    for (int i = 0; i < BATCH_SIZE; i++)
        masks.push_back(cv::Mat());

    for (int i = 0; i < BATCH_SIZE; i++)
        crops.push_back(cv::Mat(HEIGHT, WIDTH, CV_8UC3) * 255);

    // warm up 
    for (int i = 0; i < 20; i++)
    {
        for (int j = 0; j < BATCH_SIZE; j++)
        mycalcHist(&crops[i], 1, channels, masks[i], histogram, 3, histSize, ranges,
            true, false);
    }

    std::chrono::steady_clock::duration latencySum{ 0 };
    unsigned latencySamplesNum = 0;
    std::ostringstream latencyStream;

    // warm up 
    for (int i = 0; i < 200; i++)
    {
        std::chrono::steady_clock::time_point t0 = std::chrono::steady_clock::now();

        for (int j = 0; j < BATCH_SIZE; j++)
            mycalcHist(&crops[j], 1, channels, masks[j], histogram, 3, histSize, ranges,
                true, false);

        latencySum += std::chrono::steady_clock::now() - t0;
        latencySamplesNum += 1;
    }

    latencyStream.str("");
    latencyStream << std::fixed << std::setprecision(1)
        << (std::chrono::duration_cast<Ms>(latencySum) / latencySamplesNum).count() << " ms \n" << latencySamplesNum;
    std::cout << "Mean pipeline latency: " << latencyStream.str() << '\n';

    return 0;
}

void testVersions()
{
    int channels[] = { 0, 1, 2 };
    float histRanges[] = { 0, 256 };
    int histSize[] = { 10, 10, 10 };
    const float* ranges[] = { histRanges, histRanges, histRanges };

    auto img1 = cv::imread(R"(C:\Users\christopher.eviParke\Documents\Configuration\IPU1\images\Reference\1\132571809612622068_Hist3_Fac2.04.bmp)");
    cv::Mat img1_hist;
    cv::cvtColor(img1, img1, cv::COLOR_BGR2RGB);
    cv::calcHist(&img1, 1, channels, cv::Mat(), img1_hist, 3, histSize, ranges,
        true, false);

    auto img2 = cv::imread(R"(C:\Users\christopher.eviParke\Documents\Configuration\IPU1\images\Reference\2\132571827343906357_Hist3_Fac1.80.bmp)");
    cv::Mat img2_hist;
    cv::cvtColor(img2, img2, cv::COLOR_BGR2RGB);
    cv::calcHist(&img2, 1, channels, cv::Mat(), img2_hist, 3, histSize, ranges,
        true, false);

    auto dist = cv::compareHist(img1_hist, img2_hist, cv::HISTCMP_BHATTACHARYYA);

    return;
}

// Prepares everything the counting kernel needs to walk the input planes.
//
// For each of the `dims` histogram dimensions this resolves which image and
// which channel inside that image feeds the dimension, then stores:
//   ptrs[i]         - pointer to the first sample of that channel
//   deltas[2*i]     - per-pixel step (the image's channel count)
//   deltas[2*i + 1] - extra step at the end of each row, in channel elements
// Slot `dims` of ptrs/deltas describes the mask and is only written when
// `mask` is non-empty.
//
// imsize    - set to the size of images[0]; collapsed to a single row of
//             width*height when every input (and the mask) is continuous.
// uniranges - filled with a (scale, offset) pair per dimension when `ranges`
//             is null or `uniform` is true; left untouched otherwise.
//
// Invalid arguments are reported through CV_Assert.
void myhistPrepareImages(const cv::Mat* images, int nimages, const int* channels,
    const cv::Mat& mask, int dims, const int* histSize,
    const float** ranges, bool uniform,
    std::vector<uchar*>& ptrs, std::vector<int>& deltas,
    cv::Size& imsize, std::vector<double>& uniranges)
{
    CV_Assert(channels != 0 || nimages == dims);

    imsize = images[0].size();
    const int depth = images[0].depth();
    const int bytesPerChannel = (int)images[0].elemSize1();
    bool canCollapseRows = true;

    ptrs.resize(dims + 1);
    deltas.resize((dims + 1) * 2);

    for (int i = 0; i < dims; ++i)
    {
        int j = 0; // index of the image that feeds dimension i
        int c = 0; // channel index inside that image

        if (channels)
        {
            // channels[] numbers the channels across all images consecutively;
            // skip whole images until the index falls inside one of them.
            c = channels[i];
            CV_Assert(c >= 0);
            while (j < nimages && c >= images[j].channels())
            {
                c -= images[j].channels();
                ++j;
            }
            CV_Assert(j < nimages);
        }
        else
        {
            // Without a channel list, dimension i is single-channel image i.
            j = i;
            CV_Assert(images[j].channels() == 1);
        }

        CV_Assert(images[j].size() == imsize && images[j].depth() == depth);

        const cv::Mat& plane = images[j];
        if (!plane.isContinuous())
            canCollapseRows = false;

        const int pixelStep = plane.channels();
        ptrs[i] = plane.data + c * bytesPerChannel;
        deltas[i * 2] = pixelStep;
        deltas[i * 2 + 1] = (int)(plane.step / bytesPerChannel - imsize.width * pixelStep);
    }

    if (!mask.empty())
    {
        CV_Assert(mask.size() == imsize && mask.channels() == 1);
        canCollapseRows = canCollapseRows && mask.isContinuous();
        ptrs[dims] = mask.data;
        deltas[dims * 2] = 1;
        deltas[dims * 2 + 1] = (int)(mask.step / mask.elemSize1());
    }

    // With no row padding anywhere, treat the data as one long row.
    if (canCollapseRows)
    {
        imsize.width *= imsize.height;
        imsize.height = 1;
    }

    if (!ranges)
    {
        // Implicit uniform ranges for 8U: map [0, 256) onto histSize[i] bins.
        CV_Assert(depth == CV_8U);

        uniranges.resize(dims * 2);
        for (int i = 0; i < dims; ++i)
        {
            double* coeff = &uniranges[i * 2];
            coeff[0] = histSize[i] / 256.;
            coeff[1] = 0;
        }
    }
    else if (uniform)
    {
        uniranges.resize(dims * 2);
        for (int i = 0; i < dims; ++i)
        {
            CV_Assert(ranges[i] && ranges[i][0] < ranges[i][1]);
            const double low = ranges[i][0];
            const double high = ranges[i][1];
            const double t = histSize[i] / (high - low);

            // bin = floor(value * t - t * low). Mathematically `low` maps to
            // bin 0 and `high` to below histSize[i], but that does not hold
            // exactly in floating point, so it is deliberately not asserted.
            uniranges[i * 2] = t;
            uniranges[i * 2 + 1] = -t * low;
        }
    }
    else
    {
        // Explicit bin edges: each dimension's edges must be strictly increasing.
        for (int i = 0; i < dims; ++i)
        {
            const size_t n = histSize[i];
            for (size_t k = 0; k < n; ++k)
                CV_Assert(ranges[i][k] < ranges[i][k + 1]);
        }
    }
}


// Local stand-in for cv::calcHist restricted to what this benchmark needs:
// 8-bit input and exactly three histogram dimensions.
//
// images/nimages - source images; all must be CV_8U (checked on images[0]
//                  here, and against the others while preparing the planes).
// channels       - channel index per histogram dimension.
// _mask          - empty, or a CV_8UC1 mask of the image size.
// _hist          - output histogram, (re)created as a dims-dimensional CV_32F
//                  array with histSize bins per dimension.
// dims           - number of histogram dimensions; must be 3.
// ranges/uniform - bin boundaries, passed on to the lookup-table builder.
// accumulate     - when true and _hist already had the requested layout, new
//                  counts are added to the existing contents.
//
// Invalid arguments are reported through CV_Assert.
void mycalcHist(const cv::Mat* images, int nimages, const int* channels,
    cv::InputArray _mask, cv::OutputArray _hist, int dims, const int* histSize,
    const float** ranges, bool uniform, bool accumulate)
{
    CV_Assert(images != 0 && nimages > 0);
    // mycalcHist_8u() reads samples as uchar and hard-codes three planes, so
    // reject anything else instead of reading out of bounds.
    CV_Assert(images[0].depth() == CV_8U);
    CV_Assert(dims == 3);

    const uchar* const histdata = _hist.getMat().ptr();

    // Always call create(): it returns immediately when the existing array
    // already has the requested shape and type, and reallocates otherwise.
    // Creating only when _hist was empty let a caller-supplied histogram with
    // a different size or type be written through with the wrong layout.
    _hist.create(dims, histSize, CV_32F);
    cv::Mat hist = _hist.getMat();

    // A reallocation means there is no previous content to accumulate into.
    if (histdata != hist.data)
        accumulate = false;

    // ihist shares hist's buffer but is tagged CV_32S so the kernel can
    // increment integer bins in place; the counts are converted back to float
    // at the end.
    cv::Mat ihist = hist;
    ihist.flags = (ihist.flags & ~CV_MAT_TYPE_MASK) | CV_32S;

    if (!accumulate)
        hist = cv::Scalar(0.);
    else
        hist.convertTo(ihist, CV_32S);

    std::vector<uchar*> ptrs;
    std::vector<int> deltas;
    std::vector<double> uniranges;
    cv::Size imsize;
    cv::Mat mask = _mask.getMat();
    CV_Assert(mask.empty() || mask.type() == CV_8UC1);
    myhistPrepareImages(images, nimages, channels, mask, dims, hist.size, ranges,
        uniform, ptrs, deltas, imsize, uniranges);
    const double* _uniranges = uniform ? &uniranges[0] : 0;

    mycalcHist_8u(ptrs, deltas, imsize, ihist, dims, ranges, _uniranges, uniform);

    ihist.convertTo(hist, CV_32F);
}

// Clamps v to [vmin, vmax]. Arguments are pasted unparenthesised, so pass
// simple expressions only.
#define CV_CLAMP_INT(v, vmin, vmax) (v < vmin ? vmin : (vmax < v ? vmax : v))

// Builds, for every histogram dimension, a 256-entry table that maps an 8-bit
// sample value to the byte offset of its bin along that dimension
// (bin index * step), or to OUT_OF_RANGE when the value belongs to no bin.
// The table for dimension d occupies _tab[d*256 .. d*256 + 255].
//
// hist / shist - dense or sparse histogram supplying bin counts and steps;
//                `issparse` selects which one is consulted (the step is 1 for
//                the sparse case).
// uniranges    - (scale, offset) per dimension, used when `uniform` is true.
// ranges       - per-dimension bounds; with `uniform` only the first two
//                entries of each are read (default [0, 256) when null),
//                otherwise they are the explicit bin edges.
//
// When `uniform` is false and `ranges` is null the table is only resized.
void
mycalcHistLookupTables_8u(const cv::Mat& hist, const cv::SparseMat& shist,
    int dims, const float** ranges, const double* uniranges,
    bool uniform, bool issparse, std::vector<size_t>& _tab)
{
    // Large enough that a sum of offsets containing it is still >= OUT_OF_RANGE.
    static const size_t OUT_OF_RANGE = (size_t)1 << (sizeof(size_t) * 8 - 2);

    const int kLevels = 256; // number of distinct 8-bit sample values

    _tab.resize(kLevels * dims);
    size_t* const tab = _tab.data();

    if (uniform)
    {
        for (int d = 0; d < dims; ++d)
        {
            const double scale = uniranges[d * 2];
            const double offset = uniranges[d * 2 + 1];
            const int sz = issparse ? shist.size(d) : hist.size[d];
            const size_t step = issparse ? 1 : hist.step[d];

            const double v_lo = ranges ? ranges[d][0] : 0;
            const double v_hi = ranges ? ranges[d][1] : 256;

            size_t* const row = tab + d * kLevels;
            for (int v = 0; v < kLevels; ++v)
            {
                if (v >= v_lo && v < v_hi)
                {
                    // Inside the range: clamp so rounding can never push the
                    // value into a bin that does not exist.
                    int idx = cvFloor(v * scale + offset);
                    idx = CV_CLAMP_INT(idx, 0, sz - 1);
                    row[v] = idx * step;
                }
                else
                {
                    row[v] = OUT_OF_RANGE;
                }
            }
        }
    }
    else if (ranges)
    {
        for (int d = 0; d < dims; ++d)
        {
            const int sz = issparse ? shist.size(d) : hist.size[d];
            const size_t step = issparse ? 1 : hist.step[d];
            size_t* const row = tab + d * kLevels;

            // Values below the first edge belong to no bin.
            int v = 0;
            int limit = std::min(cvCeil(ranges[d][0]), kLevels);
            size_t fill = OUT_OF_RANGE;

            for (int bin = 0; ; ++bin)
            {
                // Emit the current segment up to (not including) its upper edge.
                while (v < limit)
                    row[v++] = fill;

                if ((unsigned)bin >= (unsigned)sz)
                    break;

                limit = std::min(cvCeil(ranges[d][bin + 1]), kLevels);
                fill = bin * step;
            }

            // Everything past the last edge is out of range as well.
            while (v < kLevels)
                row[v++] = OUT_OF_RANGE;
        }
    }
}

// Counting kernel for three 8-bit planes.
//
// _ptrs / _deltas - plane pointers and strides: for plane i, _deltas[2*i] is
//                   the per-pixel step and _deltas[2*i + 1] the end-of-row
//                   step. Slot `dims` holds the mask pointer (null when there
//                   is no mask) and, in _deltas[2*dims + 1], the mask row step.
// imsize          - number of columns/rows to walk.
// hist            - dense CV_32S histogram whose bins are incremented in place.
// dims            - must be 3; the plane and table offsets below are
//                   hard-coded for three dimensions.
// _ranges, _uniranges, uniform - forwarded to mycalcHistLookupTables_8u().
//
// Invalid arguments are reported through CV_Assert.
void
mycalcHist_8u(std::vector<uchar*>& _ptrs, const std::vector<int>& _deltas,
    cv::Size imsize, cv::Mat& hist, int dims, const float** _ranges,
    const double* _uniranges, bool uniform)
{
    static const size_t OUT_OF_RANGE = (size_t)1 << (sizeof(size_t) * 8 - 2);

    // The code below reads planes 0..2, deltas 0..5 and table rows 0..2, so
    // any other dimensionality would index out of bounds.
    CV_Assert(dims == 3);
    CV_Assert(_ptrs.size() > (size_t)dims);
    CV_Assert(_deltas.size() >= (size_t)(dims + 1) * 2);
    // Bins are incremented through an int pointer.
    CV_Assert(hist.type() == CV_32S);

    uchar* const H = hist.ptr();
    const uchar* mask = _ptrs[dims];
    const int mstep = _deltas[dims * 2 + 1];

    std::vector<size_t> _tab;
    mycalcHistLookupTables_8u(hist, cv::SparseMat(), dims, _ranges, _uniranges, uniform, false, _tab);
    const size_t* const tab = &_tab[0];

    const int d0 = _deltas[0], step0 = _deltas[1],
        d1 = _deltas[2], step1 = _deltas[3],
        d2 = _deltas[4], step2 = _deltas[5];

    const uchar* p0 = _ptrs[0];
    const uchar* p1 = _ptrs[1];
    const uchar* p2 = _ptrs[2];

    // Counting rows upwards (instead of decrementing imsize.height to zero)
    // makes a non-positive height a no-op rather than a runaway loop.
    for (int y = 0; y < imsize.height; ++y)
    {
        if (!mask)
        {
            for (int x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2)
            {
                // Sum of per-dimension byte offsets; reaches OUT_OF_RANGE as
                // soon as one component is outside its range.
                const size_t idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512];
                if (idx < OUT_OF_RANGE)
                    ++*(int*)(H + idx);
            }
        }
        else
        {
            for (int x = 0; x < imsize.width; x++, p0 += d0, p1 += d1, p2 += d2)
            {
                size_t idx;
                if (mask[x] && (idx = tab[*p0] + tab[*p1 + 256] + tab[*p2 + 512]) < OUT_OF_RANGE)
                    ++*(int*)(H + idx);
            }
        }

        // Skip any row padding; the mask pointer is only advanced when a mask exists.
        p0 += step0;
        p1 += step1;
        p2 += step2;
        if (mask)
            mask += mstep;
    }
}

Aucun commentaire:

Enregistrer un commentaire