samedi 29 juillet 2017

How should I iteratively filter data from big vectors without repeating checks?

I am trying to filter some data in the following scenario: I have a bunch of vector<double> representing n-dimensional points. They can be distinguished into UpperLevel (UL) points x and LowerLevel (LL) points y_i.

There is a single upper level problem and an upper level function f(x), corresponding to a set of UL points (stored in vector<vector<double>> UL_Points) and multiple lower level problems (LLPs) and lower level functions g_i(x,y_i), each corresponding to one set of LL points of varying dimensionalities.

For each UL point I evaluated the UL function for all points and created a std::multimap<double, vector<double> *> UL_Candidates ordering the UL points by their function value. (Low is good)

Now my task is to select a few LL points in such a way that they 'eliminate' a large number of preferably good UL points.

An UL point x is eliminated if there exists any LL point y_i such that the corresponding LL function g_i(x,y_i) is positive.

Since f and the g_i are continuous functions I am using the heuristic to check each UL point of the multimap from lowest to highest against consecutive points in the lower levels. Once I found a violation, g_i(x,y_i) > 0 I keep going until I find a decrease in g_i, and select the previous point y_i that caused the local maximum.

Once a point y_i is selected I do not need to check it again.

Below I posted two implementations:

  1. Uses a std::vector<std::list> to keep track of the indices of LL points in each LLP. If a LL point is selected it deletes the corresponding index.

  2. Directly deletes the LL points (which are stored in a vector<vector<vector<double>>> LL_Points_ of size data.nLLPs).

To my surprise version 2 is more than twice as fast as version 1. I previously tried another version with a forward list, was slightly faster than version 1 but still way slower than version 2.

Due to the required reordering I assumed the deletion of the vectors would be significantly slower, permitting the extra overhead of creating and maintaining the list but it seems I am wrong...

Another possiblility could be that my limited programming experience is causing me to do something stupid, or overlook something elementary, so by all means please enlighten me!

auto minNumPoints(5);
auto maxNumRuns(5); // Runs without a cut

//*/DEBUG:
t1 = std::chrono::steady_clock::now();
//*/
auto UL_Iter = UL_Candidates.begin();
auto UL_End = UL_Candidates.end();
vector<list<vector<double> *>> LL_Pointers(data.nLLPs);
for(auto LLP(0); LLP < data.nLLPs; ++LLP) {
    for(int p = 0; p < data.nLL_Points_[LLP]; ++p) {
        LL_Pointers[LLP].push_back(&LL_Points_[LLP][p]);
    }
}

std::multimap<double, vector<double> *> LL_Candidates1;
auto run(1);
double last = -INFINITY;
while(LL_Candidates1.size() < minNumPoints && run <= maxNumRuns) {
    for(auto LLP(0); LLP < data.nLLPs; ++LLP) {
        auto LL_Pointer = LL_Pointers[LLP].begin();
        auto LL_End = LL_Pointers[LLP].end();
        while (++LL_Pointer != LL_End) {
            tester.defineLL_Constants(LLP, *UL_Iter->second, **LL_Pointer);
            auto current = tester.test(LLP + 1);
            if(last > 0 && last > current) { // We've got a new cut...
                LL_Candidates1.emplace(
                                       (maxObjective - UL_Iter->first)/
                                       (maxObjective - minObjective),
                                       std::move(*(--LL_Pointer))
                                      );
                LL_Pointers[LLP].erase(LL_Pointer);
                last = -INFINITY;
                run = 0;
                if(++UL_Iter == UL_End) goto END1;
            }
            else {
                last = current;
            }
        }
        if(last > 0 && LL_Pointer != LL_End) { // positive local maximum at a boundary
            LL_Candidates1.emplace(
                                   (maxObjective - UL_Iter->first)/
                                   (maxObjective - minObjective),
                                   std::move(*(--LL_Pointer))
                                  );
            LL_Pointers[LLP].erase(LL_Pointer);
            last = -INFINITY;
            run = 0;
            if(++UL_Iter == UL_End) goto END1;
        }
    }
    if(run++ && ++UL_Iter == UL_End) goto END1; // increment if no cut
    // We checked all of the LL_Points_ at least once
}
END1:// All UL_Candidates (and thus all UL_Points) are infeasible


//*/DEBUG:
t2 = std::chrono::steady_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
cout << "LL-evaluation1: " << duration/1e6 << endl;
//*/

//*/DEBUG:
t1 = std::chrono::steady_clock::now();
//*/

UL_Iter = UL_Candidates.begin();
UL_End = UL_Candidates.end();
std::multimap<double, vector<double> *> LL_Candidates2;
auto toDelete = vector<size_t>();
toDelete.reserve(maxNumRuns);
run = 1;
last = -INFINITY;
while(LL_Candidates2.size() < minNumPoints && run <= maxNumRuns) {
    for(auto LLP(0); LLP < data.nLLPs; ++LLP) {
        for(auto p(0); p < data.nLL_Points_[LLP]; ++p) {
            tester.defineLL_Constants(LLP, *UL_Iter->second, LL_Points_[LLP][p]);
            auto current = tester.test(LLP + 1);
            if(last > 0 && last > current) { // We've got a new cut...
                LL_Candidates2.emplace(
                                       (maxObjective - UL_Iter->first)/
                                       (maxObjective - minObjective),
                                       std::move(&LL_Points_[LLP][p-1])
                                      );
                toDelete.push_back(p-1);
                last = -INFINITY;
                run = 0;
                if(++UL_Iter == UL_End) goto END2;
            }
            last = current;
        }
        if(last > 0) { // We had positive local maximum at a boundary
            LL_Candidates2.emplace(
                                   (maxObjective - UL_Iter->first)/
                                   (maxObjective - minObjective),
                                   std::move(&LL_Points_[LLP].back())
                                  );
            toDelete.push_back(data.nLL_Points_[LLP]-1);
            last = -INFINITY;
            run = 0;
            if(++UL_Iter == UL_End) goto END2;
        }
        if(!toDelete.empty()) {
            auto deleted(0);
            for(auto index : toDelete) {
                LL_Points_[LLP].erase(LL_Points_[LLP].begin() + index - deleted++);
            }
            data.nLL_Points_[LLP] -= deleted;
            data.nLL_Points -= deleted;
            toDelete.clear();
        }
    }
    if(run++ && ++UL_Iter == UL_End) goto END2; // increment if no cut
// We checked all of the LL_Points_ at least once
}
//*/
END2:// All UL_Candidates (and thus all UL_Points) are infeasible
//*/DEBUG:
t2 = std::chrono::steady_clock::now();
duration = std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
cout << "LL-evaluation2: " << duration/1e6 << endl;
//*/

Aucun commentaire:

Enregistrer un commentaire