mardi 25 juillet 2017

finding wildcard entries in a map efficiently

I have a map which contains strings as keys; those string resemble wildcards.

A key can have a * at the end, which means that when a lookup is performed, a string that has this key as a prefix shall match this key.

How can I efficiently retrieve the closest matching entry in such a map?

I tried sorting the map entries in a custom way and then using lower_bound, but that sorting does not produce the correct result:

#include <map>
#include <string>
#include <iostream>
#include <algorithm>

struct Compare {
    bool operator()(const std::string& lhs, const std::string& rhs) const
    {
        if (lhs.size() < rhs.size()) {
            return true;
        }

        if (lhs.size() > rhs.size()) {
            return false;
        }

        bool isWildcardlhsAtEnd = (!lhs.empty() && lhs.back() == '*');
        bool isWildcardrhsAtEnd = (!rhs.empty() && rhs.back() == '*');

        if (isWildcardlhsAtEnd && isWildcardrhsAtEnd) {
            return lhs < rhs;
        }
        auto lhSubString = lhs.substr(0, lhs.size() - 1);
        auto rhsSubString = rhs.substr(0, rhs.size() - 1);

        if (isWildcardlhsAtEnd || isWildcardrhsAtEnd) {
            if (lhSubString == rhsSubString) {
                return !isWildcardlhsAtEnd;
            }
            else {
                return lhSubString < rhsSubString;
            }
        }

        return lhs < rhs;
    }
};

template <typename Map>
void lookup(const Map& map, const std::string& key, int expected)
{
    auto it = map.lower_bound(key);
    if (it != map.end()) {
        std::cout << "found " << it->first << " for " << key << "; ";
        std::cout << "expected: " << expected << " got: " << it->second << std::endl;
    }
    else {
        std::cout << "did not find a match for " << key << std::endl;
    }
}

int main()
{
    std::map<std::string, int, Compare> map = {
        { "bar", 1 },
        { "bar*", 2 },
        { "foo1", 3 },
        { "bar1", 4 },
        { "bar1*", 5 },
        { "foo1*", 6 },
        { "bar12", 7 },
        { "bar12*", 8 },
        { "foo12", 9 },
        { "bar123", 10 },
        { "b*", 11 },
        { "f*", 12 },
        { "b", 13 },
        { "f", 14 }
    };

    std::cout << "sorted map \n------" << std::endl;
    std::for_each(map.begin(), map.end(), [](const auto& e) { std::cout << e.first << std::endl; });
    std::cout << "-------" << std::endl;

    lookup(map, "foo1", 3);
    lookup(map, "foo123", 6);
    lookup(map, "foo", 12);
    lookup(map, "bar1234", 8);
}

This produces the following output which demonstrates the incorrect lookup:

sorted map 
------
b
f
b*
f*
bar
bar1
bar*
foo1
bar12
bar1*
foo12
foo1*
bar123
bar12*
-------
found foo1 for foo1; expected: 3 got: 3
did not find a match for foo123
found bar1 for foo; expected: 12 got: 4
did not find a match for bar1234

Aucun commentaire:

Enregistrer un commentaire