samedi 23 mai 2020

Why the search functions are catching only the first occurrence on the text?

I am having problems with these pattern matching algorithms. It was supposed to catch all the occurrences that appears on the texts. I want all the search functions to get all occurrences on a .txt file. Can anybody spot the part that I have to change? And, How can i use a plain-table on Sunday Algorithm and not unordered_map? Thank Yoou :)

#include "header.h"
#include <unordered_map>
#include <chrono>
#include <iostream>
#include <cmath>
#include <math.h>


using namespace std;


int BruteForce::search(string haystack, string needle) {
    if (haystack.length() < needle.length()) {
        return -1;
    }

    for (int i = 0; i <= haystack.length() - needle.length(); i++) {
        int j = 0;
        while((j < needle.length()) && (haystack[i + j] == needle[j])) {
            j++;
        }
        if (j == needle.length()) {
            return i;
        }
    }

    return -1;
}



int Sunday::search(string haystack, string needle) {
    // Boyer–Moore–Horspool algorithm

    int n = haystack.length();
    int m = needle.length();
    if (m > n || m == 0) {
        return -1;
    }
    unordered_map<char, int> offsetTable;
    for (int i = 0; i <= 255; i++) {
        offsetTable.emplace((char) i, m);
    }
    for (int i = 0; i < m - 1; i++) {
        offsetTable[needle[i]] = m - i - 1;
    }

    int skip = 0;
    while (n - skip >= m) {
        int i = m - 1;
        while (haystack[skip + i] == needle[i]) {
            if (i == 0) {
                return skip;
            }
            i--;
        }
        skip = skip + offsetTable[haystack[skip + m - 1]];
    }
    return -1;
}



int KMP::search(string haystack, string needle) {
    haystack = needle + "@" + haystack;
    int len = haystack.length();
    int* pf = new int[len];

    pf[0] = 0;
    int k = 0;
    for (int i = 1; i < haystack.length(); i++) {
        while ((k > 0) && (haystack[k] != haystack[i])) {
            k = pf[k - 1];
        }
        if (haystack[k] == haystack[i]) {
            ++k;
        }
        pf[i] = k;
        if (k == needle.length()) {
            return (i - 2 * needle.length());
        }
    }
    return -1;
}



int RabinKarp::search(string haystack, string needle) {
    if (haystack.length() < needle.length()) {
        return -1;
    }

    int hash_haystack = hash(haystack.substr(0, needle.length()));
    int hash_needle = hash(needle);
    for (int i = 1; i <= haystack.length() - needle.length(); i++) {
        if (hash_haystack == hash_needle) {
            return i;
        }
        hash_haystack = hash_haystack - hash(haystack[i-1], needle.length() - 1);
        hash_haystack *= 128;
        hash_haystack += hash(haystack[i + needle.length()], 0);
    }
    return -1;
}



long long int RabinKarp::hash(string str) {
    long long int h = 0;
    for (int i = 0; i < str.length(); i++) {
        h = (h + hash(str[i], str.length() - i - 1)) % ULONG_MAX;
    }
    return h;
}



long long int RabinKarp::hash(char ch, int k) {
    int base = 128;
    return (int)ch * (long long int)pow(base, k);
}



int FSM::getNextState(string needle, int M, int state, int x)
{
    if (state < M && x == needle[state])
        return state+1;

    int ns, i;
    for (ns = state; ns > 0; ns--)
    {
        if (needle[ns-1] == x)
        {
            for (i = 0; i < ns-1; i++)
                if (needle[i] != needle[state-ns+1+i])
                    break;
            if (i == ns-1)
                return ns;
        }
    }

    return 0;
}



void FSM::computeTF(string needle, int M, int** TF)
{
    int state, x;
    for (state = 0; state <= M; ++state)
        for (x = 0; x < 256; ++x)
            TF[state][x] = getNextState(needle, M, state, x);
}

int FSM::search(string haystack, string needle)
{
    int M = needle.length();
    int N = haystack.length();

    int** TF = new int*[M+1];
    for (int i = 0; i < M+1; i++) {
        TF[i] = new int[256];
    }

    computeTF(needle, M, TF);

    int i, state=0;
    for (i = 0; i < N; i++)
    {
        state = TF[state][haystack[i]];
        if (state == M) {
            return i-M+1;
        }
    }
    return -1;
}



long long Time::check(int (*func)(string, string), string haystack, string needle) {
   chrono::high_resolution_clock::time_point start = chrono::high_resolution_clock::now();
   (*func)(haystack, needle);
   return chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - start).count();
}

Aucun commentaire:

Enregistrer un commentaire