mercredi 15 juin 2022

C++ How to output chunks of file with complete lines from this algorithm?

I have an extremely huge .csv file (with no headers) and I want to bulk insert it into SQLite database using C++.

I found this algorithm to be the fastest I need.

So, I have this piece of code:

void readFileFast(ifstream &file, void(*lineHandler)(char*str, int length, __int64 absPos)){
        int BUF_SIZE = 40000;
        file.seekg(0,ios::end);
        ifstream::pos_type p = file.tellg();
#ifdef WIN32
        __int64 fileSize = *(__int64*)(((char*)&p) +8);
#else
        __int64 fileSize = p;
#endif
        file.seekg(0,ios::beg);
        BUF_SIZE = min(BUF_SIZE, fileSize);
        char* buf = new char[BUF_SIZE];
        int bufLength = BUF_SIZE;
        file.read(buf, bufLength);

        int strEnd = -1;
        int strStart;
        __int64 bufPosInFile = 0;
        while (bufLength > 0) {
            int i = strEnd + 1;
            strStart = strEnd;
            strEnd = -1;
            for (; i < bufLength && i + bufPosInFile < fileSize; i++) {
                if (buf[i] == '\n') {
                    strEnd = i;
                    break;
                }
            }

            if (strEnd == -1) { // scroll buffer
                if (strStart == -1) {
                    lineHandler(buf + strStart + 1, bufLength, bufPosInFile + strStart + 1);
                    bufPosInFile += bufLength;
                    bufLength = min(bufLength, fileSize - bufPosInFile);
                    delete[]buf;
                    buf = new char[bufLength];
                    file.read(buf, bufLength);
                } else {
                    int movedLength = bufLength - strStart - 1;
                    memmove(buf,buf+strStart+1,movedLength);
                    bufPosInFile += strStart + 1;
                    int readSize = min(bufLength - movedLength, fileSize - bufPosInFile - movedLength);

                    if (readSize != 0)
                        file.read(buf + movedLength, readSize);
                    if (movedLength + readSize < bufLength) {
                        char *tmpbuf = new char[movedLength + readSize];
                        memmove(tmpbuf,buf,movedLength+readSize);
                        delete[]buf;
                        buf = tmpbuf;
                        bufLength = movedLength + readSize;
                    }
                    strEnd = -1;
                }
            } else {
                lineHandler(buf+ strStart + 1, strEnd - strStart, bufPosInFile + strStart + 1);
            }
        }
        lineHandler(0, 0, 0);//eof
}

void lineHandler(char*buf, int l, __int64 pos){
    if(buf==0) return;
    string s = string(buf, l);
    printf(s.c_str());
}

void loadFile(){
    ifstream infile("file");
    readFileFast(infile,lineHandler);
}

And I want to first output first let's say 100.000 complete full lines (not half line at the end of each chunk) or so in order to bulk insert them after into my SQLite database file.

But how to retrieve them?

I tried this:

int main() {
    ifstream ifile("./data.txt", std::ifstream::binary);
    if (ifile.good())
    {
        while (true)
        {
            readFileFast(ifile, lineHandler);
            cout<<lineHandler;
            if(!ifile) break;
            cout<<"------------------------------------------"<<endl;

        }
        // close file
        ifile.close();
    }else{
        cout<<"File not found!"<<endl;
    }

    return 0;
}

But it is not working as it prints 1 very time and I want 100.000 complete lines (not half line at the end) in order to be able to bulk insert them into SQLite.

Thank you in advance!

P.S. I found also this algorithm: https://cplusplus.com/forum/beginner/194071/

But it prints lines and almost every time the last line is just a half line but I need full complete lines in order to bulk insert them all at once into SQLite database.

Aucun commentaire:

Enregistrer un commentaire