samedi 23 octobre 2021

C++ code stops mid-compiling without any error

I'm working on a Neural Network for classifying handwritten digits (MNIST). I'm using Mingw-w64 (C++11) on a Windows 10 device, with Sublime Text 3 and the default build system provided by Sublime Text. My program compiles and builds normally, but after a few training iterations it just stops running, with no error message. matrixLibrary.h (the second and third bunches of code) consists of a bunch of functions used in neural networking, while the first bunch of code is the actual network. The code is inspired by (https://www.kaggle.com/wwsalmon/simple-mnist-nn-from-scratch-numpy-no-tf-keras/notebook); the dataset is also on the same site. I can't figure out why. Please help.

#include "matrixLibrary.h"

// Byte-swap helpers for the big-endian IDX (MNIST) file headers.
// FIX: macro arguments are now fully parenthesized, so expression arguments
// (e.g. swap16(a | b)) expand correctly instead of being torn apart by
// operator precedence.
#define swap16(n) ((((n)&0xFF00)>>8)|(((n)&0x00FF)<<8))
#define swap32(n) ((swap16(((n)&0xFFFF0000)>>16))|((swap16((n)&0x0000FFFF))<<16))

typedef unsigned char byte;

// One MNIST sample: pixel intensities normalized to [0, 1], a 10-element
// one-hot target vector, and the numeric label itself (0-9, defaults to 0).
struct MNISTchar {
    std::vector<float> pixelData;
    std::vector<float> output;
    int label;
    MNISTchar() : pixelData(), output(10), label(0) {}
};


class MNIST{
public:
    //MNIST loader by Peter Baumann (https://github.com/krck/MNIST-Loader) 
    const std::vector<MNISTchar> trainingData;
    const std::vector<MNISTchar> testData;
    
    MNIST(const std::string& path, int maxNumb)
    : trainingData(getMNISTdata(path + "train-images-idx3-ubyte", path + "train-labels-idx1-ubyte")),
        testData(getMNISTdata(path + "t10k-images-idx3-ubyte", path + "t10k-labels-idx1-ubyte")) {
            if(!this->trainingData.size()) { std::cout <<"ERROR: parsing training data" <<std::endl; }
            if(!this->testData.size()) { std::cout <<"ERROR: parsing testing data" <<std::endl; }
    }   

private:
    std::vector<MNISTchar> getMNISTdata(const std::string& imagepath, const std::string& labelpath) {
        std::vector<MNISTchar> tmpdata = std::vector<MNISTchar>();
        std::fstream file (imagepath, std::ifstream::in | std::ifstream::binary);
        int magicNum_images = 0, magicNum_labels = 0;
        int itemCount_images = 0, itemCount_labels = 0;
        if(file.is_open()) {
            int row_count = 0, col_count = 0;
            file.read((char*)&magicNum_images, 4);
            file.read((char*)&itemCount_images, 4);
            file.read((char*)&row_count, 4);
            file.read((char*)&col_count, 4);
            magicNum_images = swap32(magicNum_images);
            itemCount_images = swap32(itemCount_images);
            row_count = swap32(row_count);
            col_count= swap32(col_count);
            for (int i = 0; i < itemCount_images; i++) {
                MNISTchar tmpchar = MNISTchar();
                for(int r = 0; r < (row_count * col_count); r++) {
                    byte pixel = 0;
                    file.read((char*)&pixel, 1);
                    tmpchar.pixelData.push_back((float)pixel / 255);
                }
                tmpdata.push_back(tmpchar);
            }
        }
        file.close();
        file.open(labelpath, std::ifstream::in | std::ifstream::binary);
        if (file.is_open()) {
            file.read((char*)&magicNum_labels, 4);
            file.read((char*)&itemCount_labels, 4);
            magicNum_labels = swap32(magicNum_labels);
            itemCount_labels = swap32(itemCount_labels);
            if(itemCount_images == itemCount_labels) {
                for(MNISTchar& m : tmpdata) {
                    file.read((char*)&m.label, 1);
                    m.output[m.label] = 1.0f;
                }
            }
        }
        file.close();
        return tmpdata;
    }


public:
    void run(float minNumb, float maxNumb, float learningrate, short itereations){
        Helpers helpers = Helpers();

        short inputSize = 784;
        short dimension1 = 10;
        short outputSize = 10;


        matrixF* weights1 = helpers.initializeWeight(dimension1, inputSize, minNumb, maxNumb);
        matrixF* weights2 = helpers.initializeWeight(outputSize, dimension1, minNumb, maxNumb);

        vecF* bias1 = helpers.initializeBias(dimension1, minNumb, maxNumb);
        vecF* bias2 = helpers.initializeBias(outputSize, minNumb, maxNumb);

        matrixF* input = new matrixF(batchSize, vecF(inputSize));
        input->clear();
        matrixF* correctOutput = new matrixF(batchSize, vecF(outputSize));
        correctOutput->clear();

        for (int i = 0; i < batchSize; i++){
            input->push_back(trainingData[i].pixelData);
            correctOutput->push_back(trainingData[i].output);
        }

        input = helpers.transform(input);
        correctOutput = helpers.transform(correctOutput);

        matrixF* wbOut1 = 0;
        matrixF* wbOut2 = 0;

        matrixF* actFuncOut1 = 0;
        matrixF* actFuncOut2 = 0;

        float append = learningrate/batchSize;
        matrixF* dOut1 = new matrixF(batchSize, vecF(dimension1));

        for (short i = 0; i < itereations; i++){
            wbOut1 = helpers.forwardProp2(weights1, helpers.transform(input), bias1, batchSize, dimension1);
            actFuncOut1 = helpers.reLUMatrix(wbOut1);
            wbOut2 = helpers.forwardProp2(weights2, helpers.transform(actFuncOut1), bias2, batchSize, outputSize);
            actFuncOut2 = helpers.reLUMatrix(wbOut2);

            //backprop start
            matrixF* dOut2 = 0;
            dOut1->clear();

            dOut2 = helpers.subtractMatrices(actFuncOut2, correctOutput);

            helpers.outDeriv(dOut1, helpers.transform(weights2), helpers.transform(dOut2), wbOut1, dimension1);

            weights1 = helpers.subtractMatrices(weights1, helpers.fullDot(dOut1, input, dimension1, inputSize, append));
            bias1 = helpers.subtractVectors(bias1, helpers.sumMatrix(dOut1, append), dimension1);

            weights2 = helpers.subtractMatrices(weights2, helpers.fullDot(dOut2, actFuncOut1, dimension1, outputSize, append));
            bias2 = helpers.subtractVectors(bias2, helpers.sumMatrix(dOut2, append), outputSize);
            //backprop done
            std::cout<<i+1<<std::endl;

            helpers.getAccuracy(helpers.transform(actFuncOut2), helpers.transform(correctOutput));
            helpers.checkMemory(0);
        }
    }
};

typedef std::chrono::steady_clock clk;

int main(){
    const clk::time_point startTime = clk::now();

    // Load the MNIST training/test sets from ./Dataset/.
    MNIST mnist = MNIST("Dataset/", 1);
    const clk::time_point stopLoad = clk::now();
    float time = std::chrono::duration_cast<std::chrono::milliseconds>(stopLoad - startTime).count();
    std::cout<<"Load time: "<<time/1000<<" sec." <<std::endl;

    // Seed rand() before run() draws the initial weights and biases.
    std::srand(std::time(nullptr));
    mnist.run(-0.5, 1, 0.01, 500);

    const clk::time_point stopTotal = clk::now();
    time = std::chrono::duration_cast<std::chrono::milliseconds>(stopTotal - startTime).count();
    // FIX: this measures wall-clock run time, not compile time.
    std::cout<<"Total run time: "<<time/1000<<" sec." <<std::endl;

    return 0;
}
#include"matrixLibrary.h"

// Helpers is stateless; the default constructor suffices.
Helpers::Helpers() = default;

// Nothing to release; the default destructor suffices.
Helpers::~Helpers() = default;

// Number of rows in the matrix.
// FIX: the old loop copied every row vector just to count them; size() is O(1).
short Helpers::getSizeMat(matrixF* mat){
    return static_cast<short>(mat->size());
}

// Number of elements in the vector.
// FIX: the old loop visited every element just to count them; size() is O(1).
short Helpers::getSizeVec(vecF* vec){
    return static_cast<short>(vec->size());
}

// Uniform random float in [minNumb, maxNumb], driven by rand() (seeded in main).
// NOTE(review): if maxNumb == minNumb the divisor becomes RAND_MAX/0; with
// IEEE floats that yields infinity and the function returns minNumb — confirm
// callers never rely on this edge case.
float Helpers::randNumb(float minNumb, float maxNumb){
    return minNumb + static_cast <float> (rand()) /( static_cast <float> (RAND_MAX/(maxNumb-minNumb)));
}

// Rectified linear unit: passes positives through, clamps the rest to zero.
float Helpers::reLU(float numb){
    if (numb > 0){
        return numb;
    }
    return 0;
}

// Derivative of ReLU: 1 for positive inputs, 0 otherwise.
float Helpers::reLUDeriv(float numb){
    return (numb > 0) ? 1.0f : 0.0f;
}

// Applies reLU element-wise; returns a newly allocated matrix the caller owns.
// FIX: the scratch row was heap-allocated and leaked on every call, and each
// input row was copied by value; also no longer dereferences row 0 of an
// empty matrix.
matrixF* Helpers::reLUMatrix(matrixF* input){
    matrixF* result = new matrixF();
    result->reserve(input->size());
    for (const vecF& vec : *input){
        vecF row;
        row.reserve(vec.size());
        for (float f : vec){
            row.push_back(reLU(f));
        }
        result->push_back(row);
    }
    return result;
}

// Logistic sigmoid: maps any real input into (0, 1).
float Helpers::sigmoid(float numb){
    const float denominator = 1 + exp(-numb);
    return 1 / denominator;
}

// Derivative of the sigmoid, evaluated from the pre-activation value.
// FIX: exp(-numb) was computed twice and squared via pow(); one exp call and
// a plain multiply do the same work far cheaper.
float Helpers::sigmoidDeriv(float numb){
    const float e = exp(-numb);
    const float denom = 1 + e;
    return e / (denom * denom);
}

// Inverse of the sigmoid (the logit): sigmoidInverse(sigmoid(x)) == x.
float Helpers::sigmoidInverse(float numb){
    return -log(1/numb - 1);
}

// Applies sigmoid element-wise; returns a newly allocated matrix the caller
// owns.
// FIX: the scratch row was heap-allocated and leaked on every call, and each
// input row was copied by value; also no longer dereferences row 0 of an
// empty matrix.
matrixF* Helpers::sigmoidMatrix(matrixF* input){
    matrixF* result = new matrixF();
    result->reserve(input->size());
    for (const vecF& vec : *input){
        vecF row;
        row.reserve(vec.size());
        for (float f : vec){
            row.push_back(sigmoid(f));
        }
        result->push_back(row);
    }
    return result;
}

// Sum of element-wise products; vector2 must be at least as long as vector1.
// Same left-to-right float accumulation as a manual loop.
float Helpers::dotProduct(vecF* vector1, vecF* vector2) {
    return std::inner_product(vector1->begin(), vector1->end(), vector2->begin(), 0.0f);
}

// Pairwise dot products scaled by append:
//   result[i][j] = append * (input1 row i) . (input2 row j)
// size1/size2 are kept for interface compatibility; dimensions actually come
// from the inputs. Caller owns the returned matrix.
// FIX: the scratch row was heap-allocated and leaked on every call, and both
// loops copied every row by value.
matrixF* Helpers::fullDot(matrixF* input1, matrixF* input2, short size1, short size2, float append){
    matrixF* result = new matrixF();
    result->reserve(input1->size());
    for (vecF& vec1 : *input1){
        vecF row;
        row.reserve(input2->size());
        for (vecF& vec2 : *input2){
            row.push_back(dotProduct(&vec1, &vec2) * append);
        }
        result->push_back(row);
    }
    return result;
}

// Appends the hidden-layer error rows to outMat:
//   outMat[i][j] = (input2 row j . input1 row i) * reLUDeriv(input3[i][j])
// `size` is unused but kept for interface compatibility.
// FIX: the scratch row was heap-allocated and leaked on every call, and rows
// were copied by value.
void Helpers::outDeriv(matrixF* outMat, matrixF* input1, matrixF* input2, matrixF* input3, short size){
    short i = 0;
    for (vecF& vec : *input1){
        vecF row;
        row.reserve(input2->size());
        short j = 0;
        for (vecF& weight : *input2){
            row.push_back(dotProduct(&weight, &vec) * reLUDeriv((*input3)[i][j]));
            j++;
        }
        outMat->push_back(row);
        i++;
    }
}

// In-place element-wise addition: vec1[i] += vec2[i] for every index of vec1.
// vec2 must be at least as long as vec1.
void Helpers::addVectors(vecF* vec1, vecF* vec2){
    for (std::size_t i = 0; i < vec1->size(); i++){
        (*vec1)[i] += (*vec2)[i];
    }
}

// Adds the bias vector to every row of the matrix, in place.
// FIX: each row used to be copied, modified, and assigned back element by
// element; mutating through a reference does the same work with no copies.
void Helpers::addBias(matrixF* input, vecF* bias){
    for (vecF& vec : *input){
        addVectors(&vec, bias);
    }
}

// Legacy sigmoid forward pass (run() uses forwardProp2 instead): for each
// input row, emits sigmoid(weight row . input row + bias term). Caller owns
// the returned matrix. `size` is unused but kept for interface compatibility.
// FIX: the scratch row was heap-allocated and leaked, and rows were copied.
// NOTE(review): the bias is indexed by the *image* index i, not the neuron
// index — this looks like a bug, but the behavior is preserved here; confirm
// before reviving this function.
matrixF* Helpers::forwardProp1(matrixF* input, matrixF* weights, vecF* bias, short size){
    matrixF* result = new matrixF();
    short i = 0;
    for (vecF& image : *input){
        vecF row;
        row.reserve(weights->size());
        for (vecF& weight : *weights){
            row.push_back(sigmoid(dotProduct(&weight, &image) + (*bias)[i]));
        }
        result->push_back(row);
        i += 1;
    }
    return result;
}

// One-layer forward pass: (input . weights) with the bias added to every row.
// Caller owns the returned matrix.
matrixF* Helpers::forwardProp2(matrixF* input, matrixF* weights, vecF* bias, short size1, short size2){
    matrixF* preActivation = fullDot(input, weights, size1, size2, 1);
    addBias(preActivation, bias);
    return preActivation;
}

// Sum of squared differences over the first `size` elements.
// FIX: pow(x, 2) invokes the general power routine; a plain multiply is much
// cheaper for squaring and keeps the same accumulation order.
float Helpers::squareError(vecF* output, vecF* correctOutput, short size){
    float gradient = 0.0;
    for (short int i = 0; i < size; i++){
        const float diff = (*output)[i] - (*correctOutput)[i];
        gradient += diff * diff;
    }
    return gradient;
}

// Scaled row sums: result[i] = append * sum(input row i). Caller owns the
// returned vector.
// FIX: each row was copied by value, and the result was pre-sized to the
// *column* count and then cleared even though it holds one entry per row;
// also no longer dereferences row 0 of an empty matrix.
vecF* Helpers::sumMatrix(matrixF* input, float append){
    vecF* result = new vecF();
    result->reserve(input->size());
    for (const vecF& vec : *input){
        float rowSum = 0;
        for (float f : vec){
            rowSum += f * append;
        }
        result->push_back(rowSum);
    }
    return result;
}

// Element-wise mat1 - mat2; dimensions are taken from mat1, so mat2 must be at
// least as large. Caller owns the returned matrix.
// FIX: the scratch row was heap-allocated and leaked on every call, and the
// matrix was fully copied twice just to measure it (getSizeMat/getSizeVec).
matrixF* Helpers::subtractMatrices(matrixF* mat1, matrixF* mat2){
    matrixF* result = new matrixF();
    result->reserve(mat1->size());
    for (std::size_t i = 0; i < mat1->size(); i++){
        const vecF& row1 = (*mat1)[i];
        vecF row;
        row.reserve(row1.size());
        for (std::size_t j = 0; j < row1.size(); j++){
            row.push_back(row1[j] - (*mat2)[i][j]);
        }
        result->push_back(row);
    }
    return result;
}

// Element-wise vec1 - vec2 over the first `size` entries. Caller owns the
// returned vector.
// FIX: the result was allocated with `size` zero-initialized elements and
// immediately cleared; reserve() skips the wasted initialization.
vecF* Helpers::subtractVectors(vecF* vec1, vecF* vec2, short size){
    vecF* result = new vecF();
    result->reserve(size);
    for (int i = 0; i < size; i++){
        result->push_back((*vec1)[i] - (*vec2)[i]);
    }
    return result;
}

// Matrix transpose: result[j][i] = input[i][j]. Caller owns the result.
// FIX: getSizeMat/getSizeVec copied the entire matrix just to measure it;
// also no longer dereferences row 0 of an empty matrix.
matrixF* Helpers::transform(matrixF* input){
    const std::size_t rows = input->size();
    const std::size_t cols = rows ? (*input)[0].size() : 0;
    matrixF* result = new matrixF(cols, vecF(rows));
    for (std::size_t i = 0; i < rows; i++){
        for (std::size_t j = 0; j < cols; j++){
            (*result)[j][i] = (*input)[i][j];
        }
    }
    return result;
}

// size1 x size2 matrix of uniform random values in [minNumb, maxNumb].
// Caller owns the returned matrix. rand() is consumed in row-major order.
matrixF* Helpers::initializeWeight(short size1, short size2, float minNumb, float maxNumb){
    matrixF* weights = new matrixF(size1, vecF(size2));
    for (vecF& row : *weights){
        for (float& w : row){
            w = randNumb(minNumb, maxNumb);
        }
    }
    return weights;
}

// Vector of `size` uniform random values in [minNumb, maxNumb]. Caller owns
// the returned vector.
vecF* Helpers::initializeBias(short size, float minNumb, float maxNumb){
    vecF* bias = new vecF(size);
    for (float& b : *bias){
        b = randNumb(minNumb, maxNumb);
    }
    return bias;
}

// Debug helper: prints the row count and the element count of the last row
// (the original loop reported the last row's length too).
// FIX: the old version copied every row and visited every element just to
// count them; size() answers both questions in O(1).
void Helpers::checkSizeMatrix(matrixF* input){
    const short t = static_cast<short>(input->size());
    const short q = input->empty() ? 0 : static_cast<short>(input->back().size());
    std::cout<<"Amount vectors: "<<t<<", amount elements each vector: "<<q<<". Quick: ("<<t<<", "<<q<<")"<<std::endl;
}

// Debug helper: prints the element count of a vector.
// FIX: the old loop visited every element just to count; size() is O(1).
void Helpers::checkSizeVector(vecF* input){
    const short t = static_cast<short>(input->size());
    std::cout<<"Elements: "<<t<<std::endl;
}

// Prints the fraction of the batch where the predicted digit (argmax of the
// network's output row) matches the label (argmax of the one-hot row).
// Both matrices must have at least batchSize rows.
void Helpers::getAccuracy(matrixF* output3, matrixF* correctOutput){
    float total = 0.0;
    for (int i = 0; i < batchSize; i++){
        const vecF& predicted = (*output3)[i];
        const vecF& expected = (*correctOutput)[i];
        const int predictedIdx = std::max_element(predicted.begin(), predicted.end()) - predicted.begin();
        const int expectedIdx = std::max_element(expected.begin(), expected.end()) - expected.begin();
        if (predictedIdx == expectedIdx){
            total += 1;
        }
    }
    std::cout<<"Accuracy is "<<total/batchSize*100<<"%"<<std::endl;
}

// Debug helper: prints the matrix row by row, elements comma-separated.
// FIX: each row was copied by value per iteration; iterate by const reference.
void Helpers::printMatrix(matrixF* input){
    for (const vecF& vec : *input){
        for (float f : vec){
            std::cout<<f<<", ";
        }
        std::cout<<std::endl;
    }
}

// Debug helper: prints the vector's elements, comma-separated, then a newline.
void Helpers::printVector(vecF* input){
    for (std::size_t i = 0; i < input->size(); i++){
        std::cout<<(*input)[i]<<", ";
    }
    std::cout<<std::endl;
}

// Prints current system memory statistics (Windows only). Used inside the
// training loop to watch memory consumption grow.
//   fullPrint: true  -> dump every MEMORYSTATUS field,
//              false -> print only the memory-load percentage.
// NOTE(review): GlobalMemoryStatus is deprecated and its values saturate on
// machines with more than 4 GB; GlobalMemoryStatusEx is the modern
// replacement — confirm before switching.
void Helpers::checkMemory(bool fullPrint){
        //code by Anderas Masur https://forums.codeguru.com/showthread.php?280081-Get-available-memory
        MEMORYSTATUS MemStat;
        memset(&MemStat, 0, sizeof(MemStat));
        ::GlobalMemoryStatus(&MemStat);

        if (fullPrint){
            std::cout << "Length of structure: " << MemStat.dwLength
                << std::endl
                << "Memory usage: " << MemStat.dwMemoryLoad
                << " %" << std::endl
                << "Physical memory: " << MemStat.dwTotalPhys / 1024
                << " KB" << std::endl
                << "Free physical memory: " << MemStat.dwAvailPhys / 1024
                << " KB" << std::endl
                << "Paging file: " << MemStat.dwTotalPageFile / 1024
                << " KB" << std::endl
                << "Free paging file: " << MemStat.dwAvailPageFile / 1024
                << " KB" << std::endl
                << "Virtual memory: " << MemStat.dwTotalVirtual / 1024
                << " KB" << std::endl
                << "Free virtual memory: " << MemStat.dwAvailVirtual / 1024
                << " KB" << std::endl;
        } else{
            std::cout<<"Memory usage: "<< MemStat.dwMemoryLoad<<" %"<<std::endl;
        }
    }

// Counts and reports NaNs in the matrix (NaN is the only float that compares
// unequal to itself).
// FIX: each row was copied by value per iteration; iterate by const reference.
void Helpers::checkForNanMatrix(matrixF* input){
    short total = 0;
    for (const vecF& vec : *input){
        for (float f : vec){
            //Nan compared to something is always false
            if (f != f){
                total+=1;
            }
        }
    }
    std::cout<<"There is "<<total<<" Nan in matrix"<<std::endl;
}

// Counts and reports NaNs in the vector (NaN is the only float that compares
// unequal to itself).
void Helpers::checkForNanVec(vecF* input){
    const short total = static_cast<short>(
        std::count_if(input->begin(), input->end(), [](float f){ return f != f; }));
    std::cout<<"There is "<<total<<" Nan in vector"<<std::endl;
}
#ifndef matrixLibrary_h
#define matrixLibrary_h
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <chrono>
#include <random>
#include <math.h>
#include <numeric>
#include <algorithm>
#include <stdio.h>
#include <windows.h>
#include <unistd.h>
#include <memory.h>
#include <conio.h>

// Number of training images consumed per gradient-descent batch.
#define batchSize 1000
// Pixels per MNIST image (28 x 28).
// FIX: was 748 — a digit-transposition typo for 784. Currently unused (run()
// hard-codes 784), but wrong if it were ever referenced.
#define imgSize 784

// Row-major matrix of floats: a matrixF is a vector of row vectors.
typedef std::vector<float> vecF;
typedef std::vector<vecF> matrixF;

// Stateless grab-bag of matrix/vector routines used by the neural network.
// Convention: functions returning matrixF*/vecF* allocate with new and the
// caller owns (and must delete) the result.
class Helpers{
public:
    Helpers();
    ~Helpers();

    // Uniform random float in [minNumb, maxNumb] (rand()-based).
    float randNumb(float minNumb, float maxNumb);

    // ReLU activation, its derivative, and the element-wise matrix version.
    float reLU(float numb);
    float reLUDeriv(float numb);
    matrixF* reLUMatrix(matrixF* input);

    // Sigmoid activation family and the element-wise matrix version.
    float sigmoid(float numb);
    float sigmoidDeriv(float numb);
    float sigmoidInverse(float numb);
    matrixF* sigmoidMatrix(matrixF* input);

    float dotProduct(vecF* vector1, vecF* vector2);
    float squareError(vecF* output, vecF* correctOutput, short size);

    // Layer forward passes (forwardProp2 is the one run() uses) and the
    // core linear-algebra helpers.
    matrixF* forwardProp1(matrixF* input, matrixF* weights, vecF* bias, short size);
    matrixF* forwardProp2(matrixF* input, matrixF* weights, vecF* bias, short size1, short size2);
    matrixF* fullDot(matrixF* input1, matrixF* input2, short size1, short size2, float append);
    matrixF* subtractMatrices(matrixF* mat1, matrixF* mat2);
    matrixF* transform(matrixF* input);
    matrixF* initializeWeight(short size1, short size2, float minNumb, float maxNumb);

    vecF* sumMatrix(matrixF* input, float append);    
    vecF* subtractVectors(vecF* vec1, vecF* vec2, short size);
    vecF* initializeBias(short size, float minNumb, float maxNumb);

    // Dimension queries (row count / element count).
    short getSizeMat(matrixF* mat);
    short getSizeVec(vecF* vec);

    // Backprop error term and in-place additions.
    void outDeriv(matrixF* outMat, matrixF* input1, matrixF* input2, matrixF* input3, short size);
    void addVectors(vecF* vec1, vecF* vec2);
    void addBias(matrixF* input, vecF* bias);

    // Debug/diagnostic printers.
    void checkSizeMatrix(matrixF* input);
    void checkSizeVector(vecF* input);
    void getAccuracy(matrixF* output3, matrixF* correctOutput);
    void printMatrix(matrixF* input);
    void printVector(vecF* input);
    void checkMemory(bool fullPrint);
    void checkForNanMatrix(matrixF* input);
    void checkForNanVec(vecF* input);
};

#include"matrixLibrary.cpp"
#endif

Aucun commentaire:

Enregistrer un commentaire