vendredi 5 janvier 2018

implementing 3 layers neural network for MNIST digits classification in C++

I am implementing a neural network for MNIST classification using C++, nevertheless my neural network always performs as a random classifier (accuracy ~0.1). I cannot spot where the error lays, could you please help me out???

Here there is the code for the Neural Network trainer:

#include "neuralNetworkTrainer.hpp"
#include <assert.h>
#include <iostream>

NetworkTrainer::NetworkTrainer(Settings const& settings, NetworkModel* pNetwork):
pNetwork(pNetwork),
learningRate(settings.learningRate),
desiredAccuracy(settings.desiredAccuracy),
maxEpochs(settings.maxEpochs),
currentEpoch(0),
testSetAccuracy(0),
validationSetAccuracy(0),
trainCrossEntropy(0)
{
    assert(pNetwork != nullptr);

    //Allocate memory
    deltaInputHidden.resize(pNetwork->weightsInputHidden.size());
    deltaHiddenOutput.resize(pNetwork->weightsHiddenOutput.size());
    errorGradientHidden.resize(pNetwork->hiddenNeurons.size());
    errorGradientOutput.resize(pNetwork->outputNeurons.size());

    //Initialization
    memset(deltaInputHidden.data(), 0, deltaInputHidden.size()*sizeof(double));
    memset(deltaHiddenOutput.data(), 0, deltaHiddenOutput.size()*sizeof(double));
    memset(errorGradientHidden.data(), 0, errorGradientHidden.size()*sizeof(double));
    memset(errorGradientOutput.data(), 0, errorGradientOutput.size()*sizeof(double));
}

void NetworkTrainer::train(dataset const& data){


 //Reset training state
    currentEpoch=0;
    testSetAccuracy=0;
    trainCrossEntropy=0;

    cout << "######################################## " << endl;
    cout << "Start Training" << endl;
    cout << "######################################## " << endl;

    while(validationSetAccuracy < desiredAccuracy && currentEpoch < maxEpochs){
        //Train the network for one epoch
        runEpoch(data.trainSet);

        //Get accuracy on the validation set
        validationSetAccuracy = getAccuracy(data.validationSet);

        cout << "Epoch: " + to_string(currentEpoch) + " accuracy on validation set: " + to_string(validationSetAccuracy) <<endl;

        currentEpoch++;
    }

    cout << "######################################## " << endl;
    cout << "End training " << endl;
    cout << "######################################## " << endl;

    //Get accuracy on the test set
    testSetAccuracy = getAccuracy(data.testSet);

    cout << "Final accuracy on the test set: " << testSetAccuracy << endl;
 }

//Get the error made by the gradient in the node j of the hidden layer
//In order to do so we compute the equation \delta_{j} = (1-tanh^2(z_j)) \sum_{k=1}^M w_{jk} \delta{ouput}
// where z_1 represents the output of the hidden layer before the non-linearity is applied and
// M represents the number of neurons in the output layer

double NetworkTrainer::getErrorGradientHidden(int hiddenIdx){
    double weightedSum = 0;
    for(int outputIdx=0; outputIdx < pNetwork->numOutputs; outputIdx++){
        int weightIdx = pNetwork->getHiddenOutputWeightIndex(hiddenIdx, outputIdx);
        weightedSum += pNetwork->weightsHiddenOutput[weightIdx]*errorGradientOutput[outputIdx];
    }
    //return (1.0 - pow(tanh(pNetwork->weightsHiddenOutput[hiddenIdx]), 2))*weightedSum;
    return (1.0 - pow(pNetwork->hiddenNeurons[hiddenIdx],2))*weightedSum;
}

//The function backpropagate is used to compute how much each weight has to change.
//The delta for each weight is computed as the ptoduct of:
//-Learning rate
//-the value of the neuron
//-the error of the gradient in that neuron
void NetworkTrainer::backpropagate(const vector<double> &trueLabels){

    //Get the gradient error for each output node and compute how much each weight has to change
    for(int outputIdx=0; outputIdx<pNetwork->numOutputs; outputIdx++){
        errorGradientOutput[outputIdx] = getErrorGradientOutput(trueLabels[outputIdx], pNetwork->outputNeurons[outputIdx]);

        for(int hiddenIdx=0; hiddenIdx <= pNetwork->numHidden; hiddenIdx++){
            int weightIdx = pNetwork->getHiddenOutputWeightIndex(hiddenIdx, outputIdx);
            deltaHiddenOutput[weightIdx] = learningRate * pNetwork-> hiddenNeurons[hiddenIdx]*errorGradientOutput[outputIdx];// + 0.9*deltaHiddenOutput[weightIdx];
        }
    }
    //Get the gradient error for each hidden node and compute how much each weight has to change
    for(int hiddenIdx=0; hiddenIdx<=pNetwork->numHidden; hiddenIdx++){
        errorGradientHidden[hiddenIdx] = getErrorGradientHidden(hiddenIdx);
        for(int inputIdx=0; inputIdx <= pNetwork->numInputs; inputIdx++){
            int weightIdx = pNetwork->getInputHiddenWeightIndex(inputIdx, hiddenIdx);
            deltaInputHidden[weightIdx] = learningRate * pNetwork-> inputNeurons[inputIdx]*errorGradientHidden[hiddenIdx];// + 0.9*deltaInputHidden[weightIdx];
        }
    }
}

void NetworkTrainer::updateWeights(){

    for(int inputIdx=0; inputIdx <=pNetwork->numInputs; inputIdx++){
        for(int hiddenIdx=0; hiddenIdx<=pNetwork->numHidden; hiddenIdx++){
            int weightIdx = pNetwork->getInputHiddenWeightIndex(inputIdx, hiddenIdx);
            pNetwork->weightsInputHidden[weightIdx] -= deltaInputHidden[weightIdx];
        }
    }
    for(int hiddenIdx=0; hiddenIdx <=pNetwork->numHidden; hiddenIdx++){
        for(int outputIdx=0; outputIdx<pNetwork->numOutputs; outputIdx++){
            int weightIdx = pNetwork->getHiddenOutputWeightIndex(hiddenIdx, outputIdx);
            pNetwork->weightsHiddenOutput[weightIdx] -= deltaHiddenOutput[weightIdx];
        }
    }

}

void NetworkTrainer::runEpoch(vector<data> const& trainSet){

    for(auto const& d: trainSet){
        pNetwork->evaluate(d.X);
        backpropagate(d.Y);
        //We use stochastic gradient descent, and thus at each datapoint we update the weights
        updateWeights();
        double trainSetAccuracy = getAccuracy(trainSet);
        cout << trainSetAccuracy << endl;
    }
}

double NetworkTrainer::getAccuracy(const vector<data> &testSet){

    double accuracy = 0;
    int numIncorrectResults=0;
    double totDatapoints = testSet.size();

    for(auto const& d: testSet){

        vector<double>::iterator i;
        i = max_element(pNetwork->outputNeurons.begin(), pNetwork->outputNeurons.end());
        double prediction = distance(pNetwork->outputNeurons.begin(), i);

        bool found = 0;
        double target = -1;
        for(int i=0; i < pNetwork->numOutputs && !found; i++){
            if(d.Y[i] ==1){
                target=i;
                found=1;
            }
        }
        assert(target>-1);
        if(target!=prediction)
            numIncorrectResults++;
    }
    accuracy = 1.0 - numIncorrectResults/totDatapoints;
    return accuracy;
}

vector<data> NetworkTrainer::createDataset(vector<vector<double>> X, vector<vector<double>> Y){
    vector<data> dataset;
    dataset.resize(X.size());
    for(int i=0; i<X.size(); i++){
        dataset[i].X = X[i];
        dataset[i].Y = Y[i];
    }
    return dataset;
}

Further, I implement the following function in the .hpp file:

//Member functions
    inline double getErrorGradientOutput(double desiredOutput, double output){
        return desiredOutput-output;
    };

Please help me! I really cannot spot the bug!

Aucun commentaire:

Enregistrer un commentaire