I am implementing a neural network for MNIST classification in C++, but it always performs like a random classifier (accuracy ~0.1). I cannot spot where the error lies; could you please help me out?
Here is the code for the neural network trainer:
#include "neuralNetworkTrainer.hpp"
#include <assert.h>
#include <iostream>
NetworkTrainer::NetworkTrainer(Settings const& settings, NetworkModel* pNetwork):
    pNetwork(pNetwork),
    learningRate(settings.learningRate),
    desiredAccuracy(settings.desiredAccuracy),
    maxEpochs(settings.maxEpochs),
    currentEpoch(0),
    testSetAccuracy(0),
    validationSetAccuracy(0),
    trainCrossEntropy(0)
{
    assert(pNetwork != nullptr);

    //Allocate memory
    deltaInputHidden.resize(pNetwork->weightsInputHidden.size());
    deltaHiddenOutput.resize(pNetwork->weightsHiddenOutput.size());
    errorGradientHidden.resize(pNetwork->hiddenNeurons.size());
    errorGradientOutput.resize(pNetwork->outputNeurons.size());

    //Initialization
    memset(deltaInputHidden.data(), 0, deltaInputHidden.size()*sizeof(double));
    memset(deltaHiddenOutput.data(), 0, deltaHiddenOutput.size()*sizeof(double));
    memset(errorGradientHidden.data(), 0, errorGradientHidden.size()*sizeof(double));
    memset(errorGradientOutput.data(), 0, errorGradientOutput.size()*sizeof(double));
}
void NetworkTrainer::train(dataset const& data){
    //Reset training state
    currentEpoch = 0;
    testSetAccuracy = 0;
    trainCrossEntropy = 0;

    cout << "########################################" << endl;
    cout << "Start Training" << endl;
    cout << "########################################" << endl;

    while(validationSetAccuracy < desiredAccuracy && currentEpoch < maxEpochs){
        //Train the network for one epoch
        runEpoch(data.trainSet);
        //Get the accuracy on the validation set
        validationSetAccuracy = getAccuracy(data.validationSet);
        cout << "Epoch: " << currentEpoch
             << " accuracy on the validation set: " << validationSetAccuracy << endl;
        currentEpoch++;
    }

    cout << "########################################" << endl;
    cout << "End training" << endl;
    cout << "########################################" << endl;

    //Get the accuracy on the test set
    testSetAccuracy = getAccuracy(data.testSet);
    cout << "Final accuracy on the test set: " << testSetAccuracy << endl;
}
//Get the error gradient at node j of the hidden layer.
//We compute \delta_j = (1 - tanh^2(z_j)) \sum_{k=1}^{M} w_{jk} \delta_k,
//where z_j is the input of hidden neuron j before the non-linearity is applied,
//\delta_k is the error gradient of output neuron k, and M is the number of
//neurons in the output layer. Since hiddenNeurons[j] already stores tanh(z_j),
//the derivative term is computed as 1 - hiddenNeurons[j]^2.
double NetworkTrainer::getErrorGradientHidden(int hiddenIdx){
    double weightedSum = 0;
    for(int outputIdx = 0; outputIdx < pNetwork->numOutputs; outputIdx++){
        int weightIdx = pNetwork->getHiddenOutputWeightIndex(hiddenIdx, outputIdx);
        weightedSum += pNetwork->weightsHiddenOutput[weightIdx]*errorGradientOutput[outputIdx];
    }
    //return (1.0 - pow(tanh(pNetwork->weightsHiddenOutput[hiddenIdx]), 2))*weightedSum;
    return (1.0 - pow(pNetwork->hiddenNeurons[hiddenIdx], 2))*weightedSum;
}
//The function backpropagate computes how much each weight has to change.
//The delta for each weight is computed as the product of:
//- the learning rate
//- the value of the neuron the weight originates from
//- the error gradient at the neuron the weight feeds into
void NetworkTrainer::backpropagate(const vector<double>& trueLabels){
    //Get the error gradient for each output node and compute how much each weight has to change
    for(int outputIdx = 0; outputIdx < pNetwork->numOutputs; outputIdx++){
        errorGradientOutput[outputIdx] = getErrorGradientOutput(trueLabels[outputIdx], pNetwork->outputNeurons[outputIdx]);
        for(int hiddenIdx = 0; hiddenIdx <= pNetwork->numHidden; hiddenIdx++){
            int weightIdx = pNetwork->getHiddenOutputWeightIndex(hiddenIdx, outputIdx);
            deltaHiddenOutput[weightIdx] = learningRate * pNetwork->hiddenNeurons[hiddenIdx] * errorGradientOutput[outputIdx];// + 0.9*deltaHiddenOutput[weightIdx];
        }
    }
    //Get the error gradient for each hidden node and compute how much each weight has to change
    for(int hiddenIdx = 0; hiddenIdx <= pNetwork->numHidden; hiddenIdx++){
        errorGradientHidden[hiddenIdx] = getErrorGradientHidden(hiddenIdx);
        for(int inputIdx = 0; inputIdx <= pNetwork->numInputs; inputIdx++){
            int weightIdx = pNetwork->getInputHiddenWeightIndex(inputIdx, hiddenIdx);
            deltaInputHidden[weightIdx] = learningRate * pNetwork->inputNeurons[inputIdx] * errorGradientHidden[hiddenIdx];// + 0.9*deltaInputHidden[weightIdx];
        }
    }
}
void NetworkTrainer::updateWeights(){
    for(int inputIdx = 0; inputIdx <= pNetwork->numInputs; inputIdx++){
        for(int hiddenIdx = 0; hiddenIdx <= pNetwork->numHidden; hiddenIdx++){
            int weightIdx = pNetwork->getInputHiddenWeightIndex(inputIdx, hiddenIdx);
            pNetwork->weightsInputHidden[weightIdx] -= deltaInputHidden[weightIdx];
        }
    }
    for(int hiddenIdx = 0; hiddenIdx <= pNetwork->numHidden; hiddenIdx++){
        for(int outputIdx = 0; outputIdx < pNetwork->numOutputs; outputIdx++){
            int weightIdx = pNetwork->getHiddenOutputWeightIndex(hiddenIdx, outputIdx);
            pNetwork->weightsHiddenOutput[weightIdx] -= deltaHiddenOutput[weightIdx];
        }
    }
}
void NetworkTrainer::runEpoch(vector<data> const& trainSet){
    for(auto const& d : trainSet){
        pNetwork->evaluate(d.X);
        backpropagate(d.Y);
        //We use stochastic gradient descent, and thus at each datapoint we update the weights
        updateWeights();
        double trainSetAccuracy = getAccuracy(trainSet);
        cout << trainSetAccuracy << endl;
    }
}
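//Design note: this is pure stochastic gradient descent, one update per sample;
//an alternative would be to accumulate the deltas over a mini-batch and call
//updateWeights() once per batch. Also note that calling getAccuracy(trainSet)
//after every single sample makes each epoch O(n^2) in the training-set size;
//I only keep it here for debugging.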
double NetworkTrainer::getAccuracy(const vector<data>& testSet){
    int numIncorrectResults = 0;
    double totDatapoints = testSet.size();
    for(auto const& d : testSet){
        //Predicted class: index of the largest output neuron
        auto maxIt = max_element(pNetwork->outputNeurons.begin(), pNetwork->outputNeurons.end());
        int prediction = distance(pNetwork->outputNeurons.begin(), maxIt);
        //Target class: index of the one-hot entry in d.Y
        bool found = false;
        int target = -1;
        for(int i = 0; i < pNetwork->numOutputs && !found; i++){
            if(d.Y[i] == 1){
                target = i;
                found = true;
            }
        }
        assert(target > -1);
        if(target != prediction)
            numIncorrectResults++;
    }
    return 1.0 - numIncorrectResults/totDatapoints;
}
vector<data> NetworkTrainer::createDataset(vector<vector<double>> X, vector<vector<double>> Y){
    vector<data> dataset;
    dataset.resize(X.size());
    for(size_t i = 0; i < X.size(); i++){
        dataset[i].X = X[i];
        dataset[i].Y = Y[i];
    }
    return dataset;
}
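To rule out a math or indexing error in the backpropagation itself, I am also considering a finite-difference gradient check. Here is a minimal sketch of what I mean, which would live in the same .cpp; the helper computeLoss is hypothetical (it would call pNetwork->evaluate(x) and return the cross-entropy of the resulting output against y):

//Hypothetical helper: runs pNetwork->evaluate(x) and returns the
//cross-entropy loss of the current network output against y.
double computeLoss(NetworkModel* pNetwork,
                   vector<double> const& x, vector<double> const& y);

//Finite-difference estimate of dE/dw for a single input->hidden weight:
//perturb the weight by +/-eps, re-evaluate the loss, and take the
//central difference (lossPlus - lossMinus)/(2*eps).
double numericalGradient(NetworkModel* pNetwork, int weightIdx,
                         vector<double> const& x, vector<double> const& y,
                         double eps = 1e-5){
    double original = pNetwork->weightsInputHidden[weightIdx];
    pNetwork->weightsInputHidden[weightIdx] = original + eps;
    double lossPlus = computeLoss(pNetwork, x, y);
    pNetwork->weightsInputHidden[weightIdx] = original - eps;
    double lossMinus = computeLoss(pNetwork, x, y);
    pNetwork->weightsInputHidden[weightIdx] = original; //restore the weight
    return (lossPlus - lossMinus)/(2.0*eps);
}

Since deltaInputHidden[weightIdx] already folds in the learning rate, the value to compare against would be deltaInputHidden[weightIdx]/learningRate, up to the sign convention used for the error gradient.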
In addition, I define the following inline function in the .hpp file:
//Member functions
inline double getErrorGradientOutput(double desiredOutput, double output){
    return desiredOutput - output;
}
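The reasoning I based this on is the standard cross-entropy/softmax result: for a softmax output layer with loss E = -\sum_k t_k \log(o_k), the derivative with respect to the pre-activation is \partial E / \partial z_k = o_k - t_k, so this function returns t_k - o_k, i.e. the negative of that derivative.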
Please help me! I really cannot spot the bug!