I'm working on a neural network for classifying handwritten digits (MNIST). I'm using Mingw-w64 (C++11) on a Windows 10 device, with Sublime Text 3 and the default build system provided by Sublime Text. My program builds normally, but after a few iterations it just stops running. matrixLibrary.h (the second and third bunches of code) consists of a bunch of functions used in neural networking, while the first bunch of code is the actual network. The code is inspired by (https://www.kaggle.com/wwsalmon/simple-mnist-nn-from-scratch-numpy-no-tf-keras/notebook), and the dataset is also on the same site. I can't figure out why it stops. Please help.
#include "matrixLibrary.h"
// Byte-swap helpers for converting the big-endian integers in the MNIST
// idx file headers to the host's little-endian byte order.
// Every use of the macro argument is parenthesized, so expression
// arguments (e.g. swap16(a | b)) expand correctly -- the old macros
// substituted `n` bare and mis-parsed such arguments.
#define swap16(n) ((((n)&0xFF00)>>8)|(((n)&0x00FF)<<8))
#define swap32(n) ((swap16(((n)&0xFFFF0000)>>16))|((swap16((n)&0x0000FFFF))<<16))
typedef unsigned char byte;
// One MNIST sample: normalized pixel intensities, a one-hot target
// vector (10 classes) and the numeric label.
struct MNISTchar {
    std::vector<float> pixelData;  // pixel values scaled to [0, 1]
    std::vector<float> output;     // one-hot encoding of the label
    int label;                     // digit 0-9
    MNISTchar() : pixelData(), output(10), label(0) {}
};
class MNIST{
public:
//MNIST loader by Peter Baumann (https://github.com/krck/MNIST-Loader)
const std::vector<MNISTchar> trainingData;
const std::vector<MNISTchar> testData;
MNIST(const std::string& path, int maxNumb)
: trainingData(getMNISTdata(path + "train-images-idx3-ubyte", path + "train-labels-idx1-ubyte")),
testData(getMNISTdata(path + "t10k-images-idx3-ubyte", path + "t10k-labels-idx1-ubyte")) {
if(!this->trainingData.size()) { std::cout <<"ERROR: parsing training data" <<std::endl; }
if(!this->testData.size()) { std::cout <<"ERROR: parsing testing data" <<std::endl; }
}
private:
std::vector<MNISTchar> getMNISTdata(const std::string& imagepath, const std::string& labelpath) {
std::vector<MNISTchar> tmpdata = std::vector<MNISTchar>();
std::fstream file (imagepath, std::ifstream::in | std::ifstream::binary);
int magicNum_images = 0, magicNum_labels = 0;
int itemCount_images = 0, itemCount_labels = 0;
if(file.is_open()) {
int row_count = 0, col_count = 0;
file.read((char*)&magicNum_images, 4);
file.read((char*)&itemCount_images, 4);
file.read((char*)&row_count, 4);
file.read((char*)&col_count, 4);
magicNum_images = swap32(magicNum_images);
itemCount_images = swap32(itemCount_images);
row_count = swap32(row_count);
col_count= swap32(col_count);
for (int i = 0; i < itemCount_images; i++) {
MNISTchar tmpchar = MNISTchar();
for(int r = 0; r < (row_count * col_count); r++) {
byte pixel = 0;
file.read((char*)&pixel, 1);
tmpchar.pixelData.push_back((float)pixel / 255);
}
tmpdata.push_back(tmpchar);
}
}
file.close();
file.open(labelpath, std::ifstream::in | std::ifstream::binary);
if (file.is_open()) {
file.read((char*)&magicNum_labels, 4);
file.read((char*)&itemCount_labels, 4);
magicNum_labels = swap32(magicNum_labels);
itemCount_labels = swap32(itemCount_labels);
if(itemCount_images == itemCount_labels) {
for(MNISTchar& m : tmpdata) {
file.read((char*)&m.label, 1);
m.output[m.label] = 1.0f;
}
}
}
file.close();
return tmpdata;
}
public:
void run(float minNumb, float maxNumb, float learningrate, short itereations){
Helpers helpers = Helpers();
short inputSize = 784;
short dimension1 = 10;
short outputSize = 10;
matrixF* weights1 = helpers.initializeWeight(dimension1, inputSize, minNumb, maxNumb);
matrixF* weights2 = helpers.initializeWeight(outputSize, dimension1, minNumb, maxNumb);
vecF* bias1 = helpers.initializeBias(dimension1, minNumb, maxNumb);
vecF* bias2 = helpers.initializeBias(outputSize, minNumb, maxNumb);
matrixF* input = new matrixF(batchSize, vecF(inputSize));
input->clear();
matrixF* correctOutput = new matrixF(batchSize, vecF(outputSize));
correctOutput->clear();
for (int i = 0; i < batchSize; i++){
input->push_back(trainingData[i].pixelData);
correctOutput->push_back(trainingData[i].output);
}
input = helpers.transform(input);
correctOutput = helpers.transform(correctOutput);
matrixF* wbOut1 = 0;
matrixF* wbOut2 = 0;
matrixF* actFuncOut1 = 0;
matrixF* actFuncOut2 = 0;
float append = learningrate/batchSize;
matrixF* dOut1 = new matrixF(batchSize, vecF(dimension1));
for (short i = 0; i < itereations; i++){
wbOut1 = helpers.forwardProp2(weights1, helpers.transform(input), bias1, batchSize, dimension1);
actFuncOut1 = helpers.reLUMatrix(wbOut1);
wbOut2 = helpers.forwardProp2(weights2, helpers.transform(actFuncOut1), bias2, batchSize, outputSize);
actFuncOut2 = helpers.reLUMatrix(wbOut2);
//backprop start
matrixF* dOut2 = 0;
dOut1->clear();
dOut2 = helpers.subtractMatrices(actFuncOut2, correctOutput);
helpers.outDeriv(dOut1, helpers.transform(weights2), helpers.transform(dOut2), wbOut1, dimension1);
weights1 = helpers.subtractMatrices(weights1, helpers.fullDot(dOut1, input, dimension1, inputSize, append));
bias1 = helpers.subtractVectors(bias1, helpers.sumMatrix(dOut1, append), dimension1);
weights2 = helpers.subtractMatrices(weights2, helpers.fullDot(dOut2, actFuncOut1, dimension1, outputSize, append));
bias2 = helpers.subtractVectors(bias2, helpers.sumMatrix(dOut2, append), outputSize);
//backprop done
std::cout<<i+1<<std::endl;
helpers.getAccuracy(helpers.transform(actFuncOut2), helpers.transform(correctOutput));
helpers.checkMemory(0);
}
}
};
typedef std::chrono::steady_clock clk;
// Loads the MNIST data set from ./Dataset/, trains the network, and
// reports the load time and total run time in seconds.
int main(){
    const clk::time_point startTime = clk::now();
    MNIST mnist = MNIST("Dataset/", 1);
    const clk::time_point stopLoad = clk::now();
    float time = std::chrono::duration_cast<std::chrono::milliseconds>(stopLoad - startTime).count();
    std::cout<<"Load time: "<<time/1000<<" sec." <<std::endl;
    // Seed rand() once before any randNumb() call in run().
    std::srand(std::time(nullptr));
    mnist.run(-0.5, 1, 0.01, 500);
    const clk::time_point stopTotal = clk::now();
    time = std::chrono::duration_cast<std::chrono::milliseconds>(stopTotal - startTime).count();
    // This measures wall-clock run time, not compile time -- the old
    // label ("Total compile time") was misleading.
    std::cout<<"Total run time: "<<time/1000<<" sec." <<std::endl;
    return 0;
}
#include"matrixLibrary.h"
// Helpers is stateless, so the constructor has nothing to initialize.
Helpers::Helpers(){
}
// No owned resources; the destructor is intentionally empty.
Helpers::~Helpers(){
}
// Returns the number of rows in the matrix.
// Uses vector::size() directly -- the old loop counted rows one by one
// and copied every row while doing so.
short Helpers::getSizeMat(matrixF* mat){
    return static_cast<short>(mat->size());
}
// Returns the number of elements in the vector.
// Uses vector::size() directly instead of counting in an O(n) loop.
short Helpers::getSizeVec(vecF* vec){
    return static_cast<short>(vec->size());
}
// Pseudo-random float in [minNumb, maxNumb], driven by rand().
// Callers are expected to seed with srand() first (see main()).
float Helpers::randNumb(float minNumb, float maxNumb){
    const float scale = static_cast<float>(RAND_MAX) / (maxNumb - minNumb);
    return minNumb + static_cast<float>(rand()) / scale;
}
// Rectified linear unit: passes positives through, clamps the rest to 0.
float Helpers::reLU(float numb){
    if (numb > 0) {
        return numb;
    }
    return 0;
}
// Derivative of ReLU: 1 for positive inputs, 0 otherwise.
float Helpers::reLUDeriv(float numb){
    return (numb > 0) ? 1.0f : 0.0f;
}
// Applies ReLU element-wise; returns a newly allocated matrix owned by
// the caller. Fixes a leak (the old scratch `vecF* oneVec` was new'd and
// never freed) and stops copying every input row during iteration.
matrixF* Helpers::reLUMatrix(matrixF* input){
    matrixF* result = new matrixF();
    result->reserve(input->size());
    for (const vecF& row : *input){
        vecF activated;
        activated.reserve(row.size());
        for (float f : row){
            activated.push_back(reLU(f));
        }
        result->push_back(activated);
    }
    return result;
}
// Logistic sigmoid: 1 / (1 + e^-x).
float Helpers::sigmoid(float numb){
    const double e = exp(-numb);
    return 1 / (1 + e);
}
float Helpers::sigmoidDeriv(float numb){
return exp(-numb)/pow((1+exp(-numb)), 2);
}
// Logit (inverse sigmoid): log(p / (1 - p)), valid for numb in (0, 1).
float Helpers::sigmoidInverse(float numb){
    return -log(1 / numb - 1);
}
// Applies the sigmoid element-wise; returns a newly allocated matrix
// owned by the caller. Fixes a leak (the scratch `vecF* oneVec` was
// new'd and never freed) and avoids copying every row while iterating.
matrixF* Helpers::sigmoidMatrix(matrixF* input){
    matrixF* result = new matrixF();
    result->reserve(input->size());
    for (const vecF& row : *input){
        vecF activated;
        activated.reserve(row.size());
        for (float f : row){
            activated.push_back(sigmoid(f));
        }
        result->push_back(activated);
    }
    return result;
}
// Dot product of two vectors; vector2 must have at least as many
// elements as vector1. std::inner_product performs the identical
// sequential multiply-accumulate the old hand-rolled loop did.
float Helpers::dotProduct(vecF* vector1, vecF* vector2) {
    return std::inner_product(vector1->begin(), vector1->end(), vector2->begin(), 0.0f);
}
// Scaled matrix product: result[r][c] = dot(input1 row r, input2 row c) * append.
// input2 is iterated by ROW, so callers pass the transpose of the
// right-hand matrix. size1/size2 are unused (the result is sized from
// the inputs) but kept for interface compatibility.
// Fixes a leak (scratch `vecF* oneVec` was never freed) and stops
// copying both operand rows on every iteration.
matrixF* Helpers::fullDot(matrixF* input1, matrixF* input2, short size1, short size2, float append){
    matrixF* result = new matrixF();
    result->reserve(input1->size());
    for (vecF& row : *input1){
        vecF products;
        products.reserve(input2->size());
        for (vecF& col : *input2){
            products.push_back(dotProduct(&row, &col) * append);
        }
        result->push_back(products);
    }
    return result;
}
// Hidden-layer error term: appends to outMat one row per input1 row,
// where entry j = dot(input2 row j, input1 row i) * reLU'(input3[i][j]).
// size is unused (it only sized the old leaked scratch vector) but kept
// for interface compatibility.
// Fixes a leak (scratch `vecF* oneVec` was never freed) and stops
// copying the operand rows on every iteration.
void Helpers::outDeriv(matrixF* outMat, matrixF* input1, matrixF* input2, matrixF* input3, short size){
    short i = 0;
    for (vecF& vec : *input1){
        vecF derivRow;
        derivRow.reserve(input2->size());
        short j = 0;
        for (vecF& weight : *input2){
            derivRow.push_back(dotProduct(&weight, &vec) * reLUDeriv((*input3)[i][j]));
            j++;
        }
        outMat->push_back(derivRow);
        i++;
    }
}
// In-place element-wise add: vec1[k] += vec2[k].
// NOTE(review): assumes vec2 has at least as many elements as vec1 --
// confirm at every call site, the function does not check.
void Helpers::addVectors(vecF* vec1, vecF* vec2){
    const size_t count = vec1->size();
    for (size_t k = 0; k < count; ++k){
        (*vec1)[k] += (*vec2)[k];
    }
}
// Adds the bias to a (neurons x batch) matrix produced by fullDot:
// bias[r] is broadcast across every element of row r, matching the
// reference implementation's Z = W.X + b column broadcast.
// BUG FIX: the old code added bias element-wise ALONG each row, so with
// rows of batchSize (1000) elements it read far past the end of the
// 10-entry bias vector -- undefined behavior and wrong math.
void Helpers::addBias(matrixF* input, vecF* bias){
    short i = 0;
    for (vecF& row : *input){
        const float b = (*bias)[i];
        for (float& value : row){
            value += b;
        }
        i++;
    }
}
// Fully-connected layer with sigmoid activation: one output row per
// input row, one entry per weight row. size is unused; kept for
// interface compatibility. Caller owns the returned matrix.
// NOTE(review): bias is indexed by the INPUT row (i), not by the neuron,
// unlike forwardProp2/addBias -- looks unintended, but the function is
// unused in run(), so the behavior is preserved; confirm before use.
// Fixes a leak: the old scratch `vecF* oneImg` was new'd and never freed.
matrixF* Helpers::forwardProp1(matrixF* input, matrixF* weights, vecF* bias, short size){
    matrixF* result = new matrixF();
    result->reserve(input->size());
    short i = 0;
    for (vecF& image : *input){
        vecF activations;
        activations.reserve(weights->size());
        for (vecF& weight : *weights){
            activations.push_back(sigmoid(dotProduct(&weight, &image) + (*bias)[i]));
        }
        result->push_back(activations);
        i += 1;
    }
    return result;
}
// Layer pre-activation: multiplies the two matrices via fullDot (append=1)
// and adds the bias in place. run() passes the weight matrix as `input`
// and the transposed activations as `weights`. Caller owns the result.
matrixF* Helpers::forwardProp2(matrixF* input, matrixF* weights, vecF* bias, short size1, short size2){
    matrixF* weighted = fullDot(input, weights, size1, size2, 1);
    addBias(weighted, bias);
    return weighted;
}
// Sum of squared differences over the first `size` elements.
// diff*diff replaces pow(diff, 2): same value, no transcendental call.
float Helpers::squareError(vecF* output, vecF* correctOutput, short size){
    float total = 0.0f;
    for (short i = 0; i < size; i++){
        const float diff = (*output)[i] - (*correctOutput)[i];
        total += diff * diff;
    }
    return total;
}
// Returns a vector with one entry per input row: the sum of that row's
// elements, each scaled by `append`. Caller owns the result.
// Rows are now taken by reference -- the old range-for copied every row.
vecF* Helpers::sumMatrix(matrixF* input, float append){
    vecF* result = new vecF();
    result->reserve(input->size());
    for (vecF& row : *input){
        float rowSum = 0;
        for (float f : row){
            rowSum += f * append;
        }
        result->push_back(rowSum);
    }
    return result;
}
// Element-wise mat1 - mat2, sized from mat1 (mat2 must be at least as
// large). Caller owns the returned matrix.
// Fixes a leak: the old scratch `vecF* oneVec` was new'd and never freed.
matrixF* Helpers::subtractMatrices(matrixF* mat1, matrixF* mat2){
    const size_t rows = mat1->size();
    matrixF* result = new matrixF();
    result->reserve(rows);
    for (size_t i = 0; i < rows; i++){
        const vecF& a = (*mat1)[i];
        const vecF& b = (*mat2)[i];
        vecF diff;
        diff.reserve(a.size());
        for (size_t j = 0; j < a.size(); j++){
            diff.push_back(a[j] - b[j]);
        }
        result->push_back(diff);
    }
    return result;
}
// Element-wise vec1 - vec2 over the first `size` entries; caller owns
// the returned vector.
vecF* Helpers::subtractVectors(vecF* vec1, vecF* vec2, short size){
    vecF* result = new vecF();
    result->reserve(size);
    for (short i = 0; i < size; i++){
        result->push_back((*vec1)[i] - (*vec2)[i]);
    }
    return result;
}
// Returns the transpose of the input as a newly allocated matrix that
// the caller owns. Assumes all rows have the same length as row 0.
matrixF* Helpers::transform(matrixF* input){
    const short rows = getSizeMat(input);
    const short cols = getSizeVec(&(*input)[0]);
    matrixF* transposed = new matrixF(cols, vecF(rows));
    for (short r = 0; r < rows; r++){
        for (short c = 0; c < cols; c++){
            (*transposed)[c][r] = (*input)[r][c];
        }
    }
    return transposed;
}
// Builds a size1 x size2 matrix filled with uniform random values in
// [minNumb, maxNumb]; caller owns the result. rand() is consumed in
// row-major order, exactly as before.
matrixF* Helpers::initializeWeight(short size1, short size2, float minNumb, float maxNumb){
    matrixF* weights = new matrixF(size1, vecF(size2));
    for (vecF& row : *weights){
        for (float& w : row){
            w = randNumb(minNumb, maxNumb);
        }
    }
    return weights;
}
// Builds a bias vector of `size` uniform random values in
// [minNumb, maxNumb]; caller owns the result.
vecF* Helpers::initializeBias(short size, float minNumb, float maxNumb){
    vecF* bias = new vecF(size);
    for (float& b : *bias){
        b = randNumb(minNumb, maxNumb);
    }
    return bias;
}
// Debug helper: prints (row count, element count). As before, the
// element count reported is that of the LAST row (rows are assumed
// uniform); both are 0 for an empty matrix.
// O(1) via vector::size() -- the old version copied and walked every row.
void Helpers::checkSizeMatrix(matrixF* input){
    const short t = static_cast<short>(input->size());
    const short q = input->empty() ? 0 : static_cast<short>(input->back().size());
    std::cout<<"Amount vectors: "<<t<<", amount elements each vector: "<<q<<". Quick: ("<<t<<", "<<q<<")"<<std::endl;
}
// Debug helper: prints the vector's element count.
// O(1) via vector::size() instead of counting in a loop.
void Helpers::checkSizeVector(vecF* input){
    const short t = static_cast<short>(input->size());
    std::cout<<"Elements: "<<t<<std::endl;
}
// Prints batch accuracy: the fraction of rows where the index of the
// largest predicted value matches the index of the one-hot target.
// Both matrices must be (batchSize x classes), one row per sample.
void Helpers::getAccuracy(matrixF* output3, matrixF* correctOutput){
    float total = 0.0;
    for (int i = 0; i < batchSize; i++){
        const vecF& predicted = (*output3)[i];
        const vecF& expected = (*correctOutput)[i];
        const int predIdx = (int)(std::max_element(predicted.begin(), predicted.end()) - predicted.begin());
        const int trueIdx = (int)(std::max_element(expected.begin(), expected.end()) - expected.begin());
        if (predIdx == trueIdx){
            total += 1;
        }
    }
    std::cout<<"Accuracy is "<<total/batchSize*100<<"%"<<std::endl;
}
// Debug helper: prints each row as comma-separated values, one row per line.
void Helpers::printMatrix(matrixF* input){
    for (const vecF& row : *input){
        for (const float value : row){
            std::cout << value << ", ";
        }
        std::cout << std::endl;
    }
}
// Debug helper: prints the vector as comma-separated values on one line.
void Helpers::printVector(vecF* input){
    for (const float value : *input){
        std::cout << value << ", ";
    }
    std::cout << std::endl;
}
// Prints Windows memory statistics: the full MEMORYSTATUS dump when
// fullPrint is true, otherwise just the system memory-load percentage.
// Used in run() to watch memory consumption per training iteration.
// NOTE(review): GlobalMemoryStatus is deprecated and its fields saturate
// on systems with more than 4 GB of RAM; GlobalMemoryStatusEx with
// MEMORYSTATUSEX would be the modern replacement -- confirm before changing.
void Helpers::checkMemory(bool fullPrint){
//code by Anderas Masur https://forums.codeguru.com/showthread.php?280081-Get-available-memory
MEMORYSTATUS MemStat;
memset(&MemStat, 0, sizeof(MemStat));
::GlobalMemoryStatus(&MemStat);
if (fullPrint){
std::cout << "Length of structure: " << MemStat.dwLength
<< std::endl
<< "Memory usage: " << MemStat.dwMemoryLoad
<< " %" << std::endl
<< "Physical memory: " << MemStat.dwTotalPhys / 1024
<< " KB" << std::endl
<< "Free physical memory: " << MemStat.dwAvailPhys / 1024
<< " KB" << std::endl
<< "Paging file: " << MemStat.dwTotalPageFile / 1024
<< " KB" << std::endl
<< "Free paging file: " << MemStat.dwAvailPageFile / 1024
<< " KB" << std::endl
<< "Virtual memory: " << MemStat.dwTotalVirtual / 1024
<< " KB" << std::endl
<< "Free virtual memory: " << MemStat.dwAvailVirtual / 1024
<< " KB" << std::endl;
} else{
std::cout<<"Memory usage: "<< MemStat.dwMemoryLoad<<" %"<<std::endl;
}
}
// Debug helper: counts and prints the number of NaN entries in the
// matrix (NaN is the only float for which f != f is true).
// Rows are taken by const reference (the old loop copied each row) and
// the counter is an int so a large all-NaN matrix cannot overflow a short.
void Helpers::checkForNanMatrix(matrixF* input){
    int total = 0;
    for (const vecF& row : *input){
        for (float f : row){
            //Nan compared to something is always false
            if (f != f){
                total += 1;
            }
        }
    }
    std::cout<<"There is "<<total<<" Nan in matrix"<<std::endl;
}
// Debug helper: counts and prints the number of NaN entries in the
// vector (NaN is the only float for which f != f is true).
// int counter so very large all-NaN vectors cannot overflow a short.
void Helpers::checkForNanVec(vecF* input){
    int total = 0;
    for (float f : *input){
        //Nan compared to something is always false
        if (f != f){
            total += 1;
        }
    }
    std::cout<<"There is "<<total<<" Nan in vector"<<std::endl;
}
#ifndef matrixLibrary_h
#define matrixLibrary_h
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <chrono>
#include <random>
#include <math.h>
#include <numeric>
#include <algorithm>
#include <stdio.h>
#include <windows.h>
#include <unistd.h>
#include <memory.h>
#include <conio.h>
// Number of images processed per training batch.
#define batchSize 1000
// NOTE(review): a 28x28 MNIST image has 784 pixels -- 748 looks like a
// typo. The constant is unused in the visible code; confirm before changing.
#define imgSize 748
typedef std::vector<float> vecF;   // one row/column of floats
typedef std::vector<vecF> matrixF; // matrix stored as a vector of rows
// Stateless collection of matrix/vector helpers for the network.
// Matrix- and vector-returning functions allocate with new; the caller
// owns (and must delete) every returned pointer.
class Helpers{
public:
Helpers();
~Helpers();
float randNumb(float minNumb, float maxNumb);
float reLU(float numb);
float reLUDeriv(float numb);
matrixF* reLUMatrix(matrixF* input);
float sigmoid(float numb);
float sigmoidDeriv(float numb);
float sigmoidInverse(float numb);
matrixF* sigmoidMatrix(matrixF* input);
float dotProduct(vecF* vector1, vecF* vector2);
float squareError(vecF* output, vecF* correctOutput, short size);
matrixF* forwardProp1(matrixF* input, matrixF* weights, vecF* bias, short size);
matrixF* forwardProp2(matrixF* input, matrixF* weights, vecF* bias, short size1, short size2);
matrixF* fullDot(matrixF* input1, matrixF* input2, short size1, short size2, float append);
matrixF* subtractMatrices(matrixF* mat1, matrixF* mat2);
matrixF* transform(matrixF* input);
matrixF* initializeWeight(short size1, short size2, float minNumb, float maxNumb);
vecF* sumMatrix(matrixF* input, float append);
vecF* subtractVectors(vecF* vec1, vecF* vec2, short size);
vecF* initializeBias(short size, float minNumb, float maxNumb);
short getSizeMat(matrixF* mat);
short getSizeVec(vecF* vec);
void outDeriv(matrixF* outMat, matrixF* input1, matrixF* input2, matrixF* input3, short size);
void addVectors(vecF* vec1, vecF* vec2);
void addBias(matrixF* input, vecF* bias);
void checkSizeMatrix(matrixF* input);
void checkSizeVector(vecF* input);
void getAccuracy(matrixF* output3, matrixF* correctOutput);
void printMatrix(matrixF* input);
void printVector(vecF* input);
void checkMemory(bool fullPrint);
void checkForNanMatrix(matrixF* input);
void checkForNanVec(vecF* input);
};
// NOTE(review): including the implementation from the header only works
// while exactly one translation unit includes this header; a second TU
// would produce duplicate symbol definitions at link time.
#include"matrixLibrary.cpp"
#endif
Aucun commentaire:
Enregistrer un commentaire