From: Brendan Hansen
Date: Fri, 22 Jan 2021 04:30:17 +0000 (-0600)
Subject: training everything works!
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=8e6c3f9bc1c96105fc52d8468036aa78b3c67ef5;p=onyx-mnist.git

training everything works!
---

diff --git a/src/mnist.onyx b/src/mnist.onyx
index ee42222..536a9f8 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -5,16 +5,69 @@
 use package core
 
+MNIST_Data :: struct {
+    images : io.FileStream;
+    labels : io.FileStream;
+}
+
+mnist_data_make :: (image_path := "data/train-images-idx3-ubyte", label_path := "data/train-labels-idx1-ubyte") -> MNIST_Data {
+    mnist_data: MNIST_Data;
+    err : io.Error;
+    err, mnist_data.images = io.open(image_path);
+    assert(err == io.Error.None, "There was an error loading the image file");
 
-// Load the data
-// Feed forward neural net
+    err, mnist_data.labels = io.open(label_path);
+    assert(err == io.Error.None, "There was an error loading the label file");
+
+    return mnist_data;
+}
 
-load_example :: (fs: ^io.FileStream, example: u32, out: [784] u8) {
+mnist_data_close :: (use mnist_data: ^MNIST_Data) {
+    io.stream_close(^images);
+    io.stream_close(^labels);
+}
+
+load_example :: (use mnist_data: ^MNIST_Data, example: u32, out: [784] u8) -> u32 {
     location := 16 + example * 784;
-    _, bytes_read := io.stream_read_at(fs, location, ~~ out);
-
+    _, bytes_read := io.stream_read_at(^images, location, ~~ out);
+    assert(bytes_read == 784, "Incorrect number of bytes read.");
+
+    location = 8 + example;
+    label_buf : [1] u8;
+    _, bytes_read = io.stream_read_at(^labels, location, ~~ label_buf);
+    return ~~ label_buf[0];
+}
+
+stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training_examples := 50000) {
+    example : [784] u8;
+    expected := float.[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
+    input := memory.make_slice(float, 784);
+    defer cfree(input.data);
+
+    for i: 10 {
+        for ex: training_examples {
+            label := load_example(mnist_data, ex, example);
+            expected[label] = 1.0f;
+            defer expected[label] = 0.0f;
+
+            // CLEANUP: The double cast that is necessary here is gross.
+            for i: input.count do input[i] = (cast(float) cast(u32) example[i]) / 255;
+
+            neural_net_forward(nn, ~~ input);
+            // output := neural_net_get_output(^nn);
+            // print_array(output);
+
+            if ex % 100 == 0 {
+                loss := neural_net_loss(nn, ~~ expected);
+                printf("MSE loss: %f\n", cast(f32) loss);
+            }
+
+            neural_net_backward(nn, ~~ expected);
+        }
+    }
+}
 
 main :: (args: [] cstr) {
@@ -22,37 +75,13 @@ main :: (args: [] cstr) {
     // main_allocator := context.allocator;
     // context.allocator = alloc.log.logging_allocator(^main_allocator);
 
-    random.set_seed(1234);
-
-    err, training_example := io.open("data/train-images-idx3-ubyte");
-    if err != io.Error.None {
-        println("There was an error loading the file.");
-        return;
-    }
-    defer io.stream_close(^training_example);
+    random.set_seed(5234);
 
-    example : [784] u8;
-    load_example(^training_example, 0, example);
+    mnist_data := mnist_data_make();
+    defer mnist_data_close(^mnist_data);
 
     nn := make_neural_net(28 * 28, 1000, 10);
    defer neural_net_free(^nn);
 
-    input := memory.make_slice(f32, 784);
-    defer cfree(input.data);
-
-    // CLEANUP: The double cast that is necessary here is gross.
-    for i: input.count do input[i] = (cast(f32) cast(u32) example[i]) / 255;
-
-    for i: 500 {
-        neural_net_forward(^nn, ~~ input);
-        output := neural_net_get_output(^nn);
-        for o: output do printf("%f ", o);
-        print("\n");
-
-        expected := f32.[ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 ];
-        loss := neural_net_loss(^nn, ~~ expected);
-        printf("MSE loss: %f\n", loss);
-
-        neural_net_backward(^nn, ~~ expected);
-    }
+    stocastic_gradient_descent(^nn, ^mnist_data);
 }
\ No newline at end of file
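The offsets in load_example come straight from the raw MNIST IDX file layout: train-images-idx3-ubyte begins with a 16-byte header (magic number, image count, row count, column count as big-endian 32-bit integers) followed by 784 bytes per 28x28 image, and train-labels-idx1-ubyte begins with an 8-byte header (magic number, label count) followed by one byte per label. The sketch below is a rough Python equivalent of load_example plus the input scaling done in stocastic_gradient_descent, assuming that standard layout; the names in it are illustrative and not part of this repository.

    # Rough Python equivalent of load_example in src/mnist.onyx, assuming the
    # standard MNIST IDX layout (16-byte image header, 8-byte label header).
    def load_example(images_path, labels_path, example):
        with open(images_path, "rb") as f:
            f.seek(16 + example * 784)        # skip the header, then 784 bytes per image
            pixels = f.read(784)              # one 28x28 image as raw u8 values
        with open(labels_path, "rb") as f:
            f.seek(8 + example)               # skip the header, then 1 byte per label
            label = f.read(1)[0]              # digit 0-9
        # The Onyx training loop then scales each pixel to [0, 1] and one-hot
        # encodes the label before the forward/backward pass.
        inputs = [p / 255 for p in pixels]
        return inputs, label
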
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx
index c49bcb6..0031151 100644
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -47,15 +47,26 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
 
     LEARNING_RATE :: cast(float) 0.01;
 
+    // Iterate backwards through the layers (hence the name "back propagation").
+    // This is necessary because we need to know the derivatives of the neurons
+    // in the next layer before we can compute the derivatives of the current
+    // layer's neurons. This is what keeps the algorithm from being exponentially slow.
     while i := layers.count - 1; i >= 1 {
         defer i -= 1;
 
+        // For every neuron, we need to calculate its corresponding "delta", which is
+        // kind of an ambiguous term here. It specifically means the partial derivative
+        // of the loss with respect to the weighted sum of the previous layer's
+        // neurons, plus a bias.
         for j: layers[i].neurons.count {
             sigmoid_value := layers[i].neurons[j];
             d_sigmoid_value := sigmoid_value * (1 - sigmoid_value);
 
+            // The last layer has its derivative computed specially, since it needs to
+            // capture the derivative of the MSE loss function.
             if i == layers.count - 1 {
                 layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value;
+
             } else {
                 d_neuron: float = 0;
                 for k: layers[i + 1].neurons.count {
@@ -66,6 +77,8 @@
         }
     }
 
+    // Once all the deltas are computed, we can use them to compute the actual
+    // derivatives and update the biases and weights.
     for i: 1 .. layers.count {
         for j: layers[i].neurons.count {
             layers[i].biases[j] += LEARNING_RATE * layers[i].deltas[j];
@@ -125,11 +138,11 @@ init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocat
 randomize_weights_and_biases :: (use layer: ^Layer) {
     for ^weight: weights {
         for ^w: *weight {
-            *w = random.float(-0.5f, -0.5f);
+            *w = cast(float) random.float(-0.5f, -0.5f);
         }
     }
 
-    for ^bias: biases do *bias = random.float(-0.5f, 0.5f);
+    for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
 }
 
 layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
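Written out, the update that the new comments in neural_net_backward describe is the usual sigmoid/MSE back propagation. With a_j^(l) the sigmoid output of neuron j in layer l, y_j the expected output, w and b the weights and biases, and eta standing for LEARNING_RATE, the output-layer delta matches the code exactly; the hidden-layer sum and the weight update are not visible in this hunk and are assumed to take the standard form:

    \[ \delta_j^{(L)} = 2\,\bigl(y_j - a_j^{(L)}\bigr)\,a_j^{(L)}\bigl(1 - a_j^{(L)}\bigr) \]
    \[ \delta_j^{(l)} = \Bigl(\textstyle\sum_k w_{kj}^{(l+1)}\,\delta_k^{(l+1)}\Bigr)\,a_j^{(l)}\bigl(1 - a_j^{(l)}\bigr) \]
    \[ b_j^{(l)} \leftarrow b_j^{(l)} + \eta\,\delta_j^{(l)}, \qquad w_{ji}^{(l)} \leftarrow w_{ji}^{(l)} + \eta\,\delta_j^{(l)}\,a_i^{(l-1)}, \qquad \eta = 0.01 \]

Because the stored delta carries the factor (y_j - a_j^{(L)}) rather than (a_j^{(L)} - y_j), it is the negative of the loss gradient with respect to the neuron's weighted sum, which is why adding LEARNING_RATE * delta to the biases (and weights) still descends the MSE loss.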