From: Brendan Hansen
Date: Fri, 22 Jan 2021 04:30:17 +0000 (-0600)
Subject: training everything works!
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=8e6c3f9bc1c96105fc52d8468036aa78b3c67ef5;p=onyx-mnist.git

training everything works!
---

diff --git a/src/mnist.onyx b/src/mnist.onyx
index ee42222..536a9f8 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -5,16 +5,69 @@
 use package core
 
+MNIST_Data :: struct {
+    images : io.FileStream;
+    labels : io.FileStream;
+}
+
+mnist_data_make :: (image_path := "data/train-images-idx3-ubyte", label_path := "data/train-labels-idx1-ubyte") -> MNIST_Data {
+    mnist_data: MNIST_Data;
+    err : io.Error;
+    err, mnist_data.images = io.open(image_path);
+    assert(err == io.Error.None, "There was an error loading the image file");
 
-// Load the data
-// Feed forward neural net
+    err, mnist_data.labels = io.open(label_path);
+    assert(err == io.Error.None, "There was an error loading the label file");
+
+    return mnist_data;
+}
 
-load_example :: (fs: ^io.FileStream, example: u32, out: [784] u8) {
+mnist_data_close :: (use mnist_data: ^MNIST_Data) {
+    io.stream_close(^images);
+    io.stream_close(^labels);
+}
+
+load_example :: (use mnist_data: ^MNIST_Data, example: u32, out: [784] u8) -> u32 {
     location := 16 + example * 784;
-    _, bytes_read := io.stream_read_at(fs, location, ~~ out);
-
+    _, bytes_read := io.stream_read_at(^images, location, ~~ out);
+    assert(bytes_read == 784, "Incorrect number of bytes read.");
+
+    location = 8 + example;
+    label_buf : [1] u8;
+    _, bytes_read = io.stream_read_at(^labels, location, ~~ label_buf);
+    return ~~ label_buf[0];
+}
+
+stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training_examples := 50000) {
+    example : [784] u8;
+    expected := float.[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
+    input := memory.make_slice(float, 784);
+    defer cfree(input.data);
+
+    for i: 10 {
+        for ex: training_examples {
+            label := load_example(mnist_data, ex, example);
+            expected[label] = 1.0f;
+            defer expected[label] = 0.0f;
+
+            // CLEANUP: The double cast that is necessary here is gross.
+            for i: input.count do input[i] = (cast(float) cast(u32) example[i]) / 255;
+
+            neural_net_forward(nn, ~~ input);
+            // output := neural_net_get_output(^nn);
+            // print_array(output);
+
+            if ex % 100 == 0 {
+                loss := neural_net_loss(nn, ~~ expected);
+                printf("MSE loss: %f\n", cast(f32) loss);
+            }
+
+            neural_net_backward(nn, ~~ expected);
+        }
+    }
+}
 
 main :: (args: [] cstr) {
@@ -22,37 +75,13 @@ main :: (args: [] cstr) {
     // main_allocator := context.allocator;
     // context.allocator = alloc.log.logging_allocator(^main_allocator);
 
-    random.set_seed(1234);
-
-    err, training_example := io.open("data/train-images-idx3-ubyte");
-    if err != io.Error.None {
-        println("There was an error loading the file.");
-        return;
-    }
-    defer io.stream_close(^training_example);
+    random.set_seed(5234);
 
-    example : [784] u8;
-    load_example(^training_example, 0, example);
+    mnist_data := mnist_data_make();
+    defer mnist_data_close(^mnist_data);
 
     nn := make_neural_net(28 * 28, 1000, 10);
    defer neural_net_free(^nn);
 
-    input := memory.make_slice(f32, 784);
-    defer cfree(input.data);
-
-    // CLEANUP: The double cast that is necessary here is gross.
-    for i: input.count do input[i] = (cast(f32) cast(u32) example[i]) / 255;
-
-    for i: 500 {
-        neural_net_forward(^nn, ~~ input);
-        output := neural_net_get_output(^nn);
-        for o: output do printf("%f ", o);
-        print("\n");
-
-        expected := f32.[ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 ];
-        loss := neural_net_loss(^nn, ~~ expected);
-        printf("MSE loss: %f\n", loss);
-
-        neural_net_backward(^nn, ~~ expected);
-    }
+    stocastic_gradient_descent(^nn, ^mnist_data);
 }
\ No newline at end of file
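The offsets in load_example come straight from the raw MNIST IDX file layout: train-images-idx3-ubyte begins with a 16-byte header (magic number, image count, row count, column count as big-endian 32-bit integers) followed by 784 bytes per 28x28 image, and train-labels-idx1-ubyte begins with an 8-byte header (magic number, label count) followed by one byte per label. The sketch below is a rough Python equivalent of load_example plus the input scaling done in stocastic_gradient_descent, assuming that standard layout; the names in it are illustrative and not part of this repository.

    # Rough Python equivalent of load_example in src/mnist.onyx, assuming the
    # standard MNIST IDX layout (16-byte image header, 8-byte label header).
    def load_example(images_path, labels_path, example):
        with open(images_path, "rb") as f:
            f.seek(16 + example * 784)        # skip the header, then 784 bytes per image
            pixels = f.read(784)              # one 28x28 image as raw u8 values
        with open(labels_path, "rb") as f:
            f.seek(8 + example)               # skip the header, then 1 byte per label
            label = f.read(1)[0]              # digit 0-9
        # The Onyx training loop then scales each pixel to [0, 1] and one-hot
        # encodes the label before the forward/backward pass.
        inputs = [p / 255 for p in pixels]
        return inputs, label
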
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx
index c49bcb6..0031151 100644
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -47,15 +47,26 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
 
     LEARNING_RATE :: cast(float) 0.01;
 
+    // Iterate backwards through the layers (hence the name "back propagation").
+    // This is necessary because we need to know the derivatives of the neurons
+    // in the next layer before we can compute the derivatives of the current
+    // layer's neurons. This is what keeps the algorithm from being exponentially slow.
     while i := layers.count - 1; i >= 1 {
         defer i -= 1;
 
+        // For every neuron, we need to calculate its corresponding "delta", which is
+        // kind of an ambiguous term here. It specifically means the partial derivative
+        // of the loss with respect to the weighted sum of the previous layer's
+        // neurons, plus a bias.
         for j: layers[i].neurons.count {
             sigmoid_value := layers[i].neurons[j];
             d_sigmoid_value := sigmoid_value * (1 - sigmoid_value);
 
+            // The last layer has its derivative computed specially, since it needs to
+            // capture the derivative of the MSE loss function.
             if i == layers.count - 1 {
                 layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value;
+
             } else {
                 d_neuron: float = 0;
                 for k: layers[i + 1].neurons.count {
@@ -66,6 +77,8 @@
         }
     }
 
+    // Once all the deltas are computed, we can use them to compute the actual
+    // derivatives and update the biases and weights.
     for i: 1 .. layers.count {
         for j: layers[i].neurons.count {
             layers[i].biases[j] += LEARNING_RATE * layers[i].deltas[j];
@@ -125,11 +138,11 @@ init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocat
 randomize_weights_and_biases :: (use layer: ^Layer) {
     for ^weight: weights {
         for ^w: *weight {
-            *w = random.float(-0.5f, -0.5f);
+            *w = cast(float) random.float(-0.5f, -0.5f);
         }
     }
 
-    for ^bias: biases do *bias = random.float(-0.5f, 0.5f);
+    for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
 }
 
 layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
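Written out, the update that the new comments in neural_net_backward describe is the usual sigmoid/MSE back propagation. With a_j^(l) the sigmoid output of neuron j in layer l, y_j the expected output, w and b the weights and biases, and eta standing for LEARNING_RATE, the output-layer delta matches the code exactly; the hidden-layer sum and the weight update are not visible in this hunk and are assumed to take the standard form:

    \[ \delta_j^{(L)} = 2\,\bigl(y_j - a_j^{(L)}\bigr)\,a_j^{(L)}\bigl(1 - a_j^{(L)}\bigr) \]
    \[ \delta_j^{(l)} = \Bigl(\textstyle\sum_k w_{kj}^{(l+1)}\,\delta_k^{(l+1)}\Bigr)\,a_j^{(l)}\bigl(1 - a_j^{(l)}\bigr) \]
    \[ b_j^{(l)} \leftarrow b_j^{(l)} + \eta\,\delta_j^{(l)}, \qquad w_{ji}^{(l)} \leftarrow w_{ji}^{(l)} + \eta\,\delta_j^{(l)}\,a_i^{(l-1)}, \qquad \eta = 0.01 \]

Because the stored delta carries the factor (y_j - a_j^{(L)}) rather than (a_j^{(L)} - y_j), it is the negative of the loss gradient with respect to the neuron's weighted sum, which is why adding LEARNING_RATE * delta to the biases (and weights) still descends the MSE loss.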