From: Brendan Hansen
Date: Fri, 22 Jan 2021 15:31:44 +0000 (-0600)
Subject: bug fixes with neural net implementation
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=98c8dd1fadaa45cd712d887b1685d82712a4d798;p=onyx-mnist.git

bug fixes with neural net implementation
---

diff --git a/src/mnist.onyx b/src/mnist.onyx
index 536a9f8..54fb416 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -56,10 +56,12 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training
         for i: input.count do input[i] = (cast(float) cast(u32) example[i]) / 255;
 
         neural_net_forward(nn, ~~ input);
-        // output := neural_net_get_output(^nn);
-        // print_array(output);
-
         if ex % 100 == 0 {
+            print_array(expected);
+
+            output := neural_net_get_output(nn);
+            print_array(output);
+
             loss := neural_net_loss(nn, ~~ expected);
             printf("MSE loss: %f\n", cast(f32) loss);
         }
@@ -80,7 +82,7 @@ main :: (args: [] cstr) {
     mnist_data := mnist_data_make();
     defer mnist_data_close(^mnist_data);
 
-    nn := make_neural_net(28 * 28, 1000, 10);
+    nn := make_neural_net(28 * 28, 512, 256, 100, 10);
     defer neural_net_free(^nn);
 
     stocastic_gradient_descent(^nn, ^mnist_data);
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx
index 0031151..4b53614 100644
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -60,12 +60,12 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
        // neurons, plus a bias.
        for j: layers[i].neurons.count {
            sigmoid_value := layers[i].neurons[j];
-           d_sigmoid_value := sigmoid_value * (1 - sigmoid_value);
+           d_sigmoid_value := layers[i].activation.backward(sigmoid_value, layers[i].pre_activation_neurons[j]);
 
            // The last layer has its deriviate computed special, since it needs to capture
            // the derivative of the MSE function.
            if i == layers.count - 1 {
-               layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value;
+               layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value / ~~expected_output.count;
 
            } else {
                d_neuron: float = 0;
@@ -81,10 +81,12 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
    // derivatives and update the biases and weights.
    for i: 1 .. layers.count {
        for j: layers[i].neurons.count {
-           layers[i].biases[j] += LEARNING_RATE * layers[i].deltas[j];
+           if layers[i].use_bias {
+               layers[i].biases[j] += LEARNING_RATE * layers[i].deltas[j];
+           }
 
            for k: layers[i].weights[j].count {
-               layers[i].weights[j][k] += LEARNING_RATE * layers[i].deltas[j] * layers[i].neurons[k];
+               layers[i].weights[j][k] += LEARNING_RATE * layers[i].deltas[j] * layers[i - 1].neurons[k];
            }
        }
    }
@@ -113,16 +115,30 @@ neural_net_loss :: (use nn: ^NeuralNet, expected_output: [] float) -> float {
 
 
 Layer :: struct {
-    neurons : [] float;
+    neurons                : [] float;
+    pre_activation_neurons : [] float;
+
     biases  : [] float;
     weights : [][] float; // CLEANUP: Make this a rank 1 slice
+
     deltas  : [] float;
+
+    use_bias   : bool;
+    activation : ActivationFunction;
 }
 
 init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocator := context.allocator) {
    neurons = memory.make_slice(float, layer_size, allocator);
-   deltas = memory.make_slice(float, layer_size, allocator);
-   biases = memory.make_slice(float, layer_size, allocator);
+   pre_activation_neurons = memory.make_slice(float, layer_size, allocator);
+
+   use_bias = true;
+   if use_bias {
+       biases = memory.make_slice(float, layer_size, allocator);
+   }
+
+   deltas = memory.make_slice(float, layer_size, allocator);
+
+   activation = tanh_activation;
 
    if prev_layer_size > 0 {
        weights = memory.make_slice(#type [] float, layer_size, allocator);
@@ -138,29 +154,66 @@ init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocat
 randomize_weights_and_biases :: (use layer: ^Layer) {
    for ^weight: weights {
        for ^w: *weight {
-           *w = cast(float) random.float(-0.5f, -0.5f);
+           *w = cast(float) random.float(-1.0f, 1.0f);
        }
    }
 
-   for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
+   if use_bias {
+       for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
+   }
 }
 
 layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
    for i: neurons.count {
-       neurons[i] = biases[i];
+       neurons[i] = 0;
+       if use_bias do neurons[i] = biases[i];
+
        for j: weights[i].count {
            neurons[i] += prev_layer.neurons[j] * weights[i][j];
        }
 
-       neurons[i] = sigmoid(neurons[i]);
+       pre_activation_neurons[i] = neurons[i];
+       neurons[i] = activation.forward(neurons[i]);
    }
 }
+
+
+
+ActivationFunction :: struct {
+    forward  : proc (x : float) -> float;
+    backward : proc (fx: float, x: float) -> float;
+}
+
+
+sigmoid_activation := ActivationFunction.{ sigmoid, sigmoid_prime }
+
 sigmoid :: (x: float) -> float {
    ex := math.exp(x);
    return ex / (1 + ex);
 }
+sigmoid_prime :: (sx: float, _: float) -> float {
+    // This is defined in terms of the sigmoid of x
+    // sigma'(x) = sigma(x) * (1 - sigma(x))
+    return sx * (1 - sx);
+}
+
+
+tanh_activation := ActivationFunction.{ tanh, tanh_prime };
+
+tanh :: (x: float) -> float {
+    ex  := math.exp(x);
+    emx := math.exp(-x);
+    return (ex - emx) / (ex + emx);
+}
+
+tanh_prime :: (_: float, x: float) -> float {
+    ex  := math.exp(x);
+    emx := math.exp(-x);
+    s   := emx + ex;
+    return 4 / (s * s);
+}
\ No newline at end of file
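
For reference, the rescaled output-layer delta matches the derivative of a mean squared error averaged over the output vector. Writing \hat{y}_j for the network output (sigmoid_value), y_j for expected_output[j], and N for expected_output.count (notation introduced here, not identifiers from the code):

    L = \frac{1}{N} \sum_{j=1}^{N} (y_j - \hat{y}_j)^2,
    \qquad
    \frac{\partial L}{\partial \hat{y}_j} = -\frac{2}{N} (y_j - \hat{y}_j)

Because the update loop adds LEARNING_RATE * deltas[j], deltas[j] holds the negative gradient pushed through the activation, which is 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value / ~~expected_output.count; the new / ~~expected_output.count factor is the 1/N from averaging.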
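
The corrected weight update follows from the chain rule for a fully connected layer. Writing z_j^{(i)} for the pre-activation of neuron j in layer i, w_{jk}^{(i)} for its weight from neuron k of layer i-1, and a_k^{(i-1)} for that neuron's activation (again, notation introduced here):

    z_j^{(i)} = b_j^{(i)} + \sum_k w_{jk}^{(i)} a_k^{(i-1)},
    \qquad
    \frac{\partial z_j^{(i)}}{\partial w_{jk}^{(i)}} = a_k^{(i-1)},
    \qquad
    \frac{\partial z_j^{(i)}}{\partial b_j^{(i)}} = 1

so the gradient step for weights[j][k] must be scaled by the previous layer's activation, layers[i - 1].neurons[k], not the current layer's, while the bias step stays LEARNING_RATE * layers[i].deltas[j].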
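
The two backward procedures use the standard closed forms for the activation derivatives:

    \sigma'(x) = \sigma(x) (1 - \sigma(x)),
    \qquad
    \frac{d}{dx}\tanh(x) = 1 - \tanh^2(x) = \frac{4}{(e^{x} + e^{-x})^2}

which is why sigmoid_prime only needs the already-computed activation value sx, while tanh_prime is evaluated from the raw input x that layer_forward now saves in pre_activation_neurons.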