bug fixes with neural net implementation

author Brendan Hansen <brendan.f.hansen@gmail.com>

Fri, 22 Jan 2021 15:31:44 +0000 (09:31 -0600)

committer Brendan Hansen <brendan.f.hansen@gmail.com>

Tue, 23 Feb 2021 04:00:14 +0000 (22:00 -0600)
author Brendan Hansen <brendan.f.hansen@gmail.com>
Fri, 22 Jan 2021 15:31:44 +0000 (09:31 -0600)
committer Brendan Hansen <brendan.f.hansen@gmail.com>
Tue, 23 Feb 2021 04:00:14 +0000 (22:00 -0600)
diff --git a/src/mnist.onyx b/src/mnist.onyx

index 536a9f8a62ad82cd72d2a944b48afe38605532bc..54fb416e718cfc6193e1723a2da3b8273359b71a 100644 (file)
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -56,10 +56,12 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training
              for i: input.count do input[i] = (cast(float) cast(u32) example[i]) / 255;
  
              neural_net_forward(nn, ~~ input);
-            // output := neural_net_get_output(^nn);
-            // print_array(output);
-
              if ex % 100 == 0 {
+                print_array(expected);
+
+                output := neural_net_get_output(nn);
+                print_array(output);
+
                  loss := neural_net_loss(nn, ~~ expected);
                  printf("MSE loss: %f\n", cast(f32) loss);
              }
@@ -80,7 +82,7 @@ main :: (args: [] cstr) {
      mnist_data := mnist_data_make();
      defer mnist_data_close(^mnist_data);
  
-    nn := make_neural_net(28 * 28, 1000, 10);
+    nn := make_neural_net(28 * 28, 512, 256, 100, 10);
      defer neural_net_free(^nn);
  
      stocastic_gradient_descent(^nn, ^mnist_data);
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx

index 003115188c1dae3f236ad975f5cea1fa5b4899d8..4b5361443f8384b89b6df028f2c2b0b04b26177c 100644 (file)
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -60,12 +60,12 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
          // neurons, plus a bias.
          for j: layers[i].neurons.count {
              sigmoid_value   := layers[i].neurons[j];
-            d_sigmoid_value := sigmoid_value * (1 - sigmoid_value);
+            d_sigmoid_value := layers[i].activation.backward(sigmoid_value, layers[i].pre_activation_neurons[j]);
  
              // The last layer has its derivative computed specially, since it needs to capture
              // the derivative of the MSE function.
              if i == layers.count - 1 {
-                layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value;
+                layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value / ~~expected_output.count;
  
              } else {
                  d_neuron: float = 0;
@@ -81,10 +81,12 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
      // derivatives and update the biases and weights.
      for i: 1 .. layers.count {
          for j: layers[i].neurons.count {
-            layers[i].biases[j] += LEARNING_RATE * layers[i].deltas[j];
+            if layers[i].use_bias {
+                layers[i].biases[j] += LEARNING_RATE * layers[i].deltas[j];
+            }
  
              for k: layers[i].weights[j].count {
-                layers[i].weights[j][k] += LEARNING_RATE * layers[i].deltas[j] * layers[i].neurons[k];
+                layers[i].weights[j][k] += LEARNING_RATE * layers[i].deltas[j] * layers[i - 1].neurons[k];
              }
          }
      }
@@ -113,16 +115,30 @@ neural_net_loss :: (use nn: ^NeuralNet, expected_output: [] float) -> float {
  
  
  // State for a single layer of the network.
  //
  //   neurons                - value of each neuron after the activation function
  //                            has been applied (written by layer_forward).
  //   pre_activation_neurons - the bias + weighted sum for each neuron, saved before
  //                            the activation is applied so the backward pass can
  //                            hand it to activation.backward.
  //   biases                 - one bias per neuron; only consulted when use_bias is set.
  //   weights                - weights[i][j] scales neuron j of the previous layer
  //                            into neuron i of this layer.
  //   deltas                 - per-neuron values filled in by neural_net_backward and
  //                            used to update the biases and weights.
  //   use_bias               - whether biases take part in the forward pass and updates.
  //   activation             - forward/backward function pair used by this layer.
  Layer :: struct {
-    neurons :   [] float;
+    neurons                :   [] float;
+    pre_activation_neurons :   [] float;
+
      biases  :   [] float;
      weights : [][] float; // CLEANUP: Make this a rank 1 slice
+
      deltas  :   [] float;
+
+    use_bias   : bool;
+    activation : ActivationFunction;
  }
  
  init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocator := context.allocator) {
      neurons = memory.make_slice(float, layer_size, allocator);
-    deltas  = memory.make_slice(float, layer_size, allocator);
-    biases  = memory.make_slice(float, layer_size, allocator);
+    pre_activation_neurons = memory.make_slice(float, layer_size, allocator);
+
+    use_bias = true;
+    if use_bias {
+        biases = memory.make_slice(float, layer_size, allocator);
+    }
+
+    deltas = memory.make_slice(float, layer_size, allocator);
+
+    activation = tanh_activation;
  
      if prev_layer_size > 0 {
          weights = memory.make_slice(#type [] float, layer_size, allocator);
@@ -138,29 +154,66 @@ init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocat
  // Overwrites every weight of the layer with a value from random.float(-1.0f, 1.0f)
  // and, when use_bias is set, every bias with a value from random.float(-0.5f, 0.5f).
  // Nothing else in the layer is touched.
  randomize_weights_and_biases :: (use layer: ^Layer) {
      for ^weight: weights {
          for ^w: *weight {
-            *w = cast(float) random.float(-0.5f, -0.5f);
+            *w = cast(float) random.float(-1.0f, 1.0f);
          }
      }
  
-    for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
+    if use_bias {
+        for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
+    }
  }
  
  // Computes this layer's neuron values from the previous layer's.
  //
  // For each neuron i the accumulator starts at biases[i] (or 0 when use_bias is
  // off), then adds prev_layer.neurons[j] * weights[i][j] for every incoming
  // weight. The raw sum is kept in pre_activation_neurons[i] and the activated
  // value is stored in neurons[i].
  layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
      for i: neurons.count {
-        neurons[i] = biases[i];
+        neurons[i] = 0;
+        if use_bias do neurons[i] = biases[i];
+
          for j: weights[i].count {
              neurons[i] += prev_layer.neurons[j] * weights[i][j];
          }
  
-        neurons[i] = sigmoid(neurons[i]);
+        pre_activation_neurons[i] = neurons[i];
+        neurons[i] = activation.forward(neurons[i]);
      }
  }
  
  
  
  
+
+
+
+// Bundles an activation function with its derivative so a layer can carry
+// both around as a single value.
+ActivationFunction :: struct {
+    // Maps a pre-activation value x to the activated value f(x).
+    forward : proc (x : float) -> float;
+
+    // Evaluates f'(x). Both fx = f(x) and x itself are supplied, so an
+    // implementation may use whichever makes the derivative cheaper.
+    backward : proc (fx: float, x: float) -> float;
+}
+
+
+// Logistic sigmoid paired with its derivative.
+sigmoid_activation := ActivationFunction.{ sigmoid, sigmoid_prime };
+
  sigmoid :: (x: float) -> float {
      ex := math.exp(x);
      return ex / (1 + ex);
  }
  
+sigmoid_prime :: (sx: float, _: float) -> float {
+    // sx already holds sigma(x), and sigma'(x) = sigma(x) * (1 - sigma(x)),
+    // so the raw input (second argument) is not needed.
+    complement := 1 - sx;
+    return sx * complement;
+}
+
+
+// Hyperbolic tangent paired with its derivative.
+tanh_activation := ActivationFunction.{ tanh, tanh_prime };
+
+// Hyperbolic tangent, evaluated as
+//
+//     tanh(|x|) = (1 - e^(-2|x|)) / (1 + e^(-2|x|))
+//
+// with the sign restored afterwards (tanh is odd). The exponent is never
+// positive, so the exponential stays within (0, 1]. The textbook form
+// (e^x - e^-x) / (e^x + e^-x) would turn into inf / inf (NaN) for large |x|
+// once one of the exponentials overflows (assuming math.exp saturates to
+// infinity on overflow).
+tanh :: (x: float) -> float {
+    magnitude := x;
+    if x < 0 do magnitude = -x;
+
+    e2 := math.exp(-2 * magnitude);
+    t  := (1 - e2) / (1 + e2);
+
+    if x < 0 {
+        return -t;
+    }
+    return t;
+}
+
+tanh_prime :: (_: float, x: float) -> float {
+    // d/dx tanh(x) = sech^2(x) = 4 / (e^x + e^-x)^2.
+    // Only the raw input x is used; the already-activated value is ignored.
+    denom := math.exp(-x) + math.exp(x);
+    return 4 / (denom * denom);
+}
+\ No newline at end of file
author	Brendan Hansen <brendan.f.hansen@gmail.com>
	Fri, 22 Jan 2021 15:31:44 +0000 (09:31 -0600)
committer	Brendan Hansen <brendan.f.hansen@gmail.com>
	Tue, 23 Feb 2021 04:00:14 +0000 (22:00 -0600)
src/mnist.onyx		patch \| blob \| history
src/neuralnet.onyx		patch \| blob \| history