use package core
-// To easily change to 64-bit floats if needed.
-float :: #type f32;
-
-
NeuralNet :: struct {
layers : [] Layer;
make_neural_net :: (layer_sizes: ..i32) -> NeuralNet {
net : NeuralNet;
+ neural_net_init(^net, layer_sizes.count);
- net.layer_arena = alloc.arena.make(context.allocator, 64 * 1024 * 1024); // 64 MiB
layer_allocator := alloc.arena.make_allocator(^net.layer_arena);
- net.layers = memory.make_slice(Layer, layer_sizes.count, allocator = layer_allocator);
-
- init_layer(^net.layers[0], layer_sizes[0], 0, allocator = layer_allocator);
+ layer_init(^net.layers[0], layer_sizes[0], 0, allocator = layer_allocator);
for i: 1 .. net.layers.count {
- init_layer(^net.layers[i], layer_sizes[i], layer_sizes[i - 1], allocator = layer_allocator);
+ layer_init(^net.layers[i], layer_sizes[i], layer_sizes[i - 1], allocator = layer_allocator);
}
return net;
}
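+
+// Example construction (the layer sizes here are arbitrary, for illustration only):
+//
+//     net := make_neural_net(2, 4, 1);
+//     defer neural_net_free(^net);
+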
+neural_net_init :: (use nn: ^NeuralNet, layer_count: u32) {
+ layer_arena = alloc.arena.make(context.allocator, 64 * 1024 * 1024); // 64 MiB
+ layer_allocator := alloc.arena.make_allocator(^layer_arena);
+
+ layers = memory.make_slice(Layer, layer_count, allocator = layer_allocator);
+}
+
neural_net_free :: (use nn: ^NeuralNet) {
alloc.arena.free(^layer_arena);
}
-neural_net_forward :: (use nn: ^NeuralNet, input: [] float) {
+neural_net_forward :: (use nn: ^NeuralNet, input: [] f32) {
assert(input.count == layers[0].neurons.count, "Input does not have the same size as the first layer.");
for i: input.count do layers[0].neurons[i] = input[i];
}
}
-neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
+neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] f32) {
assert(layers[layers.count - 1].neurons.count == expected_output.count,
"Expected output does not have the same size as the last layer.");
- LEARNING_RATE :: cast(float) 0.01;
+ LEARNING_RATE :: cast(f32) 0.01;
// Iterating backwards through the layers (hence the name "back propagation").
// This is necessary because we need the derivatives from the later layers
// before we can compute the derivatives of the earlier layers.
defer i -= 1;
// For every neuron, we need to calculate its corresponding "delta", which is
- // kind of an abiguous term here. It specifically means the partial derivative
+ // kind of an ambiguous term here. It specifically means the partial derivative
// of the loss with respect to the weighted sum of the previous layer's
// neurons, plus a bias.
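+ // Concretely, writing z_j for that weighted sum and a_j for the activated
+ // neuron value, the output layer's delta combines the derivative of the MSE
+ // loss with activation'(z_j), while for a hidden layer
+ //     delta_j = (sum_k delta_k * w_kj) * activation'(z_j),
+ // where k ranges over the next layer's neurons and w_kj is its weight on
+ // neuron j. That is what the two branches below compute.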
for j: layers[i].neurons.count {
sigmoid_value := layers[i].neurons[j];
d_sigmoid_value := layers[i].activation.backward(sigmoid_value, layers[i].pre_activation_neurons[j]);
- // The last layer has its deriviate computed special, since it needs to capture
+ // The last layer has its derivative computed specially, since it needs to capture
// the derivative of the MSE function.
if i == layers.count - 1 {
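+ // (~~ auto-casts expected_output.count to the float type needed for the division.)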
layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value / ~~expected_output.count;
} else {
- d_neuron: float = 0;
+ d_neuron: f32 = 0;
for k: layers[i + 1].neurons.count {
d_neuron += layers[i + 1].deltas[k] * layers[i + 1].weights[k][j];
}
layers[i].deltas[j] = d_neuron * d_sigmoid_value;
}
}
-neural_net_get_output :: (use nn: ^NeuralNet) -> [] float {
+neural_net_get_output :: (use nn: ^NeuralNet) -> [] f32 {
return layers[layers.count - 1].neurons;
}
-neural_net_loss :: (use nn: ^NeuralNet, expected_output: [] float) -> float {
+neural_net_loss :: (use nn: ^NeuralNet, expected_output: [] f32) -> f32 {
// MSE loss
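+ // That is, loss = (1 / N) * sum_i (output_i - expected_output_i)^2, where
+ // N is the number of output neurons.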
assert(layers[layers.count - 1].neurons.count == expected_output.count,
"Expected output does not have the same size as the last layer.");
output := layers[layers.count - 1].neurons;
- squared_sum: float = 0;
+ squared_sum: f32 = 0;
for i: expected_output.count {
diff := output[i] - expected_output[i];
squared_sum += diff * diff;
Layer :: struct {
- neurons : [] float;
- pre_activation_neurons : [] float;
+ use_bias : bool;
+ is_input : bool;
+ activation : ActivationFunction;
- biases : [] float;
- weights : [][] float; // CLEANUP: Make this a rank 1 slice
+ biases : [] f32;
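+ // weights[n][p] is the weight from neuron p of the previous layer into
+ // neuron n of this layer.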
+ weights : [][] f32; // CLEANUP: Make this a rank 1 slice
- deltas : [] float;
+ neurons : [] f32;
+ pre_activation_neurons : [] f32;
- use_bias : bool;
- activation : ActivationFunction;
+ deltas : [] f32;
}
-init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocator := context.allocator) {
- neurons = memory.make_slice(float, layer_size, allocator);
- pre_activation_neurons = memory.make_slice(float, layer_size, allocator);
+layer_init :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocator := context.allocator) {
+ neurons = memory.make_slice(f32, layer_size, allocator);
+ pre_activation_neurons = memory.make_slice(f32, layer_size, allocator);
use_bias = true;
- if use_bias {
- biases = memory.make_slice(float, layer_size, allocator);
- }
+ deltas = memory.make_slice(f32, layer_size, allocator);
+ activation = sigmoid_activation;
- deltas = memory.make_slice(float, layer_size, allocator);
+ is_input = (prev_layer_size == 0);
- activation = sigmoid_activation;
+ if !is_input {
+ if use_bias {
+ biases = memory.make_slice(f32, layer_size, allocator);
+ }
- if prev_layer_size > 0 {
- weights = memory.make_slice(#type [] float, layer_size, allocator);
+ weights = memory.make_slice(#type [] f32, layer_size, allocator);
for ^weight: weights {
- *weight = memory.make_slice(float, prev_layer_size, allocator);
+ *weight = memory.make_slice(f32, prev_layer_size, allocator);
}
randomize_weights_and_biases(layer);
randomize_weights_and_biases :: (use layer: ^Layer) {
for ^weight: weights {
for ^w: *weight {
- *w = cast(float) random.float(-0.5f, 0.5f);
+ *w = cast(f32) random.float(-0.5f, 0.5f);
}
}
if use_bias {
- for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
+ for ^bias: biases do *bias = cast(f32) random.float(-0.5f, 0.5f);
}
}
}
+Onyx_NN_Magic_Number :: 0x4E4E584F
+
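+// Serialized layout, as written by neural_net_save below: the magic number
+// (i32), the layer count (i32), then for each layer its neuron count (i32)
+// and an is_input byte, followed for non-input layers by a use_bias byte, an
+// activation id byte, the bias values (when use_bias is set), and one weight
+// slice per neuron.
+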
+neural_net_save :: (use nn: ^NeuralNet, filename: str) {
+ err, output_file := io.open(filename, io.OpenMode.Write);
+ assert(err == io.Error.None, "Failed to open neural net save file for writing.");
+ defer io.stream_close(^output_file);
+
+ writer := io.binary_writer_make(^output_file);
+
+ // Magic number
+ io.binary_write_i32(^writer, Onyx_NN_Magic_Number);
+
+ // Number of layers
+ io.binary_write_i32(^writer, layers.count);
+
+ for ^layer: layers {
+ io.binary_write_i32(^writer, layer.neurons.count);
+
+ io.binary_write_byte(^writer, cast(u8) layer.is_input);
+ if layer.is_input do continue;
+
+ io.binary_write_byte(^writer, cast(u8) layer.use_bias);
+ io.binary_write_byte(^writer, cast(u8) layer.activation.id);
+
+ if layer.use_bias {
+ io.binary_write_slice(^writer, layer.biases);
+ }
+
+ for ^weight: layer.weights {
+ io.binary_write_slice(^writer, *weight);
+ }
+ }
+}
+
+neural_net_load :: (filename: str) -> NeuralNet {
+ err, input_file := io.open(filename, io.OpenMode.Read);
+ assert(err == io.Error.None, "Failed to open neural net save file for reading.");
+ defer io.stream_close(^input_file);
+
+ reader := io.binary_reader_make(^input_file);
+
+ magic_number := io.binary_read_i32(^reader);
+ assert(magic_number == Onyx_NN_Magic_Number, "Magic number did not match!");
+
+ num_layers := io.binary_read_i32(^reader);
+
+ nn : NeuralNet;
+ neural_net_init(^nn, num_layers);
+
+ layer_allocator := alloc.arena.make_allocator(^nn.layer_arena);
+ prev_layer_size := 0;
+
+ for l: num_layers {
+ layer_size := io.binary_read_i32(^reader);
+ is_input := cast(bool) io.binary_read_byte(^reader);
+
+ layer_init(^nn.layers[l], layer_size, prev_layer_size, allocator = layer_allocator);
+ prev_layer_size = layer_size;
+
+ if is_input do continue;
+
+ nn.layers[l].use_bias = cast(bool) io.binary_read_byte(^reader);
+
+ activation_id := cast(ActivationFunctionID) io.binary_read_byte(^reader);
+ nn.layers[l].activation = activation_function_from_id(activation_id);
+ }
+
+ return nn;
+}
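+
+// Usage sketch for the save/load pair, given some net built with
+// make_neural_net (the file name is illustrative only):
+//
+//     neural_net_save(^net, "net.dat");
+//     loaded := neural_net_load("net.dat");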
+
+
+// Used only for serialization: it gives each activation function a unique
+// and reproducible identifier.
+ActivationFunctionID :: enum (u8) {
+ Invalid :: 0x00;
+ Sigmoid :: 0x01;
+ Hyperbolic_Tangent :: 0x02;
+}
+
+activation_function_from_id :: (id: ActivationFunctionID) -> ActivationFunction {
+ switch id {
+ case ActivationFunctionID.Sigmoid do return sigmoid_activation;
+ case ActivationFunctionID.Hyperbolic_Tangent do return tanh_activation;
+
+ case #default do return ActivationFunction.{
+ ActivationFunctionID.Invalid,
+ null_proc, null_proc,
+ };
+ }
+}
+
ActivationFunction :: struct {
- forward : (x : float) -> float;
- backward : (fx: float, x: float) -> float;
+ id : ActivationFunctionID;
+ forward : (x : f32) -> f32;
+ backward : (fx: f32, x: f32) -> f32;
}
-sigmoid_activation := ActivationFunction.{ sigmoid, sigmoid_prime }
+sigmoid_activation := ActivationFunction.{
+ ActivationFunctionID.Sigmoid,
+ sigmoid, sigmoid_prime
+}
-sigmoid :: (x: float) -> float {
+sigmoid :: (x: f32) -> f32 {
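+ // sigma(x) = 1 / (1 + e^(-x)), computed here as e^x / (1 + e^x).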
ex := math.exp(x);
return ex / (1 + ex);
}
-sigmoid_prime :: (sx: float, _: float) -> float {
+sigmoid_prime :: (sx: f32, _: f32) -> f32 {
// This is defined in terms of the sigmoid of x
// sigma'(x) = sigma(x) * (1 - sigma(x))
return sx * (1 - sx);
}
-tanh_activation := ActivationFunction.{ tanh, tanh_prime };
+tanh_activation := ActivationFunction.{
+ ActivationFunctionID.Hyperbolic_Tangent,
+ tanh, tanh_prime
+}
-tanh :: (x: float) -> float {
+tanh :: (x: f32) -> f32 {
ex := math.exp(x);
emx := math.exp(-x);
return (ex - emx) / (ex + emx);
}
-tanh_prime :: (_: float, x: float) -> float {
+tanh_prime :: (_: f32, x: f32) -> f32 {
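+ // This is defined in terms of x itself:
+ //     tanh'(x) = 1 - tanh(x)^2 = 4 / (e^x + e^(-x))^2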
ex := math.exp(x);
emx := math.exp(-x);
s := emx + ex;