From: Brendan Hansen
Date: Fri, 22 Jan 2021 22:05:54 +0000 (-0600)
Subject: added basics of serializing networks
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=4a0c198888075e847a6ede5a223efa91d151ae31;p=onyx-mnist.git

added basics of serializing networks
---

diff --git a/src/mnist.onyx b/src/mnist.onyx
index 1099db1..efad3aa 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -42,8 +42,8 @@ load_example :: (use mnist_data: ^MNIST_Data, example: u32, out: [784] u8) -> u3
 
 stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training_examples := 50000) {
     example : [784] u8;
-    expected := float.[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
-    input := memory.make_slice(float, 784);
+    expected := f32.[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
+    input := memory.make_slice(f32, 784);
     defer cfree(input.data);
 
     for i: 10 {
@@ -53,7 +53,7 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training
             defer expected[label] = 0.0f;
 
             // CLEANUP: The double cast that is necessary here is gross.
-            for i: input.count do input[i] = (cast(float) cast(u32) example[i]) / 255;
+            for i: input.count do input[i] = (cast(f32) cast(u32) example[i]) / 255;
 
             neural_net_forward(nn, ~~ input);
             neural_net_backward(nn, ~~ expected);
@@ -65,9 +65,9 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training
 
             for i: arr.count {
                 if i == greatest_idx {
-                    printf("\x1b[94m%f\x1b[0m ", arr[i]);
+                    printf("\x1b[94m%f\x1b[0m ", cast(f32) arr[i]);
                 } else {
-                    printf("%f ", arr[i]);
+                    printf("%f ", cast(f32) arr[i]);
                 }
             }
             print("\n");
@@ -80,6 +80,11 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, mnist_data: ^MNIST_Data, training
 
             loss := neural_net_loss(nn, ~~ expected);
             printf("MSE loss: %f\n", cast(f32) loss);
+
+            if ex % 10000 == 0 {
+                println("Saving neural network...");
+                neural_net_save(nn, "data/test_1.nn");
+            }
         }
     }
 }
@@ -91,6 +96,8 @@ main :: (args: [] cstr) {
     // main_allocator := context.allocator;
     // context.allocator = alloc.log.logging_allocator(^main_allocator);
 
+    _ := neural_net_load("data/dummy.nn");
+
     random.set_seed(5234);
 
     mnist_data := mnist_data_make();
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx
index 48c1b3a..884d6a7 100644
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -1,9 +1,5 @@
 use package core
 
-// To easily change to 64-bit floats if needed.
-float :: #type f32;
-
-
 NeuralNet :: struct {
     layers : [] Layer;
 
@@ -13,25 +9,30 @@ NeuralNet :: struct {
 
 make_neural_net :: (layer_sizes: ..i32) -> NeuralNet {
     net : NeuralNet;
+    neural_net_init(^net, layer_sizes.count);
 
-    net.layer_arena = alloc.arena.make(context.allocator, 64 * 1024 * 1024); // 64 MiB
     layer_allocator := alloc.arena.make_allocator(^net.layer_arena);
-    net.layers = memory.make_slice(Layer, layer_sizes.count, allocator = layer_allocator);
-
-    init_layer(^net.layers[0], layer_sizes[0], 0, allocator = layer_allocator);
+    layer_init(^net.layers[0], layer_sizes[0], 0, allocator = layer_allocator);
     for i: 1 .. net.layers.count {
-        init_layer(^net.layers[i], layer_sizes[i], layer_sizes[i - 1], allocator = layer_allocator);
+        layer_init(^net.layers[i], layer_sizes[i], layer_sizes[i - 1], allocator = layer_allocator);
     }
 
     return net;
 }
 
+neural_net_init :: (use nn: ^NeuralNet, layer_count: u32) {
+    layer_arena = alloc.arena.make(context.allocator, 64 * 1024 * 1024); // 64 MiB
+    layer_allocator := alloc.arena.make_allocator(^layer_arena);
+
+    layers = memory.make_slice(Layer, layer_count, allocator = layer_allocator);
+}
+
 neural_net_free :: (use nn: ^NeuralNet) {
     alloc.arena.free(^layer_arena);
 }
 
-neural_net_forward :: (use nn: ^NeuralNet, input: [] float) {
+neural_net_forward :: (use nn: ^NeuralNet, input: [] f32) {
     assert(input.count == layers[0].neurons.count,
             "Input does not have the same size as the first layer.");
 
     for i: input.count do layers[0].neurons[i] = input[i];
@@ -41,11 +42,11 @@ neural_net_forward :: (use nn: ^NeuralNet, input: [] float) {
     }
 }
 
-neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
+neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] f32) {
     assert(layers[layers.count - 1].neurons.count == expected_output.count,
             "Expected output does not have the same size as the last layer.");
 
-    LEARNING_RATE :: cast(float) 0.01;
+    LEARNING_RATE :: cast(f32) 0.01;
 
     // Iterating backwards through the layers (hence the name "back propagation")
     // The reason this is necessary is because we need to know the derivatives of
@@ -55,20 +56,20 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
         defer i -= 1;
 
         // For every neuron, we need to calculate its corresponding "delta", which is
-        // kind of an abiguous term here. It specifically means the partial derivative
+        // kind of an ambiguous term here. It specifically means the partial derivative
         // of the the loss with respect to the weighted sum of the previous layers
         // neurons, plus a bias.
 
         for j: layers[i].neurons.count {
             sigmoid_value := layers[i].neurons[j];
             d_sigmoid_value := layers[i].activation.backward(sigmoid_value, layers[i].pre_activation_neurons[j]);
 
-            // The last layer has its deriviate computed special, since it needs to capture
+            // The last layer has its derivative computed special, since it needs to capture
            // the derivative of the MSE function.
             if i == layers.count - 1 {
                 layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value / ~~expected_output.count;
             } else {
-                d_neuron: float = 0;
+                d_neuron: f32 = 0;
                 for k: layers[i + 1].neurons.count {
                     d_neuron += layers[i + 1].deltas[k] * layers[i + 1].weights[k][j];
                 }
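
Note: in the hunk above, sigmoid_value is the post-activation value a_j = sigma(z_j), d_sigmoid_value is sigma'(z_j) as returned by activation.backward, y_j is expected_output[j], N is expected_output.count, and w_kj is the weight from neuron j into neuron k of layer i + 1 (this notation is introduced here, not in the patch). The stored "delta" is then

    \delta_j^{(L)} = \frac{2}{N} \, (y_j - a_j) \, \sigma'(z_j)              (output layer)

    \text{d\_neuron} = \sum_k \delta_k^{(i+1)} \, w_{kj}^{(i+1)}             (hidden layers)

with the hidden-layer delta presumably completed as \delta_j^{(i)} = \text{d\_neuron} \cdot \sigma'(z_j) on the line just past the end of this hunk. This is the usual chain-rule recurrence for the derivative of the MSE loss with respect to z_j, except that the code keeps (expected - actual) rather than (actual - expected), so the sign is flipped relative to the textbook gradient.
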
@@ -92,18 +93,18 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
     }
 }
 
-neural_net_get_output :: (use nn: ^NeuralNet) -> [] float {
+neural_net_get_output :: (use nn: ^NeuralNet) -> [] f32 {
     return layers[layers.count - 1].neurons;
 }
 
-neural_net_loss :: (use nn: ^NeuralNet, expected_output: [] float) -> float {
+neural_net_loss :: (use nn: ^NeuralNet, expected_output: [] f32) -> f32 {
     // MSE loss
     assert(layers[layers.count - 1].neurons.count == expected_output.count,
             "Expected output does not have the same size as the last layer.");
 
     output := layers[layers.count - 1].neurons;
 
-    squared_sum: float = 0;
+    squared_sum: f32 = 0;
     for i: expected_output.count {
         diff := output[i] - expected_output[i];
         squared_sum += diff * diff;
@@ -115,36 +116,38 @@ Layer :: struct {
-    neurons : [] float;
-    pre_activation_neurons : [] float;
+    use_bias : bool;
+    is_input : bool;
+    activation : ActivationFunction;
 
-    biases : [] float;
-    weights : [][] float; // CLEANUP: Make this a rank 1 slice
+    biases : [] f32;
+    weights : [][] f32; // CLEANUP: Make this a rank 1 slice
 
-    deltas : [] float;
+    neurons : [] f32;
+    pre_activation_neurons : [] f32;
 
-    use_bias : bool;
-    activation : ActivationFunction;
+    deltas : [] f32;
 }
 
 
-init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocator := context.allocator) {
-    neurons = memory.make_slice(float, layer_size, allocator);
-    pre_activation_neurons = memory.make_slice(float, layer_size, allocator);
+layer_init :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocator := context.allocator) {
+    neurons = memory.make_slice(f32, layer_size, allocator);
+    pre_activation_neurons = memory.make_slice(f32, layer_size, allocator);
 
     use_bias = true;
-    if use_bias {
-        biases = memory.make_slice(float, layer_size, allocator);
-    }
+    deltas = memory.make_slice(f32, layer_size, allocator);
+    activation = sigmoid_activation;
 
-    deltas = memory.make_slice(float, layer_size, allocator);
+    is_input = (prev_layer_size == 0);
 
-    activation = sigmoid_activation;
+    if !is_input {
+        if use_bias {
+            biases = memory.make_slice(f32, layer_size, allocator);
+        }
 
-    if prev_layer_size > 0 {
-        weights = memory.make_slice(#type [] float, layer_size, allocator);
+        weights = memory.make_slice(#type [] f32, layer_size, allocator);
 
         for ^weight: weights {
-            *weight = memory.make_slice(float, prev_layer_size, allocator);
+            *weight = memory.make_slice(f32, prev_layer_size, allocator);
         }
 
         randomize_weights_and_biases(layer);
@@ -154,12 +157,12 @@ randomize_weights_and_biases :: (use layer: ^Layer) {
     for ^weight: weights {
         for ^w: *weight {
-            *w = cast(float) random.float(-0.5f, 0.5f);
+            *w = cast(f32) random.float(-0.5f, 0.5f);
         }
     }
 
     if use_bias {
-        for ^bias: biases do *bias = cast(float) random.float(-0.5f, 0.5f);
+        for ^bias: biases do *bias = cast(f32) random.float(-0.5f, 0.5f);
     }
 }
 
@@ -178,40 +181,136 @@ layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
 }
 
+Onyx_NN_Magic_Number :: 0x4E4E584F
+
+neural_net_save :: (use nn: ^NeuralNet, filename: str) {
+    err, output_file := io.open(filename, io.OpenMode.Write);
+    assert(err == io.Error.None, "Failed to open neural net save file for writing.");
+    defer io.stream_close(^output_file);
+
+    writer := io.binary_writer_make(^output_file);
+
+    // Magic string
+    io.binary_write_i32(^writer, Onyx_NN_Magic_Number);
+
+    // Number of layers
+    io.binary_write_i32(^writer, layers.count);
+
+    for ^layer: layers {
+        io.binary_write_i32(^writer, layer.neurons.count);
+
+        io.binary_write_byte(^writer, cast(u8) layer.is_input);
+        if layer.is_input do continue;
+
+        io.binary_write_byte(^writer, cast(u8) layer.use_bias);
+        io.binary_write_byte(^writer, cast(u8) layer.activation.id);
+
+        if layer.use_bias {
+            io.binary_write_slice(^writer, layer.biases);
+        }
+
+        for ^weight: layer.weights {
+            io.binary_write_slice(^writer, *weight);
+        }
+    }
+}
+
+neural_net_load :: (filename: str) -> NeuralNet {
+    err, input_file := io.open(filename, io.OpenMode.Read);
+    assert(err == io.Error.None, "Failed to open neural net save file for reading.");
+    defer io.stream_close(^input_file);
+
+    reader := io.binary_reader_make(^input_file);
+
+    magic_number := io.binary_read_i32(^reader);
+    assert(magic_number == Onyx_NN_Magic_Number, "Magic number did not match!");
+
+    num_layers := io.binary_read_i32(^reader);
+
+    nn : NeuralNet;
+    neural_net_init(^nn, num_layers);
+
+    layer_allocator := alloc.arena.make_allocator(^nn.layer_arena);
+    prev_layer_size := 0;
+
+    for l: num_layers {
+        layer_size := io.binary_read_i32(^reader);
+        is_input := cast(bool) io.binary_read_byte(^reader);
+
+        layer_init(^nn.layers[l], layer_size, prev_layer_size, layer_allocator);
+        if is_input do continue;
+
+        nn.layers[l].use_bias = cast(bool) io.binary_read_byte(^reader);
+
+        activation_id := cast(ActivationFunctionID) io.binary_read_byte(^reader);
+        nn.layers[l].activation = activation_function_from_id(activation_id);
+    }
+}
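
Note: taken together, the writes in neural_net_save define the on-disk layout of a ".nn" file: an i32 magic number (0x4E4E584F), an i32 layer count, then for each layer an i32 neuron count and a u8 is_input flag, followed for non-input layers by a u8 use_bias flag, a u8 activation id, the bias values (only if use_bias), and one row of weights per neuron, each row holding previous-layer-size values. As committed, neural_net_load only restores the layer shapes, bias flags, and activation ids; the bias and weight values are not read back yet, and the function falls off the end without returning the NeuralNet it builds.

The sketch below is a hypothetical reader for that layout, written in Python purely to make the byte layout concrete. It assumes details the patch does not state: little-endian encoding, 4-byte i32 and f32 values, and io.binary_write_slice writing the raw elements with no length prefix.

    # Hypothetical reader for the .nn layout produced by neural_net_save.
    # Assumptions not confirmed by the patch: little-endian values, 4-byte
    # i32/f32, and binary_write_slice emitting raw elements, no length prefix.
    import struct

    ONYX_NN_MAGIC = 0x4E4E584F

    def read_nn(path):
        with open(path, "rb") as f:
            magic, layer_count = struct.unpack("<ii", f.read(8))
            assert magic == ONYX_NN_MAGIC, "magic number did not match"

            layers = []
            prev_size = 0
            for _ in range(layer_count):
                size, is_input = struct.unpack("<iB", f.read(5))
                layer = {"size": size, "is_input": bool(is_input)}
                if not is_input:
                    use_bias, activation_id = struct.unpack("<BB", f.read(2))
                    layer["activation_id"] = activation_id  # 1 = sigmoid, 2 = tanh
                    if use_bias:
                        layer["biases"] = struct.unpack("<%df" % size, f.read(4 * size))
                    # one weight row per neuron, each holding prev_size values
                    layer["weights"] = [
                        struct.unpack("<%df" % prev_size, f.read(4 * prev_size))
                        for _ in range(size)
                    ]
                layers.append(layer)
                prev_size = size
            return layers
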
+
+
+
+
+// Solely used for serializing. Need a way to store the activation
+// functions uniquely and reproducibly.
+ActivationFunctionID :: enum (u8) {
+    Invalid            :: 0x00;
+    Sigmoid            :: 0x01;
+    Hyperbolic_Tangent :: 0x02;
+}
+
+activation_function_from_id :: (id: ActivationFunctionID) -> ActivationFunction {
+    switch id {
+        case ActivationFunctionID.Sigmoid do return sigmoid_activation;
+        case ActivationFunctionID.Hyperbolic_Tangent do return tanh_activation;
+
+        case #default do return ActivationFunction.{
+            ActivationFunctionID.Invalid,
+            null_proc, null_proc,
+        };
+    }
+}
+
 ActivationFunction :: struct {
-    forward : (x : float) -> float;
-    backward : (fx: float, x: float) -> float;
+    id : ActivationFunctionID;
+    forward : (x : f32) -> f32;
+    backward : (fx: f32, x: f32) -> f32;
 }
 
-sigmoid_activation := ActivationFunction.{ sigmoid, sigmoid_prime }
+sigmoid_activation := ActivationFunction.{
+    ActivationFunctionID.Sigmoid,
+    sigmoid, sigmoid_prime
+}
 
-sigmoid :: (x: float) -> float {
+sigmoid :: (x: f32) -> f32 {
     ex := math.exp(x);
     return ex / (1 + ex);
 }
 
-sigmoid_prime :: (sx: float, _: float) -> float {
+sigmoid_prime :: (sx: f32, _: f32) -> f32 {
     // This is defined in terms of the sigmoid of x
     // sigma'(x) = sigma(x) * (1 - sigma(x))
     return sx * (1 - sx);
 }
 
-tanh_activation := ActivationFunction.{ tanh, tanh_prime };
+tanh_activation := ActivationFunction.{
+    ActivationFunctionID.Hyperbolic_Tangent,
+    tanh, tanh_prime
+}
 
-tanh :: (x: float) -> float {
+tanh :: (x: f32) -> f32 {
     ex := math.exp(x);
     emx := math.exp(-x);
     return (ex - emx) / (ex + emx);
 }
 
-tanh_prime :: (_: float, x: float) -> float {
+tanh_prime :: (_: f32, x: f32) -> f32 {
     ex := math.exp(x);
     emx := math.exp(-x);
     s := emx + ex;
     return 4 / (s * s);
 }
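
Note: the two derivative helpers rely on standard identities. sigmoid_prime works from the already-computed activation (its first argument), while tanh_prime recomputes from the pre-activation input (its second argument), which is why ActivationFunction.backward receives both fx and x. The identities are

    \sigma'(x) = \sigma(x) \, (1 - \sigma(x))

    \tanh'(x) = 1 - \tanh^2(x) = \frac{4}{(e^{x} + e^{-x})^{2}}

so with s = e^x + e^{-x} as computed in tanh_prime, the derivative reduces to 4 / (s * s).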