From: Brendan Hansen
Date: Wed, 27 Jan 2021 04:13:19 +0000 (-0600)
Subject: cleanup; started work on CIFAR-10 dataset
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=79fdea43b7af8d4900f06b77ca8fd779cd1df450;p=onyx-mnist.git

cleanup; started work on CIFAR-10 dataset
---

diff --git a/project.4coder b/project.4coder
index 9306168..50d3127 100644
--- a/project.4coder
+++ b/project.4coder
@@ -19,7 +19,7 @@ load_paths = {
  { load_paths_custom, .os = "mac" },
 };

-build_win32 = "\\dev\\onyx\\onyx.exe -V src\\mnist.onyx -o mnist.wasm";
+build_win32 = "\\dev\\onyx\\onyx.exe -V src\\cifar10.onyx -o network.wasm";
 build_linux = "/usr/bin/onyx -V src/mnist.onyx -o mnist.wasm";

 command_list = {
diff --git a/src/cifar10.onyx b/src/cifar10.onyx
new file mode 100644
index 0000000..edfbc32
--- /dev/null
+++ b/src/cifar10.onyx
@@ -0,0 +1,117 @@
+#load "core/std/wasi"
+
+#load_path "src"
+#load "neuralnet"
+
+use package core
+
+CIFAR10_DataLoader :: struct {
+    use data : DataLoader;
+
+    data_file : io.FileStream;
+}
+
+cifar10_create :: (file_location := "data/cifar-10-batches-bin/data_batch_1.bin") -> CIFAR10_DataLoader {
+    dataset : CIFAR10_DataLoader;
+    dataset.vtable = ^cifar10_dataloader_functions;
+
+    err : io.Error;
+    err, dataset.data_file = io.open(file_location);
+    assert(err == io.Error.None, "Failed to open CIFAR10 dataset file");
+
+    return dataset;
+}
+
+cifar10_close :: (use dataset: ^CIFAR10_DataLoader) {
+    io.stream_close(^data_file);
+}
+
+cifar10_dataloader_functions := DataLoader_Functions.{
+    get_count = (use dataset: ^CIFAR10_DataLoader) -> u32 {
+        return 10000;
+    },
+
+    get_item = (use dataset: ^CIFAR10_DataLoader, index: u32, input: [] f32, output: [] f32) -> bool {
+        assert(input.count == 3072, "Input slice was of wrong size. Expected 3072.");
+        assert(output.count == 10, "Output slice was of wrong size. Expected 10.");
+
+        if index > 10000 do return false;
+
+        location := index * (3072 + 1);
+        sample : [3072 + 1] u8;
+        _, bytes_read := io.stream_read_at(^data_file, location, ~~ sample);
+
+        label := ~~sample[0];
+        // TODO(Brendan Hansen): NOT DONE
+    }
+};
+
+
+// TODO(Brendan Hansen): This was copied from mnist.onyx. There should be an easy way to abstract these.
+stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterion: Criterion = mean_squared_error) {
+    input := memory.make_slice(f32, 3072);
+    defer cfree(input.data);
+    expected : [10] f32;
+
+    training_example_count := dataloader_get_count(dataloader);
+
+    past_100_correct := 0;
+    for i: 10 {
+        for ex: training_example_count {
+            dataloader_get_item(dataloader, ex, input, ~~ expected);
+
+            neural_net_forward(nn, ~~ input);
+            neural_net_backward(nn, ~~ expected, criterion);
+
+            // The optimizing step should be put here.
+
+            label, _ := array.greatest(expected);
+            prediction := neural_net_get_prediction(nn);
+            if prediction == label do past_100_correct += 1;
+
+            if ex % 100 == 0 {
+                print_colored_array :: (arr: [] $T, color_idx: i32, color_code := 94) {
+                    for i: arr.count {
+                        if i == color_idx {
+                            printf("\x1b[%im", color_code);
+                            print(arr[i]);
+                            print("\x1b[0m ");
+                        } else {
+                            print(arr[i]);
+                            print(" ");
+                        }
+                    }
+                    print("\n");
+                }
+
+                color := 94;
+                if prediction != label do color = 91;
+
+                output := neural_net_get_output(nn);
+
+                print_colored_array(cast([] f32) expected, label, color);
+                print_colored_array(output, prediction, color);
+
+                loss := neural_net_loss(nn, ~~ expected, criterion);
+                printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
+
+                past_100_correct = 0;
+
+                /*
+                if ex % 10000 == 0 {
+                    println("Saving neural network...");
+                    neural_net_save(nn, "data/test_4.nn");
+                }
+                */
+            }
+        }
+    }
+}
+
+
+main :: (args: [] cstr) {
+    println("Hello World!");
+
+    cifar10_dataloader := cifar10_create();
+    defer cifar10_close(^cifar10_dataloader);
+}
\ No newline at end of file
diff --git a/src/mnist.onyx b/src/mnist.onyx
index b391419..155542a 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -77,6 +77,8 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterio
             neural_net_forward(nn, ~~ input);
             neural_net_backward(nn, ~~ expected, criterion);

+            // The optimizing step should be put here.
+
             label, _ := array.greatest(expected);
             prediction := neural_net_get_prediction(nn);
             if prediction == label do past_100_correct += 1;
@@ -108,11 +110,13 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterio
                 printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);

                 past_100_correct = 0;
-
+
+                /*
                 if ex % 10000 == 0 {
                     println("Saving neural network...");
-                    neural_net_save(nn, "data/test_3.nn");
+                    neural_net_save(nn, "data/test_4.nn");
                 }
+                */
             }
         }
     }
@@ -123,8 +127,8 @@ main :: (args: [] cstr) {
 // main_allocator := context.allocator;
 // context.allocator = alloc.log.logging_allocator(^main_allocator);

-// nn := neural_net_load("data/test_2.nn");
-    nn := make_neural_net(28 * 28, 1024, 256, 100, 10);
+    //nn := neural_net_load("data/test_3.nn");
+    nn := make_neural_net(28 * 28, 512, 256, 100, 10);
     defer neural_net_free(^nn);

     random.set_seed(5234);
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx
index c50f4e5..e91535a 100644
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -1,9 +1,14 @@
 use package core

+//
+// General purpose Multi-Layer Perceptron (MLP)
+//
+
 NeuralNet :: struct {
     layers : [] Layer;

-    // CLEANUP: Move these to core.alloc, so the nesting isn't nearly as terrible.
+    // CLEANUP(Brendan Hansen): Move all allocators to core.alloc,
+    // so the nesting isn't nearly as terrible.
     layer_arena : alloc.arena.ArenaState;
 }

@@ -47,14 +52,16 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] f32, criterion:
         "Expected output does not have the same size as the last layer.");
     LEARNING_RATE :: cast(f32) 0.01;
-
+
+    // NOTE(Brendan Hansen):
     // Iterating backwards through the layers (hence the name "back propagation")
     // The reason this is necessary is that we need to know the derivatives of
     // neurons in the next layer to compute the derivatives of the current layer's
     // neurons. This is what makes this algorithm not exponentially slow.
     while i := layers.count - 1; i >= 1 {
         defer i -= 1;
-
+
+        // NOTE(Brendan Hansen):
         // For every neuron, we need to calculate its corresponding "delta", which is
         // kind of an ambiguous term here. It specifically means the partial derivative
         // of the loss with respect to the weighted sum of the previous layer's
         // neurons.
@@ -65,24 +72,21 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] f32, criterion:

         if i == layers.count - 1 {
             criterion.compute_deltas(layers[i].deltas, layers[i].neurons, expected_output);
-        } else {
-            for j: layers[i].neurons.count {
-                d_neuron: f32 = 0;
-                for k: layers[i + 1].neurons.count {
-                    d_neuron += layers[i + 1].deltas[k] * layers[i + 1].weights[k][j];
-                }
-
-                layers[i].deltas[j] = d_neuron;
+            // NOTE(Brendan Hansen):
+            // Here we multiply by the derivative of the activation function for each neuron.
+            // This is done in the layer_backward function, but since that isn't called for the
+            // last layer, it is necessary to do it here.
+            for j: layers[i].deltas.count {
+                d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
+                layers[i].deltas[j] *= d_sigmoid_value;
             }
-        }
-
-        // Here we multiply by the derivative of the activation function for each neuron.
-        for j: layers[i].deltas.count {
-            d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
-            layers[i].deltas[j] *= d_sigmoid_value;
+
+        } else {
+            layer_backward(^layers[i], ^layers[i + 1]);
         }
     }
-
+
+    // NOTE(Brendan Hansen):
     // Once all the deltas are computed, we can use them to compute the actual
     // derivatives and update the biases and weights.
     // This part is responsible for optimization, and can easily be swapped out.
@@ -184,6 +188,21 @@ layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
     }
 }

+layer_backward :: (use layer: ^Layer, next_layer: ^Layer) {
+    for j: neurons.count {
+        d_neuron: f32 = 0;
+        for k: next_layer.neurons.count {
+            d_neuron += next_layer.deltas[k] * next_layer.weights[k][j];
+        }
+
+        d_sigmoid_value := activation.backward(neurons[j], pre_activation_neurons[j]);
+
+        // This could easily become '+=', which would allow for an accumulated gradient,
+        // before taking a step.
+        deltas[j] = d_neuron * d_sigmoid_value;
+    }
+}
+
 Onyx_NN_Magic_Number :: 0x4E4E584F


@@ -391,9 +410,6 @@ mean_squared_error := Criterion.{
     },

     compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
-        // Leaving the assert turned off for right now.
-        // assert(predictions.count == expected.count && expected.count == deltas.count, "Expected output does not have the same size as predictions.");
-
         for j: deltas.count {
             deltas[j] = 2 * (expected[j] - predictions[j]) / ~~expected.count;
         }
@@ -415,13 +431,11 @@ mean_absolute_error := Criterion.{
     },

     compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
-        // Leaving the assert turned off for right now.
-        // assert(predictions.count == expected.count, "Expected output does not have the same size as predictions.");
-
         for j: deltas.count {
             deltas[j] = 1.0f;
             if expected[j] < predictions[j] do deltas[j] = -1.0f;

+            // TODO(Brendan Hansen):
             // Technically, this division should be here, but it doesn't appear to be helping the gradient descent.
             deltas[j] /= cast(f32) expected.count;
         }
@@ -435,6 +449,7 @@
 //
 // Very basic data structure that represents something you can load data out of.
 // Specifically, an input and output at a particular index.
+// DataLoader :: struct { vtable : ^DataLoader_Functions;
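
Note on the record layout that get_item in src/cifar10.onyx above is reading: a CIFAR-10 "data_batch" file holds 10000 records of 3073 bytes each, one label byte (0-9) followed by 3072 pixel bytes (32x32 pixels, three channel planes), which is why the code seeks to index * (3072 + 1) and reads a [3072 + 1] u8 buffer. The commit stops at the TODO, so the lines below are only a sketch of how the body might continue under that assumed layout; the casts, literals, and loop forms are written by analogy with the surrounding Onyx code and are not part of the commit.

        // Sketch only, assuming the standard CIFAR-10 binary layout.
        // One-hot encode the label byte (0-9) into the 10 output slots.
        label_index := cast(u32) sample[0];
        for i: output.count {
            output[i] = 0.0f;
        }
        output[label_index] = 1.0f;

        // Scale the 3072 pixel bytes (red, green, blue planes) into [0, 1].
        for i: input.count {
            input[i] = (cast(f32) sample[i + 1]) / 255.0f;
        }

        return true;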
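Note on the neural_net_backward / layer_backward changes above: the value stored in deltas[j] for layer i is the usual back-propagation quantity, the partial derivative of the loss with respect to that neuron's pre-activation (pre_activation_neurons). Writing z for the pre-activations, a for neurons, w_kj for weights[k][j], and sigma' for activation.backward, the code corresponds to the sketch below, using the sign convention of compute_deltas in mean_squared_error; the actual weight and bias update sits outside the hunks shown here.

    \delta_j^{(i)} = \frac{\partial L}{\partial z_j^{(i)}}
                   = \Big( \sum_k \delta_k^{(i+1)} \, w_{kj}^{(i+1)} \Big) \, \sigma'\big(z_j^{(i)}\big)
                   \quad \text{(hidden layers, as in layer\_backward)}

    \delta_j^{(\mathrm{last})} = \frac{2}{n} \big( y_j - a_j^{(\mathrm{last})} \big) \, \sigma'\big(z_j^{(\mathrm{last})}\big)
                   \quad \text{(output layer, mean\_squared\_error)}

In standard stochastic gradient descent, the gradient for weights[j][k] is then delta_j times the previous layer's activation, scaled here by LEARNING_RATE; the '+=' remark in layer_backward is about accumulating these deltas over several examples before taking such a step.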