From 8e44880889b7767c35c7900556d92f689a2f1546 Mon Sep 17 00:00:00 2001
From: Brendan Hansen
Date: Thu, 28 Jan 2021 13:32:21 -0600
Subject: [PATCH] generalized dataloader

---
 src/cifar10.onyx   | 52 +++++++++++++++++++++++++++-------------------
 src/mnist.onyx     | 44 ++++++++++++++++++++++-----------------
 src/neuralnet.onyx | 31 +++++++++++++++------------
 3 files changed, 74 insertions(+), 53 deletions(-)

diff --git a/src/cifar10.onyx b/src/cifar10.onyx
index 42bb2e2..7bbb284 100644
--- a/src/cifar10.onyx
+++ b/src/cifar10.onyx
@@ -8,11 +8,16 @@ use package core
 // NOTE(Brendan Hansen): Currently, this supports only loading one of the dataset files,
 // even though there are 6 of them.
 CIFAR10_DataLoader :: struct {
-    use data : DataLoader;
+    use data : DataLoader(CIFAR10_Sample);
 
     data_file : io.FileStream;
 }
 
+CIFAR10_Sample :: struct {
+    input  : [] f32;
+    output : [] f32;
+}
+
 cifar10_create :: (file_location := "data/cifar-10-batches-bin/data_batch_1.bin") -> CIFAR10_DataLoader {
     dataset : CIFAR10_DataLoader;
     dataset.vtable = ^cifar10_dataloader_functions;
@@ -28,27 +33,27 @@ cifar10_close :: (use dataset: ^CIFAR10_DataLoader) {
     io.stream_close(^data_file);
 }
 
-cifar10_dataloader_functions := DataLoader_Functions.{
+cifar10_dataloader_functions := .{
     get_count = (use dataset: ^CIFAR10_DataLoader) -> u32 {
         return 10000;
     },
 
-    get_item = (use dataset: ^CIFAR10_DataLoader, index: u32, input: [] f32, output: [] f32) -> bool {
+    get_item = (use dataset: ^CIFAR10_DataLoader, index: u32, use sample: ^CIFAR10_Sample) -> bool {
         assert(input.count == 3072, "Input slice was of wrong size. Expected 3072.");
         assert(output.count == 10, "Output slice was of wrong size. Expected 10.");
 
         if index >= 10000 do return false;
 
         location := index * (3072 + 1);
-        sample : [3072 + 1] u8;
-        _, bytes_read := io.stream_read_at(^data_file, location, ~~ sample);
+        raw_input : [3072 + 1] u8;
+        _, bytes_read := io.stream_read_at(^data_file, location, ~~ raw_input);
 
-        label := ~~sample[0];
+        label := ~~raw_input[0];
         for ^o: output do *o = 0;
         output[cast(u32) label] = 1;
 
         for i: 3072 {
-            input[i] = (cast(f32) cast(u32) sample[i + 1]) / 255;
+            input[i] = (cast(f32) cast(u32) raw_input[i + 1]) / 255;
         }
 
         return true;
@@ -57,24 +62,26 @@ cifar10_dataloader_functions := DataLoader_Functions.{
 
 // TODO(Brendan Hansen): This was copied from mnist.onyx. There should be an easy way to abstract these.
-stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterion: Criterion = mean_squared_error) {
-    input := memory.make_slice(f32, 3072);
-    defer cfree(input.data);
-    expected : [10] f32;
+train :: (nn: ^NeuralNet, dataloader: ^DataLoader(CIFAR10_Sample), optimizer: ^Optimizer, criterion: Criterion = mean_squared_error) {
+    sample : CIFAR10_Sample;
+    sample.input  = memory.make_slice(f32, 3072);
+    sample.output = memory.make_slice(f32, 10);
+    defer cfree(sample.input.data);
+    defer cfree(sample.output.data);
 
     training_example_count := dataloader_get_count(dataloader);
 
     past_100_correct := 0;
     for i: 10 {
         for ex: training_example_count {
-            dataloader_get_item(dataloader, ex, input, ~~ expected);
+            dataloader_get_item(dataloader, ex, ^sample);
 
-            neural_net_forward(nn, ~~ input);
-            neural_net_backward(nn, ~~ expected, criterion);
+            optimizer_zero_gradient(optimizer);
+            neural_net_forward(nn, sample.input);
+            neural_net_backward(nn, sample.output, criterion);
+            optimizer_step(optimizer);
 
-            // The optimizing step should be put here.
-
-            label, _ := array.greatest(expected);
+            label, _ := array.greatest(sample.output);
             prediction := neural_net_get_prediction(nn);
             if prediction == label do past_100_correct += 1;
 
@@ -98,15 +105,15 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterio
 
                 output := neural_net_get_output(nn);
 
-                print_colored_array(cast([] f32) expected, label, color);
+                print_colored_array(sample.output, label, color);
                 print_colored_array(output, prediction, color);
 
-                loss := neural_net_loss(nn, ~~ expected, criterion);
+                loss := neural_net_loss(nn, sample.output, criterion);
                 printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
 
                 past_100_correct = 0;
 
-                if ex % 1000 == 0 {
+                if ex % 10000 == 0 {
                     println("Saving neural network...");
                     neural_net_save(nn, output_file);
                 }
@@ -133,5 +140,8 @@ main :: (args: [] cstr) {
     nn := make_neural_net(3072, 1024, 256, 10);
     defer neural_net_free(^nn);
 
-    stocastic_gradient_descent(^nn, ^cifar10_dataloader);
+    optimizer := sgd_optimizer_create(^nn, learning_rate = 0.01f);
+    neural_net_supply_parameters(^nn, ^optimizer);
+
+    train(^nn, ^cifar10_dataloader, ^optimizer);
 }
\ No newline at end of file
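
To make the new shape of the interface concrete, here is a minimal sketch of what a third dataset would look like under the generalized scheme. Nothing below is part of the patch: the XOR_Sample name, counts, and data are hypothetical, and, as in the train procedures in this patch, the caller is assumed to have allocated sample.input and sample.output before get_item is called.

    XOR_Sample :: struct {
        input  : [] f32;    // expected to be 2 elements
        output : [] f32;    // expected to be 1 element
    }

    XOR_DataLoader :: struct {
        use data : DataLoader(XOR_Sample);
    }

    xor_dataloader_functions := .{
        get_count = (use dataset: ^XOR_DataLoader) -> u32 {
            return 4;
        },

        get_item = (use dataset: ^XOR_DataLoader, index: u32, use sample: ^XOR_Sample) -> bool {
            if index >= 4 do return false;

            a := index % 2;
            b := index / 2;
            input[0]  = cast(f32) a;
            input[1]  = cast(f32) b;
            output[0] = cast(f32) ((a + b) % 2);    // XOR, computed as addition mod 2
            return true;
        },
    };

Because get_item receives a typed sample instead of two bare slices, a dataset is now free to add fields (class names, augmentation state, and so on) without every call site changing.
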
diff --git a/src/mnist.onyx b/src/mnist.onyx
index 6163218..87e5aaa 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -5,14 +5,21 @@ use package core
-
 MNIST_DataLoader :: struct {
-    use base : DataLoader;
+    use base : DataLoader(MNIST_Sample);
 
     images : io.FileStream;
     labels : io.FileStream;
 }
 
+MNIST_Sample :: struct {
+    // NOTE(Brendan Hansen): Expected to be 28 * 28 elements in size
+    input : [] f32;
+
+    // NOTE(Brendan Hansen): Expected to be 10 elements in size
+    output : [] f32;
+}
+
 mnist_data_make :: (image_path := "data/train-images-idx3-ubyte", label_path := "data/train-labels-idx1-ubyte") -> MNIST_DataLoader {
     mnist_data: MNIST_DataLoader;
     mnist_data.vtable = ^mnist_dataloader_functions;
@@ -32,12 +39,12 @@ mnist_data_close :: (use mnist_data: ^MNIST_DataLoader) {
     io.stream_close(^labels);
 }
 
-mnist_dataloader_functions := DataLoader_Functions.{
+mnist_dataloader_functions := .{
     get_count = (use data: ^MNIST_DataLoader) -> u32 {
         return 50000;
     },
 
-    get_item = (use data: ^MNIST_DataLoader, index: u32, input: [] f32, output: [] f32) -> bool {
+    get_item = (use data: ^MNIST_DataLoader, index: u32, use sample: ^MNIST_Sample) -> bool {
         assert(input.count == 28 * 28, "Input slice was of wrong size. Expected 784.");
         assert(output.count == 10, "Output slice was of wrong size. Expected 10.");
@@ -61,10 +68,13 @@ mnist_dataloader_functions := DataLoader_Functions.{
     }
 }
 
-train :: (nn: ^NeuralNet, dataloader: ^DataLoader, optimizer: ^Optimizer, criterion: Criterion = mean_squared_error) {
-    input := memory.make_slice(f32, 784);
-    defer cfree(input.data);
-    expected : [10] f32;
+// TODO(Brendan Hansen): Generalize this to all data types
+train :: (nn: ^NeuralNet, dataloader: ^DataLoader(MNIST_Sample), optimizer: ^Optimizer, criterion: Criterion = mean_squared_error) {
+    sample : MNIST_Sample;
+    sample.input  = memory.make_slice(f32, 784);
+    sample.output = memory.make_slice(f32, 10);
+    defer cfree(sample.input.data);
+    defer cfree(sample.output.data);
 
     training_example_count := dataloader_get_count(dataloader);
 
@@ -72,20 +82,16 @@ train :: (nn: ^NeuralNet, dataloader: ^DataLoader, optimizer: ^Optimizer, criter
     for i: 10 {
         printf("Starting epoch %i ===================================\n", i);
         for ex: training_example_count {
-            dataloader_get_item(dataloader, ex, input, ~~ expected);
-
-            // NOTE(Brendan Hansen): Currently, zeroing the gradient is not
-            // necessary because neural_net_backward replaces the gradient,
-            // in other words it doesn't add to the existing gradient.
-            // optimizer_zero_gradient(optimizer);
+            dataloader_get_item(dataloader, ex, ^sample);
 
-            neural_net_forward(nn, ~~ input);
-            neural_net_backward(nn, ~~ expected, criterion);
+            optimizer_zero_gradient(optimizer);
+            neural_net_forward(nn, ~~ sample.input);
+            neural_net_backward(nn, ~~ sample.output, criterion);
             optimizer_step(optimizer);
 
             // NOTE(Brendan Hansen): Prediction printing and tracking.
-            label, _ := array.greatest(expected);
+            label, _ := array.greatest(sample.output);
             prediction := neural_net_get_prediction(nn);
             if prediction == label do past_100_correct += 1;
 
@@ -109,10 +115,10 @@ train :: (nn: ^NeuralNet, dataloader: ^DataLoader, optimizer: ^Optimizer, criter
 
                 output := neural_net_get_output(nn);
 
-                print_colored_array(cast([] f32) expected, label, color);
+                print_colored_array(sample.output, label, color);
                 print_colored_array(output, prediction, color);
 
-                loss := neural_net_loss(nn, ~~ expected, criterion);
+                loss := neural_net_loss(nn, sample.output, criterion);
                 printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
 
                 past_100_correct = 0;
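
Both train procedures in this patch are still near-copies of each other, and both carry a TODO about it. One possible shape for that generalization, sketched here rather than taken from the repository, is to make train polymorphic over the sample type, exactly as dataloader_get_item already is, and to hide sample allocation behind two hypothetical vtable entries (init_sample / free_sample) so train never needs to know slice sizes. It assumes every Sample_Type exposes input and output slices:

    train :: (nn: ^NeuralNet, dataloader: ^DataLoader($Sample_Type), optimizer: ^Optimizer, criterion: Criterion = mean_squared_error) {
        sample : Sample_Type;
        dataloader.vtable.init_sample(dataloader, ^sample);           // hypothetical entry
        defer dataloader.vtable.free_sample(dataloader, ^sample);     // hypothetical entry

        training_example_count := dataloader_get_count(dataloader);

        for i: 10 {
            for ex: training_example_count {
                dataloader_get_item(dataloader, ex, ^sample);

                // The same four-step training cycle both files use today.
                optimizer_zero_gradient(optimizer);
                neural_net_forward(nn, sample.input);
                neural_net_backward(nn, sample.output, criterion);
                optimizer_step(optimizer);
            }
        }
    }

The accuracy tracking and printing from the real procedures is omitted; only the skeleton that would be shared is shown.
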
Expected 10."); @@ -61,10 +68,13 @@ mnist_dataloader_functions := DataLoader_Functions.{ } } -train :: (nn: ^NeuralNet, dataloader: ^DataLoader, optimizer: ^Optimizer, criterion: Criterion = mean_squared_error) { - input := memory.make_slice(f32, 784); - defer cfree(input.data); - expected : [10] f32; +// TODO(Brendan Hansen): Generalize this to all data types +train :: (nn: ^NeuralNet, dataloader: ^DataLoader(MNIST_Sample), optimizer: ^Optimizer, criterion: Criterion = mean_squared_error) { + sample : MNIST_Sample; + sample.input = memory.make_slice(f32, 784); + sample.output = memory.make_slice(f32, 10); + defer cfree(sample.input.data); + defer cfree(sample.output.data); training_example_count := dataloader_get_count(dataloader); @@ -72,20 +82,16 @@ train :: (nn: ^NeuralNet, dataloader: ^DataLoader, optimizer: ^Optimizer, criter for i: 10 { printf("Staring epoch %i ===================================\n", i); for ex: training_example_count { - dataloader_get_item(dataloader, ex, input, ~~ expected); - - // NOTE(Brendan Hansen): Currently, zeroing the gradient is not - // necessary because neural_net_backward replaces the gradient, - // in other words it doesn't add to the existing gradient. - // optimizer_zero_gradient(optimizer); + dataloader_get_item(dataloader, ex, ^sample); - neural_net_forward(nn, ~~ input); - neural_net_backward(nn, ~~ expected, criterion); + optimizer_zero_gradient(optimizer); + neural_net_forward(nn, ~~ sample.input); + neural_net_backward(nn, ~~ sample.output, criterion); optimizer_step(optimizer); // NOTE(Brendan Hansen): Prediction printing and tracking. - label, _ := array.greatest(expected); + label, _ := array.greatest(sample.output); prediction := neural_net_get_prediction(nn); if prediction == label do past_100_correct += 1; @@ -109,10 +115,10 @@ train :: (nn: ^NeuralNet, dataloader: ^DataLoader, optimizer: ^Optimizer, criter output := neural_net_get_output(nn); - print_colored_array(cast([] f32) expected, label, color); + print_colored_array(sample.output, label, color); print_colored_array(output, prediction, color); - loss := neural_net_loss(nn, ~~ expected, criterion); + loss := neural_net_loss(nn, sample.output, criterion); printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct); past_100_correct = 0; diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx index ad2d71e..e232c57 100644 --- a/src/neuralnet.onyx +++ b/src/neuralnet.onyx @@ -101,12 +101,12 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] f32, criterion: for i: 1 .. layers.count { for j: layers[i].neurons.count { if layers[i].use_bias { - layers[i].biases[j].delta = layers[i].deltas[j]; + layers[i].biases[j].delta += layers[i].deltas[j]; } prev_layer_count := layers[i - 1].neurons.count; for k: prev_layer_count { - layers[i].weights[j * prev_layer_count + k].delta = layers[i].deltas[j] * layers[i - 1].neurons[k]; + layers[i].weights[j * prev_layer_count + k].delta += layers[i].deltas[j] * layers[i - 1].neurons[k]; } } } @@ -234,9 +234,14 @@ neural_net_save :: (use nn: ^NeuralNet, filename: str) { io.binary_write_byte(^writer, cast(u8) layer.use_bias); io.binary_write_byte(^writer, cast(u8) layer.activation.id); - + + // TODO(Brendan Hansen): These are so slow because of the enormous + // amount of writes that have to happen. I would like to write them + // in bulk, but the problem is that the data that needs to be stored + // is not contiguous in memory because of AOS style of variables. 
If + // that could be changed to a SOA style, the saving and loading process + // here could be made much much faster. if layer.use_bias { -// io.binary_write_slice(^writer, layer.biases); for ^bias: layer.biases { io.binary_write(^writer, f32, ^bias.value); } @@ -465,29 +470,29 @@ mean_absolute_error := Criterion.{ // Specifically, an input and output at a particular index. // -DataLoader :: struct { - vtable : ^DataLoader_Functions; +DataLoader :: struct (Sample_Type: type_expr) { + vtable : ^DataLoader_Functions(Sample_Type); } -DataLoader_Functions :: struct { - get_count : (^DataLoader) -> u32; +DataLoader_Functions :: struct (Sample_Type: type_expr) { + get_count : (^DataLoader(Sample_Type)) -> u32; // I don't like how these have to be floats, but they seem reasonable for now. - get_item : (^DataLoader, index: u32, input: [] f32, output: [] f32) -> bool; + get_item : (^DataLoader(Sample_Type), index: u32, sample: ^Sample_Type) -> bool; } -dataloader_get_count :: (use data: ^DataLoader) -> u32 { +dataloader_get_count :: (use data: ^DataLoader($Sample_Type)) -> u32 { if vtable == null do return 0; if vtable.get_count == null_proc do return 0; return vtable.get_count(data); } -dataloader_get_item :: (use data: ^DataLoader, index: u32, input: [] f32, output: [] f32) -> bool { +dataloader_get_item :: (use data: ^DataLoader($Sample_Type), index: u32, sample: ^Sample_Type) -> bool { if vtable == null do return false; if vtable.get_item == null_proc do return false; - return vtable.get_item(data, index, input, output); + return vtable.get_item(data, index, sample); } @@ -561,7 +566,7 @@ sgd_optimizer_create :: (nn: ^NeuralNet, learning_rate := 0.01f, allocator := co sgd.vtable = ^sgd_optimizer_vtable; optimizer_init(^sgd, nn, allocator); - learning_rate = learning_rate; + sgd.learning_rate = learning_rate; return sgd; } -- 2.25.1