{ load_paths_custom, .os = "mac" },
};
-build_win32 = "\\dev\\onyx\\onyx.exe -V src\\mnist.onyx -o mnist.wasm";
+build_win32 = "\\dev\\onyx\\onyx.exe -V src\\cifar10.onyx -o network.wasm";
build_linux = "/usr/bin/onyx -V src/mnist.onyx -o mnist.wasm";
command_list = {
--- /dev/null
+#load "core/std/wasi"
+
+#load_path "src"
+#load "neuralnet"
+
+use package core
+
+CIFAR10_DataLoader :: struct {
+ use data : DataLoader;
+
+ data_file : io.FileStream;
+}
+
+cifar10_create :: (file_location := "data/cifar-10-batches-bin/data_batch_1.bin") -> CIFAR10_DataLoader {
+ dataset : CIFAR10_DataLoader;
+ dataset.vtable = ^cifar10_dataloader_functions;
+
+ err : io.Error;
+ err, dataset.data_file = io.open(file_location);
+ assert(err == io.Error.None, "Failed to open CIFAR10 dataset file");
+
+ return dataset;
+}
+
+cifar10_close :: (use dataset: ^CIFAR10_DataLoader) {
+ io.stream_close(^data_file);
+}
+
+cifar10_dataloader_functions := DataLoader_Functions.{
+ get_count = (use dataset: ^CIFAR10_DataLoader) -> u32 {
+ return 10000;
+ },
+
+ get_item = (use dataset: ^CIFAR10_DataLoader, index: u32, input: [] f32, output: [] f32) -> bool {
+ assert(input.count == 3072, "Input slice was of wrong size. Expected 3072.");
+ assert(output.count == 10, "Output slice was of wrong size. Expected 10.");
+
+        if index >= 10000 do return false;
+
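+        // Each record in the CIFAR-10 binary format is 1 label byte followed by
+        // 3072 pixel bytes (32x32 pixels, 3 color channels), hence the 3073-byte stride.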
+ location := index * (3072 + 1);
+ sample : [3072 + 1] u8;
+ _, bytes_read := io.stream_read_at(^data_file, location, ~~ sample);
+
+        label := cast(u32) sample[0];
+ // TODO(Brendan Hansen): NOT DONE
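+        // One possible completion (a hedged sketch, not the final implementation):
+        // scale the pixel bytes into [0, 1] for the input, one-hot encode the label
+        // into the output, then report success.
+        //     for i: 3072 do input[i] = (cast(f32) sample[i + 1]) / 255.0f;
+        //     for i: 10   do output[i] = 0.0f;
+        //     output[label] = 1.0f;
+        //     return true;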
+ }
+};
+
+
+// TODO(Brendan Hansen): This was copied from mnist.onyx. There should be an easy way to abstract these.
+stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterion: Criterion = mean_squared_error) {
+ input := memory.make_slice(f32, 3072);
+ defer cfree(input.data);
+ expected : [10] f32;
+
+ training_example_count := dataloader_get_count(dataloader);
+
+ past_100_correct := 0;
+ for i: 10 {
+ for ex: training_example_count {
+ dataloader_get_item(dataloader, ex, input, ~~ expected);
+
+ neural_net_forward(nn, ~~ input);
+ neural_net_backward(nn, ~~ expected, criterion);
+
+ // The optimizing step should be put here.
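+            // A hedged sketch of what a separate optimizer step might look like once
+            // the update is pulled out of neural_net_backward; the helper below is
+            // hypothetical and does not exist yet:
+            //     neural_net_apply_gradients(nn, LEARNING_RATE);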
+
+ label, _ := array.greatest(expected);
+ prediction := neural_net_get_prediction(nn);
+ if prediction == label do past_100_correct += 1;
+
+ if ex % 100 == 0 {
+ print_colored_array :: (arr: [] $T, color_idx: i32, color_code := 94) {
+ for i: arr.count {
+ if i == color_idx {
+ printf("\x1b[%im", color_code);
+ print(arr[i]);
+ print("\x1b[0m ");
+ } else {
+ print(arr[i]);
+ print(" ");
+ }
+ }
+ print("\n");
+ }
+
+ color := 94;
+ if prediction != label do color = 91;
+
+ output := neural_net_get_output(nn);
+
+ print_colored_array(cast([] f32) expected, label, color);
+ print_colored_array(output, prediction, color);
+
+ loss := neural_net_loss(nn, ~~ expected, criterion);
+ printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
+
+ past_100_correct = 0;
+
+ /*
+ if ex % 10000 == 0 {
+ println("Saving neural network...");
+ neural_net_save(nn, "data/test_4.nn");
+ }
+ */
+ }
+ }
+ }
+}
+
+
+main :: (args: [] cstr) {
+ println("Hello World!");
+
+ cifar10_dataloader := cifar10_create();
+ defer cifar10_close(^cifar10_dataloader);
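+    // A possible next step, sketched but untested (the layer sizes are placeholders;
+    // ^cifar10_dataloader should coerce to ^DataLoader through the 'use data' member):
+    //     nn := make_neural_net(3072, 1024, 256, 10);
+    //     defer neural_net_free(^nn);
+    //     stocastic_gradient_descent(^nn, ^cifar10_dataloader);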
+}
\ No newline at end of file
neural_net_forward(nn, ~~ input);
neural_net_backward(nn, ~~ expected, criterion);
+ // The optimizing step should be put here.
+
label, _ := array.greatest(expected);
prediction := neural_net_get_prediction(nn);
if prediction == label do past_100_correct += 1;
printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
past_100_correct = 0;
-
+
+ /*
if ex % 10000 == 0 {
println("Saving neural network...");
- neural_net_save(nn, "data/test_3.nn");
+ neural_net_save(nn, "data/test_4.nn");
}
+ */
}
}
}
// main_allocator := context.allocator;
// context.allocator = alloc.log.logging_allocator(^main_allocator);
-// nn := neural_net_load("data/test_2.nn");
- nn := make_neural_net(28 * 28, 1024, 256, 100, 10);
+ //nn := neural_net_load("data/test_3.nn");
+ nn := make_neural_net(28 * 28, 512, 256, 100, 10);
defer neural_net_free(^nn);
random.set_seed(5234);
use package core
+//
+// General purpose Multi-Layer Perceptron (MLP)
+//
+
NeuralNet :: struct {
layers : [] Layer;
- // CLEANUP: Move these to core.alloc, so the nesting isn't nearly as terrible.
+ // CLEANUP(Brendan Hansen): Move all allocators to core.alloc,
+ // so the nesting isn't nearly as terrible.
layer_arena : alloc.arena.ArenaState;
}
"Expected output does not have the same size as the last layer.");
LEARNING_RATE :: cast(f32) 0.01;
-
+
+ // NOTE(Brendan Hansen):
	// Iterate backwards through the layers (hence the name "back propagation").
	// This order is necessary because we need to know the derivatives of the
	// neurons in the next layer in order to compute the derivatives of the current
	// layer's neurons. This is what keeps the algorithm from being exponentially slow.
while i := layers.count - 1; i >= 1 {
defer i -= 1;
-
+
+ // NOTE(Brendan Hansen):
// For every neuron, we need to calculate its corresponding "delta", which is
		// kind of an ambiguous term here. It specifically means the partial derivative
		// of the loss with respect to the neuron's weighted sum of the previous
		// layer's outputs, i.e. its pre-activation value.
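+		// In symbols: deltas[j] = dLoss/dz[j], where z[j] is neuron j's pre-activation
+		// value. For the hidden layers this works out to
+		//     deltas[j] = (sum over k of next.deltas[k] * next.weights[k][j]) * activation'(z[j]),
+		// which is exactly what layer_backward computes.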
if i == layers.count - 1 {
criterion.compute_deltas(layers[i].deltas, layers[i].neurons, expected_output);
- } else {
- for j: layers[i].neurons.count {
- d_neuron: f32 = 0;
- for k: layers[i + 1].neurons.count {
- d_neuron += layers[i + 1].deltas[k] * layers[i + 1].weights[k][j];
- }
-
- layers[i].deltas[j] = d_neuron;
+ // NOTE(Brendan Hansen):
+ // Here we multiply by the derivative of the activation function for each neuron.
+ // This is done in the layer_backward function, but since that isn't called for the
+ // last layer, it is necessary to do it here.
+ for j: layers[i].deltas.count {
+ d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
+ layers[i].deltas[j] *= d_sigmoid_value;
}
- }
-
- // Here we multiply by the derivative of the activation function for each neuron.
- for j: layers[i].deltas.count {
- d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
- layers[i].deltas[j] *= d_sigmoid_value;
+
+ } else {
+ layer_backward(^layers[i], ^layers[i + 1]);
}
}
-
+
+ // NOTE(Brendan Hansen):
// Once all the deltas are computed, we can use them to compute the actual
// derivatives and update the biases and weights.
// This part is responsible for optimization, and can easily be swapped out.
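+	// With this code's sign convention for the deltas, a plain SGD step is roughly
+	//     weights[j][k] += LEARNING_RATE * deltas[j] * prev_layer.neurons[k];
+	// for the weight connecting neuron k of the previous layer to neuron j of this one
+	// (plus a similar '+= LEARNING_RATE * deltas[j]' update for the bias term).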
}
}
+layer_backward :: (use layer: ^Layer, next_layer: ^Layer) {
+ for j: neurons.count {
+ d_neuron: f32 = 0;
+ for k: next_layer.neurons.count {
+ d_neuron += next_layer.deltas[k] * next_layer.weights[k][j];
+ }
+
+ d_sigmoid_value := activation.backward(neurons[j], pre_activation_neurons[j]);
+
+		// This could easily become '+=', which would allow the gradient to be
+		// accumulated over several samples before taking a step.
+ deltas[j] = d_neuron * d_sigmoid_value;
+ }
+}
+
Onyx_NN_Magic_Number :: 0x4E4E584F
},
compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
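+		// This is the negated derivative of the mean squared error
+		// (1/n) * sum((expected - prediction)^2) with respect to each prediction,
+		// so the deltas point in the descent direction.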
- // Leaving the assert turned off for right now.
- // assert(predictions.count == expected.count && expected.count == deltas.count, "Expected output does not have the same size as predictions.");
-
for j: deltas.count {
deltas[j] = 2 * (expected[j] - predictions[j]) / ~~expected.count;
}
},
compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
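+		// Mean-absolute-error style deltas: the sign of (expected[j] - predictions[j]),
+		// scaled by 1/n below, again pointing in the descent direction.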
- // Leaving the assert turned off for right now.
- // assert(predictions.count == expected.count, "Expected output does not have the same size as predictions.");
-
for j: deltas.count {
deltas[j] = 1.0f;
if expected[j] < predictions[j] do deltas[j] = -1.0f;
+ // TODO(Brendan Hansen):
// Technically, this division should be here, but it doesn't appear to be helping the gradient descent.
deltas[j] /= cast(f32) expected.count;
}
//
// Very basic data structure that represents something you can load data out of.
// Specifically, an input and output at a particular index.
+//
DataLoader :: struct {
vtable : ^DataLoader_Functions;