{ load_paths_custom, .os = "mac" },
};
-build_win32 = "\\dev\\onyx\\onyx.exe -V src\\mnist.onyx -o mnist.wasm";
+build_win32 = "\\dev\\onyx\\onyx.exe -V src\\cifar10.onyx -o network.wasm";
build_linux = "/usr/bin/onyx -V src/mnist.onyx -o mnist.wasm";
command_list = {
--- /dev/null
+#load "core/std/wasi"
+
+#load_path "src"
+#load "neuralnet"
+
+use package core
+
+CIFAR10_DataLoader :: struct {
+ use data : DataLoader;
+
+ data_file : io.FileStream;
+}
+
+cifar10_create :: (file_location := "data/cifar-10-batches-bin/data_batch_1.bin") -> CIFAR10_DataLoader {
+ dataset : CIFAR10_DataLoader;
+ dataset.vtable = ^cifar10_dataloader_functions;
+
+ err : io.Error;
+ err, dataset.data_file = io.open(file_location);
+ assert(err == io.Error.None, "Failed to open CIFAR10 dataset file");
+
+ return dataset;
+}
+
+cifar10_close :: (use dataset: ^CIFAR10_DataLoader) {
+ io.stream_close(^data_file);
+}
+
+cifar10_dataloader_functions := DataLoader_Functions.{
+ get_count = (use dataset: ^CIFAR10_DataLoader) -> u32 {
+ return 10000;
+ },
+
+ get_item = (use dataset: ^CIFAR10_DataLoader, index: u32, input: [] f32, output: [] f32) -> bool {
+ assert(input.count == 3072, "Input slice was of wrong size. Expected 3072.");
+ assert(output.count == 10, "Output slice was of wrong size. Expected 10.");
+
+        if index >= 10000 do return false;
+
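+        // Each record in the CIFAR-10 binary format is 1 label byte followed by
+        // 3072 pixel bytes (32x32 pixels, 3 color channels), hence the 3073-byte stride.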
+ location := index * (3072 + 1);
+ sample : [3072 + 1] u8;
+ _, bytes_read := io.stream_read_at(^data_file, location, ~~ sample);
+
+        label := cast(u32) sample[0];
+ // TODO(Brendan Hansen): NOT DONE
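+        // One possible completion (a hedged sketch, not the final implementation):
+        // scale the pixel bytes into [0, 1] for the input, one-hot encode the label
+        // into the output, then report success.
+        //     for i: 3072 do input[i] = (cast(f32) sample[i + 1]) / 255.0f;
+        //     for i: 10   do output[i] = 0.0f;
+        //     output[label] = 1.0f;
+        //     return true;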
+ }
+};
+
+
+// TODO(Brendan Hansen): This was copied from mnist.onyx. There should be an easy way to abstract these.
+stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterion: Criterion = mean_squared_error) {
+ input := memory.make_slice(f32, 3072);
+ defer cfree(input.data);
+ expected : [10] f32;
+
+ training_example_count := dataloader_get_count(dataloader);
+
+ past_100_correct := 0;
+ for i: 10 {
+ for ex: training_example_count {
+ dataloader_get_item(dataloader, ex, input, ~~ expected);
+
+ neural_net_forward(nn, ~~ input);
+ neural_net_backward(nn, ~~ expected, criterion);
+
+ // The optimizing step should be put here.
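+            // A hedged sketch of what a separate optimizer step might look like once
+            // the update is pulled out of neural_net_backward; the helper below is
+            // hypothetical and does not exist yet:
+            //     neural_net_apply_gradients(nn, LEARNING_RATE);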
+
+ label, _ := array.greatest(expected);
+ prediction := neural_net_get_prediction(nn);
+ if prediction == label do past_100_correct += 1;
+
+ if ex % 100 == 0 {
+ print_colored_array :: (arr: [] $T, color_idx: i32, color_code := 94) {
+ for i: arr.count {
+ if i == color_idx {
+ printf("\x1b[%im", color_code);
+ print(arr[i]);
+ print("\x1b[0m ");
+ } else {
+ print(arr[i]);
+ print(" ");
+ }
+ }
+ print("\n");
+ }
+
+ color := 94;
+ if prediction != label do color = 91;
+
+ output := neural_net_get_output(nn);
+
+ print_colored_array(cast([] f32) expected, label, color);
+ print_colored_array(output, prediction, color);
+
+ loss := neural_net_loss(nn, ~~ expected, criterion);
+ printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
+
+ past_100_correct = 0;
+
+ /*
+ if ex % 10000 == 0 {
+ println("Saving neural network...");
+ neural_net_save(nn, "data/test_4.nn");
+ }
+ */
+ }
+ }
+ }
+}
+
+
+main :: (args: [] cstr) {
+ println("Hello World!");
+
+ cifar10_dataloader := cifar10_create();
+ defer cifar10_close(^cifar10_dataloader);
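+    // A possible next step, sketched but untested (the layer sizes are placeholders;
+    // ^cifar10_dataloader should coerce to ^DataLoader through the 'use data' member):
+    //     nn := make_neural_net(3072, 1024, 256, 10);
+    //     defer neural_net_free(^nn);
+    //     stocastic_gradient_descent(^nn, ^cifar10_dataloader);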
+}
\ No newline at end of file
neural_net_forward(nn, ~~ input);
neural_net_backward(nn, ~~ expected, criterion);
+ // The optimizing step should be put here.
+
label, _ := array.greatest(expected);
prediction := neural_net_get_prediction(nn);
if prediction == label do past_100_correct += 1;
printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
past_100_correct = 0;
-
+
+ /*
if ex % 10000 == 0 {
println("Saving neural network...");
- neural_net_save(nn, "data/test_3.nn");
+ neural_net_save(nn, "data/test_4.nn");
}
+ */
}
}
}
// main_allocator := context.allocator;
// context.allocator = alloc.log.logging_allocator(^main_allocator);
-// nn := neural_net_load("data/test_2.nn");
- nn := make_neural_net(28 * 28, 1024, 256, 100, 10);
+ //nn := neural_net_load("data/test_3.nn");
+ nn := make_neural_net(28 * 28, 512, 256, 100, 10);
defer neural_net_free(^nn);
random.set_seed(5234);
use package core
+//
+// General purpose Multi-Layer Perceptron (MLP)
+//
+
NeuralNet :: struct {
layers : [] Layer;
- // CLEANUP: Move these to core.alloc, so the nesting isn't nearly as terrible.
+ // CLEANUP(Brendan Hansen): Move all allocators to core.alloc,
+ // so the nesting isn't nearly as terrible.
layer_arena : alloc.arena.ArenaState;
}
"Expected output does not have the same size as the last layer.");
LEARNING_RATE :: cast(f32) 0.01;
-
+
+ // NOTE(Brendan Hansen):
	// Iterate backwards through the layers (hence the name "back propagation").
	// This order is necessary because we need to know the derivatives of the
	// neurons in the next layer in order to compute the derivatives of the current
	// layer's neurons. This is what keeps the algorithm from being exponentially slow.
while i := layers.count - 1; i >= 1 {
defer i -= 1;
-
+
+ // NOTE(Brendan Hansen):
// For every neuron, we need to calculate its corresponding "delta", which is
		// kind of an ambiguous term here. It specifically means the partial derivative
		// of the loss with respect to the neuron's weighted sum of the previous
		// layer's outputs, i.e. its pre-activation value.
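+		// In symbols: deltas[j] = dLoss/dz[j], where z[j] is neuron j's pre-activation
+		// value. For the hidden layers this works out to
+		//     deltas[j] = (sum over k of next.deltas[k] * next.weights[k][j]) * activation'(z[j]),
+		// which is exactly what layer_backward computes.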
if i == layers.count - 1 {
criterion.compute_deltas(layers[i].deltas, layers[i].neurons, expected_output);
- } else {
- for j: layers[i].neurons.count {
- d_neuron: f32 = 0;
- for k: layers[i + 1].neurons.count {
- d_neuron += layers[i + 1].deltas[k] * layers[i + 1].weights[k][j];
- }
-
- layers[i].deltas[j] = d_neuron;
+ // NOTE(Brendan Hansen):
+ // Here we multiply by the derivative of the activation function for each neuron.
+ // This is done in the layer_backward function, but since that isn't called for the
+ // last layer, it is necessary to do it here.
+ for j: layers[i].deltas.count {
+ d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
+ layers[i].deltas[j] *= d_sigmoid_value;
}
- }
-
- // Here we multiply by the derivative of the activation function for each neuron.
- for j: layers[i].deltas.count {
- d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
- layers[i].deltas[j] *= d_sigmoid_value;
+
+ } else {
+ layer_backward(^layers[i], ^layers[i + 1]);
}
}
-
+
+ // NOTE(Brendan Hansen):
// Once all the deltas are computed, we can use them to compute the actual
// derivatives and update the biases and weights.
// This part is responsible for optimization, and can easily be swapped out.
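+	// With this code's sign convention for the deltas, a plain SGD step is roughly
+	//     weights[j][k] += LEARNING_RATE * deltas[j] * prev_layer.neurons[k];
+	// for the weight connecting neuron k of the previous layer to neuron j of this one
+	// (plus a similar '+= LEARNING_RATE * deltas[j]' update for the bias term).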
}
}
+layer_backward :: (use layer: ^Layer, next_layer: ^Layer) {
+ for j: neurons.count {
+ d_neuron: f32 = 0;
+ for k: next_layer.neurons.count {
+ d_neuron += next_layer.deltas[k] * next_layer.weights[k][j];
+ }
+
+ d_sigmoid_value := activation.backward(neurons[j], pre_activation_neurons[j]);
+
+		// This could easily become '+=', which would allow the gradient to be
+		// accumulated over several samples before taking a step.
+ deltas[j] = d_neuron * d_sigmoid_value;
+ }
+}
+
Onyx_NN_Magic_Number :: 0x4E4E584F
},
compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
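+		// This is the negated derivative of the mean squared error
+		// (1/n) * sum((expected - prediction)^2) with respect to each prediction,
+		// so the deltas point in the descent direction.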
- // Leaving the assert turned off for right now.
- // assert(predictions.count == expected.count && expected.count == deltas.count, "Expected output does not have the same size as predictions.");
-
for j: deltas.count {
deltas[j] = 2 * (expected[j] - predictions[j]) / ~~expected.count;
}
},
compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
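+		// Mean-absolute-error style deltas: the sign of (expected[j] - predictions[j]),
+		// scaled by 1/n below, again pointing in the descent direction.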
- // Leaving the assert turned off for right now.
- // assert(predictions.count == expected.count, "Expected output does not have the same size as predictions.");
-
for j: deltas.count {
deltas[j] = 1.0f;
if expected[j] < predictions[j] do deltas[j] = -1.0f;
+ // TODO(Brendan Hansen):
// Technically, this division should be here, but it doesn't appear to be helping the gradient descent.
deltas[j] /= cast(f32) expected.count;
}
//
// Very basic data structure that represents something you can load data out of.
// Specifically, an input and output at a particular index.
+//
DataLoader :: struct {
vtable : ^DataLoader_Functions;