From: Brendan Hansen
Date: Wed, 27 Jan 2021 04:13:19 +0000 (-0600)
Subject: cleanup; started work on CIFAR-10 dataset
X-Git-Url: https://git.brendanfh.com/?a=commitdiff_plain;h=79fdea43b7af8d4900f06b77ca8fd779cd1df450;p=onyx-mnist.git

cleanup; started work on CIFAR-10 dataset
---

diff --git a/project.4coder b/project.4coder
index 9306168..50d3127 100644
--- a/project.4coder
+++ b/project.4coder
@@ -19,7 +19,7 @@ load_paths = {
  { load_paths_custom, .os = "mac" },
 };

-build_win32 = "\\dev\\onyx\\onyx.exe -V src\\mnist.onyx -o mnist.wasm";
+build_win32 = "\\dev\\onyx\\onyx.exe -V src\\cifar10.onyx -o network.wasm";
 build_linux = "/usr/bin/onyx -V src/mnist.onyx -o mnist.wasm";

 command_list = {
diff --git a/src/cifar10.onyx b/src/cifar10.onyx
new file mode 100644
index 0000000..edfbc32
--- /dev/null
+++ b/src/cifar10.onyx
@@ -0,0 +1,117 @@
+#load "core/std/wasi"
+
+#load_path "src"
+#load "neuralnet"
+
+use package core
+
+CIFAR10_DataLoader :: struct {
+    use data : DataLoader;
+
+    data_file : io.FileStream;
+}
+
+cifar10_create :: (file_location := "data/cifar-10-batches-bin/data_batch_1.bin") -> CIFAR10_DataLoader {
+    dataset : CIFAR10_DataLoader;
+    dataset.vtable = ^cifar10_dataloader_functions;
+
+    err : io.Error;
+    err, dataset.data_file = io.open(file_location);
+    assert(err == io.Error.None, "Failed to open CIFAR10 dataset file");
+
+    return dataset;
+}
+
+cifar10_close :: (use dataset: ^CIFAR10_DataLoader) {
+    io.stream_close(^data_file);
+}
+
+cifar10_dataloader_functions := DataLoader_Functions.{
+    get_count = (use dataset: ^CIFAR10_DataLoader) -> u32 {
+        return 10000;
+    },
+
+    get_item = (use dataset: ^CIFAR10_DataLoader, index: u32, input: [] f32, output: [] f32) -> bool {
+        assert(input.count == 3072, "Input slice was of wrong size. Expected 3072.");
+        assert(output.count == 10, "Output slice was of wrong size. Expected 10.");
+
+        if index > 10000 do return false;
+
+        location := index * (3072 + 1);
+        sample : [3072 + 1] u8;
+        _, bytes_read := io.stream_read_at(^data_file, location, ~~ sample);
+
+        label := ~~sample[0];
+        // TODO(Brendan Hansen): NOT DONE
+    }
+};
+
+
+// TODO(Brendan Hansen): This was copied from mnist.onyx. There should be an easy way to abstract these.
+stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterion: Criterion = mean_squared_error) {
+    input := memory.make_slice(f32, 3072);
+    defer cfree(input.data);
+    expected : [10] f32;
+
+    training_example_count := dataloader_get_count(dataloader);
+
+    past_100_correct := 0;
+    for i: 10 {
+        for ex: training_example_count {
+            dataloader_get_item(dataloader, ex, input, ~~ expected);
+
+            neural_net_forward(nn, ~~ input);
+            neural_net_backward(nn, ~~ expected, criterion);
+
+            // The optimizing step should be put here.
+
+            label, _ := array.greatest(expected);
+            prediction := neural_net_get_prediction(nn);
+            if prediction == label do past_100_correct += 1;
+
+            if ex % 100 == 0 {
+                print_colored_array :: (arr: [] $T, color_idx: i32, color_code := 94) {
+                    for i: arr.count {
+                        if i == color_idx {
+                            printf("\x1b[%im", color_code);
+                            print(arr[i]);
+                            print("\x1b[0m ");
+                        } else {
+                            print(arr[i]);
+                            print(" ");
+                        }
+                    }
+                    print("\n");
+                }
+
+                color := 94;
+                if prediction != label do color = 91;
+
+                output := neural_net_get_output(nn);
+
+                print_colored_array(cast([] f32) expected, label, color);
+                print_colored_array(output, prediction, color);
+
+                loss := neural_net_loss(nn, ~~ expected, criterion);
+                printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);
+
+                past_100_correct = 0;
+
+                /*
+                if ex % 10000 == 0 {
+                    println("Saving neural network...");
+                    neural_net_save(nn, "data/test_4.nn");
+                }
+                */
+            }
+        }
+    }
+}
+
+
+main :: (args: [] cstr) {
+    println("Hello World!");
+
+    cifar10_dataloader := cifar10_create();
+    defer cifar10_close(^cifar10_dataloader);
+}
\ No newline at end of file
diff --git a/src/mnist.onyx b/src/mnist.onyx
index b391419..155542a 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -77,6 +77,8 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterio
             neural_net_forward(nn, ~~ input);
             neural_net_backward(nn, ~~ expected, criterion);

+            // The optimizing step should be put here.
+
             label, _ := array.greatest(expected);
             prediction := neural_net_get_prediction(nn);
             if prediction == label do past_100_correct += 1;
@@ -108,11 +110,13 @@ stocastic_gradient_descent :: (nn: ^NeuralNet, dataloader: ^DataLoader, criterio
                 printf("Loss: %f Correct: %i / 100\n", cast(f32) loss, past_100_correct);

                 past_100_correct = 0;
-
+
+                /*
                 if ex % 10000 == 0 {
                     println("Saving neural network...");
-                    neural_net_save(nn, "data/test_3.nn");
+                    neural_net_save(nn, "data/test_4.nn");
                 }
+                */
             }
         }
     }
@@ -123,8 +127,8 @@ main :: (args: [] cstr) {
 // main_allocator := context.allocator;
 // context.allocator = alloc.log.logging_allocator(^main_allocator);

-// nn := neural_net_load("data/test_2.nn");
-    nn := make_neural_net(28 * 28, 1024, 256, 100, 10);
+    //nn := neural_net_load("data/test_3.nn");
+    nn := make_neural_net(28 * 28, 512, 256, 100, 10);
     defer neural_net_free(^nn);

     random.set_seed(5234);
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx
index c50f4e5..e91535a 100644
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -1,9 +1,14 @@
 use package core

+//
+// General purpose Multi-Layer Perceptron (MLP)
+//
+
 NeuralNet :: struct {
     layers : [] Layer;

-    // CLEANUP: Move these to core.alloc, so the nesting isn't nearly as terrible.
+    // CLEANUP(Brendan Hansen): Move all allocators to core.alloc,
+    // so the nesting isn't nearly as terrible.
     layer_arena : alloc.arena.ArenaState;
 }

@@ -47,14 +52,16 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] f32, criterion:
         "Expected output does not have the same size as the last layer.");
     LEARNING_RATE :: cast(f32) 0.01;
-
+
+    // NOTE(Brendan Hansen):
     // Iterating backwards through the layers (hence the name "back propagation")
     // The reason this is necessary is that we need to know the derivatives of
     // neurons in the next layer to compute the derivatives of the current layer's
     // neurons. This is what makes this algorithm not exponentially slow.
     while i := layers.count - 1; i >= 1 {
         defer i -= 1;
-
+
+        // NOTE(Brendan Hansen):
         // For every neuron, we need to calculate its corresponding "delta", which is
         // kind of an ambiguous term here. It specifically means the partial derivative
         // of the loss with respect to the weighted sum of the previous layer's
         // neurons.
@@ -65,24 +72,21 @@ neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] f32, criterion:

         if i == layers.count - 1 {
             criterion.compute_deltas(layers[i].deltas, layers[i].neurons, expected_output);
-        } else {
-            for j: layers[i].neurons.count {
-                d_neuron: f32 = 0;
-                for k: layers[i + 1].neurons.count {
-                    d_neuron += layers[i + 1].deltas[k] * layers[i + 1].weights[k][j];
-                }
-
-                layers[i].deltas[j] = d_neuron;
+            // NOTE(Brendan Hansen):
+            // Here we multiply by the derivative of the activation function for each neuron.
+            // This is done in the layer_backward function, but since that isn't called for the
+            // last layer, it is necessary to do it here.
+            for j: layers[i].deltas.count {
+                d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
+                layers[i].deltas[j] *= d_sigmoid_value;
             }
-        }
-
-        // Here we multiply by the derivative of the activation function for each neuron.
-        for j: layers[i].deltas.count {
-            d_sigmoid_value := layers[i].activation.backward(layers[i].neurons[j], layers[i].pre_activation_neurons[j]);
-            layers[i].deltas[j] *= d_sigmoid_value;
+
+        } else {
+            layer_backward(^layers[i], ^layers[i + 1]);
         }
     }
-
+
+    // NOTE(Brendan Hansen):
     // Once all the deltas are computed, we can use them to compute the actual
     // derivatives and update the biases and weights.
     // This part is responsible for optimization, and can easily be swapped out.
@@ -184,6 +188,21 @@ layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
     }
 }

+layer_backward :: (use layer: ^Layer, next_layer: ^Layer) {
+    for j: neurons.count {
+        d_neuron: f32 = 0;
+        for k: next_layer.neurons.count {
+            d_neuron += next_layer.deltas[k] * next_layer.weights[k][j];
+        }
+
+        d_sigmoid_value := activation.backward(neurons[j], pre_activation_neurons[j]);
+
+        // This could easily become '+=', which would allow for an accumulated gradient,
+        // before taking a step.
+        deltas[j] = d_neuron * d_sigmoid_value;
+    }
+}
+
 Onyx_NN_Magic_Number :: 0x4E4E584F


@@ -391,9 +410,6 @@ mean_squared_error := Criterion.{
     },

     compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
-        // Leaving the assert turned off for right now.
-        // assert(predictions.count == expected.count && expected.count == deltas.count, "Expected output does not have the same size as predictions.");
-
         for j: deltas.count {
             deltas[j] = 2 * (expected[j] - predictions[j]) / ~~expected.count;
         }
@@ -415,13 +431,11 @@ mean_absolute_error := Criterion.{
     },

     compute_deltas = (deltas: [] f32, predictions: [] f32, expected: [] f32) {
-        // Leaving the assert turned off for right now.
-        // assert(predictions.count == expected.count, "Expected output does not have the same size as predictions.");
-
         for j: deltas.count {
             deltas[j] = 1.0f;
             if expected[j] < predictions[j] do deltas[j] = -1.0f;

+            // TODO(Brendan Hansen):
             // Technically, this division should be here, but it doesn't appear to be helping the gradient descent.
             deltas[j] /= cast(f32) expected.count;
         }
@@ -435,6 +449,7 @@
 //
 // Very basic data structure that represents something you can load data out of.
 // Specifically, an input and output at a particular index.
+// DataLoader :: struct { vtable : ^DataLoader_Functions;
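
Note on the record layout that get_item in src/cifar10.onyx above is reading: a CIFAR-10 "data_batch" file holds 10000 records of 3073 bytes each, one label byte (0-9) followed by 3072 pixel bytes (32x32 pixels, three channel planes), which is why the code seeks to index * (3072 + 1) and reads a [3072 + 1] u8 buffer. The commit stops at the TODO, so the lines below are only a sketch of how the body might continue under that assumed layout; the casts, literals, and loop forms are written by analogy with the surrounding Onyx code and are not part of the commit.

        // Sketch only, assuming the standard CIFAR-10 binary layout.
        // One-hot encode the label byte (0-9) into the 10 output slots.
        label_index := cast(u32) sample[0];
        for i: output.count {
            output[i] = 0.0f;
        }
        output[label_index] = 1.0f;

        // Scale the 3072 pixel bytes (red, green, blue planes) into [0, 1].
        for i: input.count {
            input[i] = (cast(f32) sample[i + 1]) / 255.0f;
        }

        return true;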
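Note on the neural_net_backward / layer_backward changes above: the value stored in deltas[j] for layer i is the usual back-propagation quantity, the partial derivative of the loss with respect to that neuron's pre-activation (pre_activation_neurons). Writing z for the pre-activations, a for neurons, w_kj for weights[k][j], and sigma' for activation.backward, the code corresponds to the sketch below, using the sign convention of compute_deltas in mean_squared_error; the actual weight and bias update sits outside the hunks shown here.

    \delta_j^{(i)} = \frac{\partial L}{\partial z_j^{(i)}}
                   = \Big( \sum_k \delta_k^{(i+1)} \, w_{kj}^{(i+1)} \Big) \, \sigma'\big(z_j^{(i)}\big)
                   \quad \text{(hidden layers, as in layer\_backward)}

    \delta_j^{(\mathrm{last})} = \frac{2}{n} \big( y_j - a_j^{(\mathrm{last})} \big) \, \sigma'\big(z_j^{(\mathrm{last})}\big)
                   \quad \text{(output layer, mean\_squared\_error)}

In standard stochastic gradient descent, the gradient for weights[j][k] is then delta_j times the previous layer's activation, scaled here by LEARNING_RATE; the '+=' remark in layer_backward is about accumulating these deltas over several examples before taking such a step.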