From 529eb0ff32e734e452c555ac87556a1fcba3f597 Mon Sep 17 00:00:00 2001
From: Brendan Hansen
Date: Thu, 21 Jan 2021 17:21:05 -0600
Subject: [PATCH] training one example works!

---
 src/mnist.onyx     | 16 ++++++++++------
 src/neuralnet.onyx | 48 +++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/src/mnist.onyx b/src/mnist.onyx
index 4fa73ec..ee42222 100644
--- a/src/mnist.onyx
+++ b/src/mnist.onyx
@@ -43,12 +43,16 @@ main :: (args: [] cstr) {
     // CLEANUP: The double cast that is necessary here is gross.
     for i: input.count do input[i] = (cast(f32) cast(u32) example[i]) / 255;
 
-    neural_net_forward(^nn, ~~ input);
-    output := neural_net_get_output(^nn);
+    for i: 500 {
+        neural_net_forward(^nn, ~~ input);
+        output := neural_net_get_output(^nn);
+        for o: output do printf("%f ", o);
+        print("\n");
 
-    for o: output do println(o);
+        expected := f32.[ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 ];
+        loss := neural_net_loss(^nn, ~~ expected);
+        printf("MSE loss: %f\n", loss);
 
-    expected := f32.[ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 ];
-    loss := neural_net_loss(^nn, ~~ expected);
-    printf("MSE loss: %f\n", loss);
+        neural_net_backward(^nn, ~~ expected);
+    }
 }
\ No newline at end of file
diff --git a/src/neuralnet.onyx b/src/neuralnet.onyx
index e1cd81a..c49bcb6 100644
--- a/src/neuralnet.onyx
+++ b/src/neuralnet.onyx
@@ -42,7 +42,39 @@
 }
 
 neural_net_backward :: (use nn: ^NeuralNet, expected_output: [] float) {
+    assert(layers[layers.count - 1].neurons.count == expected_output.count,
+        "Expected output does not have the same size as the last layer.");
+
+    LEARNING_RATE :: cast(float) 0.01;
+
+    while i := layers.count - 1; i >= 1 {
+        defer i -= 1;
+        for j: layers[i].neurons.count {
+            sigmoid_value := layers[i].neurons[j];
+            d_sigmoid_value := sigmoid_value * (1 - sigmoid_value);
+
+            if i == layers.count - 1 {
+                layers[i].deltas[j] = 2 * (expected_output[j] - sigmoid_value) * d_sigmoid_value;
+            } else {
+                d_neuron: float = 0;
+                for k: layers[i + 1].neurons.count {
+                    d_neuron += layers[i + 1].deltas[k] * layers[i + 1].weights[k][j];
+                }
+                layers[i].deltas[j] = d_neuron * d_sigmoid_value;
+            }
+        }
+    }
+
+    for i: 1 .. layers.count {
+        for j: layers[i].neurons.count {
+            layers[i].biases[j] += LEARNING_RATE * layers[i].deltas[j];
+
+            for k: layers[i].weights[j].count {
+                layers[i].weights[j][k] += LEARNING_RATE * layers[i].deltas[j] * layers[i].neurons[k];
+            }
+        }
+    }
 }
 
 neural_net_get_output :: (use nn: ^NeuralNet) -> [] float {
@@ -62,18 +94,22 @@ neural_net_loss :: (use nn: ^NeuralNet, expected_output: [] float) -> float {
         squared_sum += diff * diff;
     }
 
-    loss := math.sqrt(squared_sum);
+    loss := squared_sum / ~~expected_output.count;
 
     return loss;
 }
 
 Layer :: struct {
     neurons : [] float;
+    biases : [] float;
     weights : [][] float; // CLEANUP: Make this a rank 1 slice
+    deltas : [] float;
 }
 
 init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocator := context.allocator) {
     neurons = memory.make_slice(float, layer_size, allocator);
+    deltas = memory.make_slice(float, layer_size, allocator);
+    biases = memory.make_slice(float, layer_size, allocator);
 
     if prev_layer_size > 0 {
         weights = memory.make_slice(#type [] float, layer_size, allocator);
@@ -82,21 +118,23 @@ init_layer :: (use layer: ^Layer, layer_size: u32, prev_layer_size: u32, allocat
             *weight = memory.make_slice(float, prev_layer_size, allocator);
         }
 
-        randomize_weights(layer);
+        randomize_weights_and_biases(layer);
     }
 }
 
-randomize_weights :: (use layer: ^Layer) {
+randomize_weights_and_biases :: (use layer: ^Layer) {
     for ^weight: weights {
         for ^w: *weight {
-            *w = random.float(-1.0f, 1.0f);
+            *w = random.float(-0.5f, -0.5f);
        }
    }
+
+    for ^bias: biases do *bias = random.float(-0.5f, 0.5f);
 }
 
 layer_forward :: (use layer: ^Layer, prev_layer: ^Layer) {
     for i: neurons.count {
-        neurons[i] = 0;
+        neurons[i] = biases[i];
         for j: weights[i].count {
            neurons[i] += prev_layer.neurons[j] * weights[i][j];
        }
-- 
2.25.1
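
For readers following the math: the revised neural_net_loss computes the mean squared error, loss = (1/N) * sum_j (expected[j] - output[j])^2, in place of the old square root of the squared sum. Its derivative with respect to output[j] is -(2/N) * (expected[j] - output[j]); the 2 * (expected_output[j] - sigmoid_value) factor that seeds the output-layer deltas in neural_net_backward is this derivative with the sign flipped for gradient descent and the constant 1/N dropped, and multiplying by d_sigmoid_value = sigmoid_value * (1 - sigmoid_value) carries the gradient back through the sigmoid activation.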
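
The same update can be sketched in NumPy for comparison. This is a minimal illustration under assumptions, not part of the patch: the layer sizes, seed, and every name below are made up, and the weight update uses the textbook rule of multiplying each delta by the previous layer's activation (the Onyx code indexes layers[i].neurons[k] at that point).

# Minimal NumPy sketch of the backward pass added in this patch.
# Network shape and names are assumed; they are not taken from the Onyx source.
import numpy as np

rng = np.random.default_rng(0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# weights[i][j, k] connects neuron k of one layer to neuron j of the next,
# mirroring the per-layer weights[j][k] slices in neuralnet.onyx.
sizes   = [784, 30, 10]   # assumed layer sizes
weights = [rng.uniform(-0.5, 0.5, (sizes[i + 1], sizes[i])) for i in range(len(sizes) - 1)]
biases  = [rng.uniform(-0.5, 0.5, sizes[i + 1]) for i in range(len(sizes) - 1)]

def forward(x):
    # Returns the activations of every layer, input included.
    acts = [x]
    for W, b in zip(weights, biases):
        acts.append(sigmoid(W @ acts[-1] + b))
    return acts

def mse_loss(output, expected):
    # squared_sum / expected_output.count, as in the revised neural_net_loss.
    return np.mean((output - expected) ** 2)

def backward(acts, expected, lr=0.01):
    deltas = [None] * len(weights)

    # Output layer: delta = 2 * (expected - output) * sigmoid'(output).
    out = acts[-1]
    deltas[-1] = 2.0 * (expected - out) * out * (1.0 - out)

    # Hidden layers: push each delta back through the next layer's weights.
    for i in range(len(weights) - 2, -1, -1):
        a = acts[i + 1]
        deltas[i] = (weights[i + 1].T @ deltas[i + 1]) * a * (1.0 - a)

    # Gradient step: bias[j] += lr * delta[j];
    # weight[j][k] += lr * delta[j] * activation of previous-layer neuron k.
    for i in range(len(weights)):
        biases[i]  += lr * deltas[i]
        weights[i] += lr * np.outer(deltas[i], acts[i])

Calling forward(input), mse_loss(acts[-1], expected), and backward(acts, expected) repeatedly corresponds to the 500-iteration single-example training loop this patch adds to mnist.onyx.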