diff --git a/src/activation_function.hpp b/src/activation_function.hpp
index 79a69b8..fd20741 100644
--- a/src/activation_function.hpp
+++ b/src/activation_function.hpp
@@ -3,17 +3,49 @@
 #include <algorithm>
 #include <cmath>
+#include <cstddef>
+#include <vector>

 /**
  * Functor to set the activation function as a Sigmoid function
  */
 struct Sigmoid {
-  double operator()(double z) { return 1 / (1 + exp(-z)); };
+  void operator()(std::vector<float> &z) {
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = 1 / (1 + exp(-z[i]));
+    };
+  };
+  double init_stddev(int n) { return sqrt(1.0 / n); };
 };

 /**
  * Functor to set the activation function as Rectified Linear Unit
  */
 struct ReLU {
-  double operator()(double z) { return std::max(z, 0.0); };
+  void operator()(std::vector<float> &z) {
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = std::max(0.0f, z[i]);
+    };
+  };
+  double init_stddev(int n) { return sqrt(2.0 / n); };
 };
+
+/**
+ * SoftMax Activation function.
+ * This is generally used in the final output layer
+ */
+struct SoftMax {
+  void operator()(std::vector<float> &z) {
+    float zmax = *std::max_element(z.begin(), z.end());
+    float sum = 0.0;
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = exp(z[i] - zmax);
+      sum += z[i];
+    };
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = z[i] / sum;
+    };
+  };
+  double init_stddev(int n) { return sqrt(1.0 / n); };
+};
+
 #endif
diff --git a/src/neural_net.cpp b/src/neural_net.cpp
new file mode 100644
index 0000000..c2ba86c
--- /dev/null
+++ b/src/neural_net.cpp
@@ -0,0 +1,30 @@
+#include "neural_net.hpp"
+#include <functional>
+#include <numeric>
+#include <random>
+#include <vector>
+
+template <class ActivationFunction>
+NeuralNet<ActivationFunction>::NeuralNet(std::vector<int> &layer_sizes)
+    : m_sizes(layer_sizes) {
+  int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(), 0,
+                                      std::plus<int>());
+  // Initialize the activation function
+  m_activation_func = ActivationFunction();
+
+  // Create random sampling device
+  std::random_device rd{};
+  std::mt19937 gen{rd()};
+  std::normal_distribution<double> dist{0.0, 1.0};
+
+  // Initialize the weights (resize first so every index is valid)
+  m_weights.resize(total_neurons);
+  int start_idx = 0;
+  for (auto size : m_sizes) {
+    for (int i = 0; i < size; i++) {
+      m_weights[i + start_idx] =
+          dist(gen) * m_activation_func.init_stddev(size);
+    }
+    start_idx += size;
+  }
+}
diff --git a/src/neural_net.hpp b/src/neural_net.hpp
index 32bb5fb..4e8cf34 100644
--- a/src/neural_net.hpp
+++ b/src/neural_net.hpp
@@ -1,5 +1,15 @@
 #ifndef NEURAL_NET_H
 #define NEURAL_NET_H
-template <class ActivationFunction> class NeuralNet : ActivationFunction {};
+#include <vector>
+template <class ActivationFunction> class NeuralNet {
+public:
+  NeuralNet(std::vector<int> &layer_sizes);
+
+private:
+  ActivationFunction m_activation_func;
+  std::vector<int> m_sizes;
+  std::vector<float> m_weights;
+  std::vector<float> feed_forward(std::vector<float> x);
+};

 #endif
diff --git a/tests/unit_tests/test_activation_functions.cpp b/tests/unit_tests/test_activation_functions.cpp
index c444f78..2e47fa7 100644
--- a/tests/unit_tests/test_activation_functions.cpp
+++ b/tests/unit_tests/test_activation_functions.cpp
@@ -1,41 +1,75 @@
 #include <gtest/gtest.h>
 #include "../../src/activation_function.hpp"
 #include <cmath>
+#include <vector>

 TEST(ActivationFunctionTest, SigmoidTest) {
   Sigmoid sigmoid;
+  std::vector<float> input = {0.0, 10.0, -10.0, 1.0, -1.0};
+  std::vector<double> expected = {
+      0.5,
+      0.9999546,
+      0.0000454,
+      1.0 / (1.0 + exp(-1.0)),
+      1.0 / (1.0 + exp(1.0))
+  };

-  // Test sigmoid at x = 0 (should be 0.5)
-  EXPECT_NEAR(sigmoid(0.0), 0.5, 1e-6);
+  std::vector<float> test = input;
+  sigmoid(test);

-  // Test sigmoid at large positive value (should approach 1)
-  EXPECT_NEAR(sigmoid(10.0), 1.0, 1e-4);
+  ASSERT_EQ(test.size(), expected.size());
+  for (size_t i = 0; i < test.size(); i++) {
+    EXPECT_NEAR(test[i], expected[i], 1e-6);
+  }

-  // Test sigmoid at large negative value (should approach 0)
-  EXPECT_NEAR(sigmoid(-10.0), 0.0, 1e-4);
-
-  // Test sigmoid at x = 1
-  EXPECT_NEAR(sigmoid(1.0), 1.0 / (1.0 + exp(-1.0)), 1e-6);
-
-  // Test sigmoid at x = -1
-  EXPECT_NEAR(sigmoid(-1.0), 1.0 / (1.0 + exp(1.0)), 1e-6);
+  // Test initialization standard deviation
+  EXPECT_NEAR(sigmoid.init_stddev(100), sqrt(1.0/100), 1e-6);
 }

 TEST(ActivationFunctionTest, ReLUTest) {
   ReLU relu;
+  std::vector<float> input = {0.0, 5.0, -5.0, 0.0001, -0.0001};
+  std::vector<float> expected = {0.0, 5.0, 0.0, 0.0001, 0.0};

-  // Test ReLU at x = 0 (should be 0)
-  EXPECT_DOUBLE_EQ(relu(0.0), 0.0);
+  std::vector<float> test = input;
+  relu(test);

-  // Test ReLU at positive value (should be same value)
-  EXPECT_DOUBLE_EQ(relu(5.0), 5.0);
+  ASSERT_EQ(test.size(), expected.size());
+  for (size_t i = 0; i < test.size(); i++) {
+    EXPECT_FLOAT_EQ(test[i], expected[i]);
+  }

-  // Test ReLU at negative value (should be 0)
-  EXPECT_DOUBLE_EQ(relu(-5.0), 0.0);
-
-  // Test ReLU at very small positive value
-  EXPECT_DOUBLE_EQ(relu(0.0001), 0.0001);
-
-  // Test ReLU at very small negative value
-  EXPECT_DOUBLE_EQ(relu(-0.0001), 0.0);
+  // Test initialization standard deviation
+  EXPECT_NEAR(relu.init_stddev(100), sqrt(2.0/100), 1e-6);
+}
+
+TEST(ActivationFunctionTest, SoftMaxTest) {
+  SoftMax softmax;
+  std::vector<float> input = {1.0, 2.0, 3.0, 4.0, 1.0};
+  std::vector<float> test = input;
+
+  softmax(test);
+
+  // Test properties of softmax
+  ASSERT_EQ(test.size(), input.size());
+
+  // Sum should be approximately 1
+  float sum = 0.0;
+  for (float val : test) {
+    sum += val;
+    // All values should be between 0 and 1
+    EXPECT_GE(val, 0.0);
+    EXPECT_LE(val, 1.0);
+  }
+  EXPECT_NEAR(sum, 1.0, 1e-6);
+
+  // Higher input should lead to higher output
+  for (size_t i = 0; i < test.size() - 1; i++) {
+    if (input[i] < input[i + 1]) {
+      EXPECT_LT(test[i], test[i + 1]);
+    }
+  }
+
+  // Test initialization standard deviation
+  EXPECT_NEAR(softmax.init_stddev(100), sqrt(1.0/100), 1e-6);
+}
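For context, a minimal usage sketch of the in-place activation functors added by this patch. It is illustrative only and not part of the diff; the include path and the sample values are assumptions, and the program only exercises what activation_function.hpp itself defines.

#include <iostream>
#include <vector>

#include "src/activation_function.hpp"  // path assumed relative to the repo root

int main() {
  // Each functor now transforms a whole vector of pre-activations in place.
  std::vector<float> logits = {1.0f, -2.0f, 0.5f};

  SoftMax softmax;
  softmax(logits);  // logits now holds a probability distribution

  float sum = 0.0f;
  for (float p : logits) {
    std::cout << p << ' ';
    sum += p;  // accumulates to approximately 1.0
  }
  std::cout << "\nsum = " << sum << '\n';

  // init_stddev(n) returns the weight-initialization scale for a layer with
  // n inputs: sqrt(1/n) for Sigmoid/SoftMax, sqrt(2/n) for ReLU.
  ReLU relu;
  std::cout << "ReLU init stddev for n=100: " << relu.init_stddev(100) << '\n';
  return 0;
}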