diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37d8dc3..466773a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,12 @@ add_subdirectory(tests)
 
 target_link_libraries(${CMAKE_PROJECT_NAME}_run ${CMAKE_PROJECT_NAME}_lib)
 
+find_package(OpenMP)
+if(OpenMP_CXX_FOUND)
+  target_link_libraries(${CMAKE_PROJECT_NAME}_run OpenMP::OpenMP_CXX)
+endif()
+
+
 # Doxygen Build
 option(BUILD_DOC "Build Documentation" ON)
 
diff --git a/src/activation_function.hpp b/src/activation_function.hpp
index 34269cf..3a748f2 100644
--- a/src/activation_function.hpp
+++ b/src/activation_function.hpp
@@ -13,7 +13,11 @@ struct Sigmoid {
       z[i] = 1 / (1 + exp(-z[i]));
     };
   };
-  double init_stddev(int n) { return sqrt(1.0 / n); };
+  float init_stddev(int n) { return sqrt(1.0 / n); };
+  float derivative(float x) {
+    float exp_x = exp(-x);
+    return exp_x / pow(exp_x + 1.0, 2.0);
+  }
 };
 
 /**
@@ -25,7 +29,14 @@ struct ReLU {
       z[i] = std::max(0.0f, z[i]);
     };
   };
-  double init_stddev(int n) { return sqrt(2.0 / n); };
+  float init_stddev(int n) { return sqrt(2.0 / n); };
+  float derivative(float x) {
+    if (x < 0) {
+      return 0;
+    } else {
+      return 1;
+    }
+  };
 };
 
 /**
@@ -44,7 +55,7 @@ struct SoftMax {
       z[i] = z[i] / sum;
     };
   };
-  double init_stddev(int n) { return sqrt(1.0 / n); };
+  float init_stddev(int n) { return sqrt(1.0 / n); };
 };
 
 #endif
diff --git a/src/matrix.hpp b/src/matrix.hpp
new file mode 100644
index 0000000..10362ed
--- /dev/null
+++ b/src/matrix.hpp
@@ -0,0 +1,111 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <stdexcept>
+#include <vector>
+
+template <typename T> class Matrix {
+public:
+  // Create an empty matrix
+  Matrix() : m_rows(0), m_cols(0) {}
+
+  // Create a matrix with specified dimensions and initialize with value
+  Matrix(size_t rows, size_t cols, T value)
+      : m_rows(rows), m_cols(cols), m_data(rows * cols, value) {}
+
+  // Create a matrix from a 1d vector using move semantics
+  Matrix(size_t rows, size_t cols, std::vector<T> data)
+      : m_rows(rows), m_cols(cols), m_data(data) {
+
+    if (m_rows * m_cols != m_data.size()) {
+      throw std::invalid_argument(
+          "The size of input data doesn't match the provided dimensions");
+    }
+  }
+
+  // Create a matrix from a vector of vectors
+  Matrix(const std::vector<std::vector<T>> &data) {
+    if (data.empty()) {
+      m_rows = 0;
+      m_cols = 0;
+      return;
+    }
+
+    m_rows = data.size();
+    m_cols = data[0].size();
+    m_data.resize(m_rows * m_cols);
+
+    for (size_t i = 0; i < m_rows; ++i) {
+      if (data[i].size() != m_cols) {
+        throw std::invalid_argument(
+            "All rows must have the same number of columns");
+      }
+      for (size_t j = 0; j < m_cols; ++j) {
+        m_data[i * m_cols + j] = data[i][j];
+      }
+    }
+  }
+
+  // Access element (row, col)
+  float &operator()(size_t row, size_t col) {
+    check_bounds(row, col);
+    return m_data[row * m_cols + col];
+  }
+
+  const float &operator()(size_t row, size_t col) const {
+    check_bounds(row, col);
+    return m_data[row * m_cols + col];
+  }
+
+  // Matrix multiplication
+  Matrix operator*(const Matrix &other) const {
+    if (m_cols != other.m_rows) {
+      throw std::invalid_argument(
+          "Matrix dimensions do not match for multiplication");
+    }
+
+    Matrix result(m_rows, other.m_cols, 0.0);
+#pragma omp parallel for
+    for (size_t i = 0; i < m_rows; ++i) {
+      for (size_t j = 0; j < other.m_cols; ++j) {
+        float sum = 0.0f;
+        for (size_t k = 0; k < m_cols; ++k) {
+          sum += (*this)(i, k) * other(k, j);
+        }
+        result(i, j) = sum;
+      }
+    }
+    return result;
+  }
+
+  // Get matrix dimensions
+  size_t rows() const { return m_rows; }
+  size_t cols() const { return m_cols; }
+
+  // Get raw data access
+  std::vector<T> &data() { return m_data; }
+
+  // Transpose the matrix
+  Matrix transpose() const {
+    Matrix result(m_cols, m_rows, T{});
+    for (size_t i = 0; i < m_rows; ++i) {
+      for (size_t j = 0; j < m_cols; ++j) {
+        result(j, i) = (*this)(i, j);
+      }
+    }
+    return result;
+  }
+
+private:
+  void check_bounds(size_t row, size_t col) const {
+    if (row >= m_rows || col >= m_cols) {
+      throw std::out_of_range("Matrix index out of bounds");
+    }
+  }
+
+  size_t m_rows;
+  size_t m_cols;
+  std::vector<T> m_data;
+};
+
+#endif // MATRIX_H
diff --git a/src/neural_net.cpp b/src/neural_net.cpp
deleted file mode 100644
index c4a302d..0000000
--- a/src/neural_net.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-#include "neural_net.hpp"
-#include "utility.hpp"
-#include <algorithm>
-#include <numeric>
-#include <random>
-#include <vector>
-
-template <class ActivationFunction>
-NeuralNet<ActivationFunction>::NeuralNet(std::vector<size_t> &layer_sizes)
-    : m_sizes(layer_sizes) {
-  int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(), 0,
-                                      std::plus());
-  // Initialize the activation function
-  m_activation_func = ActivationFunction();
-
-  // Create random sampling device
-  std::random_device rd{};
-  std::mt19937 gen{rd()};
-  std::normal_distribution dist{0.0, 1.0};
-
-  // Initialize the weights
-  m_weights.reserve(total_neurons);
-  int start_idx = 0;
-  for (auto size : m_sizes) {
-    for (int i = 0; i < size; i++) {
-      m_weights[i + start_idx] =
-          dist(gen) * m_activation_func.init_stddev(size);
-    }
-    start_idx += size;
-  }
-}
-
-/** Pass input vector through the neural network.
- * This is a fully connected neural network geometry.
- * @param x Input vector
- * @return output of feed forward phase
- */
-template <class ActivationFunction>
-std::vector<float>
-NeuralNet<ActivationFunction>::feed_forward(std::vector<float> &x) {
-  std::vector<float> A = x;
-  int start_idx = 0;
-
-  // Feed each layer forward except the last layer using the user specified
-  // activation function
-  for (auto size = m_sizes.begin(); size < m_sizes.end() - 1; size++) {
-    // Get the iterator range for the current layer
-    auto layer_start = m_weights.begin() + start_idx;
-    auto layer_end = m_weights.end() + start_idx + *size;
-
-    std::vector<float> Anew = Utilities::feed_layer(
-        layer_start, layer_end, &A, m_activation_func);
-    if (Anew.size() > A.capacity()) {
-      A.reserve(Anew.size());
-    }
-    std::move(Anew.begin(), Anew.end(), A.begin());
-    start_idx += *size;
-  }
-
-  // Always use soft max for the final layer
-  auto last_layer_start = m_weights.begin() + start_idx;
-  auto output = Utilities::feed_layer(last_layer_start,
-                                      m_weights.end(), A, m_soft_max);
-  return output;
-}
diff --git a/src/neural_net.hpp b/src/neural_net.hpp
index c4f7f48..2aae21a 100644
--- a/src/neural_net.hpp
+++ b/src/neural_net.hpp
@@ -2,14 +2,13 @@
 #define NEURAL_NET_H
 
 #include "activation_function.hpp"
-#include "utility.hpp"
+#include "matrix.hpp"
 #include <random>
 #include <vector>
+
 template <class ActivationFunction> class NeuralNet {
 public:
   NeuralNet(std::vector<size_t> &layer_sizes) : m_sizes(layer_sizes) {
-    int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(),
-                                        0, std::plus());
     // Initialize the activation function
     m_activation_func = ActivationFunction();
 
@@ -18,54 +17,87 @@ public:
     std::mt19937 gen{rd()};
     std::normal_distribution dist{0.0, 1.0};
 
-    // Initialize the weights
-    m_weights.reserve(total_neurons);
-    int start_idx = 0;
-    for (auto size : m_sizes) {
-      for (int i = 0; i < size; i++) {
-        m_weights[i + start_idx] =
-            dist(gen) * m_activation_func.init_stddev(size);
+    // Initialize weights for each layer connection
+    for (size_t i = 0; i < m_sizes.size() - 1; i++) {
+      size_t rows = m_sizes[i + 1]; // neurons in next layer
+      size_t cols = m_sizes[i];     // neurons in current layer
+
+      // Create and initialize weight matrix
+      Matrix<float> W(rows, cols, 0.0);
+      for (size_t j = 0; j < rows; j++) {
+        for (size_t k = 0; k < cols; k++) {
+          W(j, k) = dist(gen) * m_activation_func.init_stddev(cols);
+        }
       }
-      start_idx += size;
+      m_weights.push_back(W);
     }
   };
 
-private:
-  ActivationFunction m_activation_func;
-  SoftMax m_soft_max;
-  std::vector<size_t> m_sizes;
-  std::vector<float> m_weights;
+  // Set new weights for the network
+  void set_weights(const std::vector<Matrix<float>> &new_weights) {
+    // Validate new weights
+    if (new_weights.empty()) {
+      throw std::invalid_argument("Weights vector cannot be empty");
+    }
+
+    // Validate layer sizes match
+    if (new_weights.size() != m_weights.size()) {
+      throw std::invalid_argument(
+          "Number of weight matrices doesn't match network architecture");
+    }
+
+    // Validate layer connectivity
+    for (size_t i = 0; i < new_weights.size(); i++) {
+      if (new_weights[i].rows() != m_weights[i].rows()) {
+        throw std::invalid_argument(
+            "New weight matrix rows don't match existing architecture");
+      }
+      if (new_weights[i].cols() != m_weights[i].cols()) {
+        throw std::invalid_argument(
+            "New weight matrix columns don't match existing architecture");
+      }
+    }
+
+    // Update weights
+    m_weights = new_weights;
+  };
 
   /** Pass input vector through the neural network.
    * This is a fully connected neural network geometry.
    * @param x Input vector
    * @return output of feed forward phase
    */
-  std::vector<float> feed_forward(std::vector<float> &x) {
-    std::vector<float> A = x;
-    int start_idx = 0;
+  std::vector<float> feed_forward(const std::vector<float> &x) {
+    // Convert input vector to matrix
+    Matrix<float> A = Matrix<float>(x.size(), 1, x);
 
     // Feed each layer forward except the last layer using the user specified
    // activation function
-    for (auto size = m_sizes.begin(); size < m_sizes.end() - 1; size++) {
-      // Get the iterator range for the current layer
-      auto layer_start = m_weights.begin() + start_idx;
-      auto layer_end = m_weights.end() + start_idx + *size;
+    for (size_t i = 0; i < m_sizes.size() - 2; i++) {
+      // Calculate Z = W * A
+      Matrix<float> Z = m_weights[i] * A;
 
-      std::vector<float> Anew = Utilities::feed_layer(
-          layer_start, layer_end, &A, m_activation_func);
-      if (Anew.size() > A.capacity()) {
-        A.reserve(Anew.size());
-      }
-      std::move(Anew.begin(), Anew.end(), A.begin());
-      start_idx += *size;
+      // Apply activation function
+      m_activation_func(Z.data());
+      A = Z;
     }
 
     // Always use soft max for the final layer
-    auto last_layer_start = m_weights.begin() + start_idx;
-    auto output = Utilities::feed_layer(
-        last_layer_start, m_weights.end(), A, m_soft_max);
+    Matrix<float> Z = m_weights.back() * A;
+    m_soft_max(Z.data());
+
+    // Convert final output to vector
+    std::vector<float> output(Z.rows());
+    for (size_t i = 0; i < Z.rows(); i++) {
+      output[i] = Z(i, 0);
+    }
     return output;
   };
+
+private:
+  ActivationFunction m_activation_func;
+  SoftMax m_soft_max;
+  std::vector<size_t> m_sizes;
+  std::vector<Matrix<float>> m_weights;
 };
 
 #endif
diff --git a/src/utility.hpp b/src/utility.hpp
index 16cdede..a5d8b4d 100644
--- a/src/utility.hpp
+++ b/src/utility.hpp
@@ -1,31 +1,5 @@
 #ifndef UTILITY_H
 #define UTILITY_H
 
-#include <algorithm>
-#include <iterator>
-#include <numeric>
-#include <vector>
-
-namespace Utilities {
-
-template <class ActivationFunction>
-std::vector<float> feed_layer(std::vector<float>::iterator weight_start,
-                              std::vector<float>::iterator weight_end,
-                              std::vector<float> &A,
-                              ActivationFunction activation_func) {
-  // Calculate the new A vector from the current weights
-  std::vector<float> Anew;
-  Anew.reserve(std::distance(weight_start, weight_end));
-  std::transform(
-      weight_start, weight_end, std::back_inserter(Anew), [&A](float weight) {
-        float summed_weight = std::accumulate(
-            A.begin(), A.end(), 0.0f,
-            [&weight](float acc, float a) { return acc + a * weight; });
-        return summed_weight;
-      });
-  activation_func(Anew);
-  return Anew;
-};
-
-} // namespace Utilities
+namespace Utilities {} // namespace Utilities
 
 #endif
diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt
index 8f1605f..a6bdde9 100644
--- a/tests/unit_tests/CMakeLists.txt
+++ b/tests/unit_tests/CMakeLists.txt
@@ -1,8 +1,12 @@
 include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
 
-add_executable(Unit_Tests_run
+set(TEST_SOURCES
   test_activation_functions.cpp
-  test_utility.cpp
+  test_neural_net.cpp
+)
+
+add_executable(Unit_Tests_run
+  ${TEST_SOURCES}
 )
 
 target_link_libraries(Unit_Tests_run gtest gtest_main)
diff --git a/tests/unit_tests/test_neural_net.cpp b/tests/unit_tests/test_neural_net.cpp
new file mode 100644
index 0000000..d959903
--- /dev/null
+++ b/tests/unit_tests/test_neural_net.cpp
@@ -0,0 +1,123 @@
+#include "../src/activation_function.hpp"
+#include "../src/neural_net.hpp"
+#include <cmath>
+#include <gtest/gtest.h>
+#include <memory>
+#include <vector>
+
+class NeuralNetTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    // Create a simple neural network with 2 input neurons, 2 hidden neurons,
+    // and 2 output neurons
+    std::vector<size_t> layer_sizes = {2, 2, 2};
+    net = std::make_unique<NeuralNet<Sigmoid>>(layer_sizes);
+  }
+
+  std::unique_ptr<NeuralNet<Sigmoid>> net;
+};
+
+TEST_F(NeuralNetTest, FeedForward_SimpleNetwork) {
+  // Test a simple network with known weights and inputs
+  std::vector<float> input = {0.5f, 0.5f};
+
+  // Set known weights for testing
+  std::vector<Matrix<float>> weights = {
+      Matrix<float>(2, 2, 0.5f), // First layer weights
+      Matrix<float>(2, 2, 0.5f)  // Output layer weights
+  };
+
+  // Replace the network's weights with our test weights
+  net->set_weights(weights);
+
+  // Calculate expected output manually
+  // First layer: Z1 = W1 * X
+  Matrix<float> X(2, 1, 0.0);
+  X(0, 0) = input[0];
+  X(1, 0) = input[1];
+
+  Matrix<float> Z1 = weights[0] * X;
+  // Apply sigmoid activation
+  Sigmoid sigmoid;
+  sigmoid(Z1.data());
+
+  // Second layer: Z2 = W2 * A1
+  Matrix<float> Z2 = weights[1] * Z1;
+  SoftMax softmax;
+  softmax(Z2.data());
+
+  // Convert to output vector
+  std::vector<float> expected_output(Z2.rows());
+  for (size_t i = 0; i < Z2.rows(); i++) {
+    expected_output[i] = Z2(i, 0);
+  }
+
+  // Get actual output from feed_forward
+  std::vector<float> output = net->feed_forward(input);
+
+  // Compare actual and expected outputs
+  for (size_t i = 0; i < output.size(); i++) {
+    EXPECT_NEAR(output[i], expected_output[i], 1e-6);
+  }
+}
+
+TEST_F(NeuralNetTest, FeedForward_DifferentLayerSizes) {
+  // Create a network with different layer sizes
+  std::vector<size_t> layer_sizes = {3, 4, 2};
+  NeuralNet<Sigmoid> net2(layer_sizes);
+
+  std::vector<float> input = {0.1f, 0.2f, 0.3f};
+  std::vector<float> output = net2.feed_forward(input);
+
+  // Output should have 2 elements (size of last layer)
+  EXPECT_EQ(output.size(), 2);
+}
+
+TEST_F(NeuralNetTest, FeedForward_InvalidInputSize) {
+  std::vector<float> input = {0.1f}; // Only 1 input, but network expects 2
+
+  // This should throw an exception since input size doesn't match first layer
+  // size
+  EXPECT_THROW(net->feed_forward(input), std::invalid_argument);
+}
+
+TEST_F(NeuralNetTest, FeedForward_IdentityTest) {
+  // Create a network with identity weights (1.0) and no bias
+  std::vector<size_t> layer_sizes = {2, 2};
+  NeuralNet<Sigmoid> net2(layer_sizes);
+
+  // Set weights to identity matrix
+  std::vector<Matrix<float>> weights = {Matrix<float>(2, 2, 1.0f)};
+
+  net2.set_weights(weights);
+
+  std::vector<float> input = {0.5f, 0.5f};
+  std::vector<float> output = net2.feed_forward(input);
+
+  // With a single weight layer only softmax is applied: both pre-activations
+  // equal 0.5 + 0.5 = 1.0, so softmax yields 0.5 for each neuron
+  SoftMax softmax;
+  std::vector<float> expected_output = input;
+  softmax(expected_output);
+
+  for (float val : output) {
+    EXPECT_NEAR(val, expected_output[0], 1e-6);
+  }
+}
+
+TEST_F(NeuralNetTest, FeedForward_SoftmaxOutput) {
+  std::vector<float> input = {1.0f, -1.0f};
+  std::vector<float> output = net->feed_forward(input);
+
+  // Verify that the output sums to 1 (property of softmax)
+  float sum = 0.0f;
+  for (float val : output) {
+    sum += val;
+  }
+  EXPECT_NEAR(sum, 1.0f, 1e-6);
+
+  // Verify that all outputs are positive
+  for (float val : output) {
+    EXPECT_GT(val, 0.0f);
+  }
+}
diff --git a/tests/unit_tests/test_utility.cpp b/tests/unit_tests/test_utility.cpp
deleted file mode 100644
index 07cca8b..0000000
--- a/tests/unit_tests/test_utility.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-#include "activation_function.hpp"
-#include "utility.hpp"
-#include <cmath>
-#include <gtest/gtest.h>
-
-// Simple identity activation function for testing
-struct Identity {
-  void operator()(std::vector<float> &x) const {
-    // Identity function - no change to values
-    // Below statement is needed to remove compiler warning about unused var
-    // warning
-    (void)x;
-  }
-};
-
-TEST(UtilityTest, FeedLayerIdentityTest) {
-  // Test with identity activation function for simple verification
-  // Input: [1, 2]
-  // Weights: [0.5, -0.5, 1.0, -1.0]
-
-  std::vector<float> weights = {0.5, -0.5, 1.0, -1.0};
-  std::vector<float> input = {1.0, 2.0};
-  Identity identity;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, identity);
-
-  ASSERT_EQ(output.size(), 4);
-  EXPECT_NEAR(output[0], 1.5f, 1e-5);  // 1.0 * 0.5 + 2.0 * 0.5
-  EXPECT_NEAR(output[1], -1.5f, 1e-5); // 1.0 * -0.5 + 2.0 * -0.5
-  EXPECT_NEAR(output[2], 3.0f, 1e-5);  // 1.0 * 1.0 + 2.0 * 1.0
-  EXPECT_NEAR(output[3], -3.0f, 1e-5); // 1.0 * -1.0 + 2.0 * -1.0
-}
-
-TEST(UtilityTest, FeedLayerSigmoidTest) {
-  // Test with sigmoid activation
-  // Input: [1]
-  // Weights: [2, -2]
-  std::vector<float> weights = {2.0, -2.0};
-  std::vector<float> input = {1.0};
-  Sigmoid sigmoid;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, sigmoid);
-
-  ASSERT_EQ(output.size(), 2);
-  // Note: Sigmoid is applied to the whole vector after matrix multiplication
-  float expected0 = 2.0;  // 1.0 * 2.0
-  float expected1 = -2.0; // 1.0 * -2.0
-  EXPECT_NEAR(output[0], 1.0 / (1.0 + std::exp(-expected0)), 1e-5);
-  EXPECT_NEAR(output[1], 1.0 / (1.0 + std::exp(-expected1)), 1e-5);
-}
-
-TEST(UtilityTest, FeedLayerSoftMaxTest) {
-  // Test with softmax activation
-  // Input: [1]
-  // Weights: [2, 2]
-  std::vector<float> weights = {2.0, 2.0};
-  std::vector<float> input = {1.0};
-  SoftMax softmax;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, softmax);
-
-  ASSERT_EQ(output.size(), 2);
-  // Both outputs should be 0.5 since inputs to softmax are equal (both 2.0)
-  EXPECT_NEAR(output[0], 0.5, 1e-5);
-  EXPECT_NEAR(output[1], 0.5, 1e-5);
-}
-
-TEST(UtilityTest, FeedLayerEmptyInput) {
-  std::vector<float> weights = {1.0, 1.0};
-  std::vector<float> input = {};
-  Identity identity;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, identity);
-
-  ASSERT_EQ(output.size(), 2);
-  EXPECT_NEAR(output[0], 0.0f, 1e-5);
-  EXPECT_NEAR(output[1], 0.0f, 1e-5);
-}