Fix feed_forward implementation

Alex Selimov 2025-03-29 23:13:13 -04:00
parent 0ac3df4e1e
commit a578cc0c5b
9 changed files with 326 additions and 212 deletions

CMakeLists.txt

@@ -15,6 +15,12 @@ add_subdirectory(tests)
target_link_libraries(${CMAKE_PROJECT_NAME}_run ${CMAKE_PROJECT_NAME}_lib)
find_package(OpenMP)
if(OpenMP_CXX_FOUND)
target_link_libraries(${CMAKE_PROJECT_NAME}_run OpenMP::OpenMP_CXX)
endif()
# Doxygen Build
option(BUILD_DOC "Build Documentation" ON)

src/activation_function.hpp

@@ -13,7 +13,11 @@ struct Sigmoid {
z[i] = 1 / (1 + exp(-z[i]));
};
};
double init_stddev(int n) { return sqrt(1.0 / n); };
float init_stddev(int n) { return sqrt(1.0 / n); };
float derivative(float x) {
float exp_x = exp(-x);
return exp_x / pow(exp_x + 1.0, 2.0);
}
};
/**
@@ -25,7 +29,14 @@ struct ReLU {
z[i] = std::max(0.0f, z[i]);
};
};
double init_stddev(int n) { return sqrt(2.0 / n); };
float init_stddev(int n) { return sqrt(2.0 / n); };
float derivative(float x) {
if (x < 0) {
return 0;
} else {
return 1;
}
};
};
/**
@@ -44,7 +55,7 @@ struct SoftMax {
z[i] = z[i] / sum;
};
};
double init_stddev(int n) { return sqrt(1.0 / n); };
float init_stddev(int n) { return sqrt(1.0 / n); };
};
#endif
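For reference, the new Sigmoid::derivative computes exp(-x) / (exp(-x) + 1)^2, which is algebraically the usual identity s * (1 - s) with s = sigmoid(x). A minimal standalone check (a hypothetical driver, not part of this commit; assumes the header above is activation_function.hpp):

#include "activation_function.hpp"
#include <cassert>
#include <cmath>

int main() {
  Sigmoid sig;
  const float xs[] = {-2.0f, -0.5f, 0.0f, 1.0f, 3.0f};
  for (float x : xs) {
    float s = 1.0f / (1.0f + std::exp(-x));
    // exp(-x) / (exp(-x) + 1)^2 should equal s * (1 - s)
    assert(std::abs(sig.derivative(x) - s * (1.0f - s)) < 1e-6f);
  }
  return 0;
}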

src/matrix.hpp (new file, 111 lines)

@@ -0,0 +1,111 @@
#ifndef MATRIX_H
#define MATRIX_H
#include <stdexcept>
#include <utility>
#include <vector>
template <typename T> class Matrix {
public:
// Create an empty matrix
Matrix() : m_rows(0), m_cols(0) {}
// Create a matrix with specified dimensions and initialize with value
Matrix(size_t rows, size_t cols, T value)
: m_rows(rows), m_cols(cols), m_data(rows * cols, value) {}
// Create a matrix from a 1d vector using move semantics
Matrix(size_t rows, size_t cols, std::vector<T> data)
: m_rows(rows), m_cols(cols), m_data(std::move(data)) {
if (m_rows * m_cols != m_data.size()) {
throw std::invalid_argument(
"The size of input data doesn't match the provided dimensions");
}
}
// Create a matrix from a vector of vectors
Matrix(const std::vector<std::vector<T>> &data) {
if (data.empty()) {
m_rows = 0;
m_cols = 0;
return;
}
m_rows = data.size();
m_cols = data[0].size();
m_data.resize(m_rows * m_cols);
for (size_t i = 0; i < m_rows; ++i) {
if (data[i].size() != m_cols) {
throw std::invalid_argument(
"All rows must have the same number of columns");
}
for (size_t j = 0; j < m_cols; ++j) {
m_data[i * m_cols + j] = data[i][j];
}
}
}
// Access element (row, col)
T &operator()(size_t row, size_t col) {
check_bounds(row, col);
return m_data[row * m_cols + col];
}
const T &operator()(size_t row, size_t col) const {
check_bounds(row, col);
return m_data[row * m_cols + col];
}
// Matrix multiplication
Matrix operator*(const Matrix &other) const {
if (m_cols != other.m_rows) {
throw std::invalid_argument(
"Matrix dimensions do not match for multiplication");
}
Matrix result(m_rows, other.m_cols, T{});
#pragma omp parallel for
for (size_t i = 0; i < m_rows; ++i) {
for (size_t j = 0; j < other.m_cols; ++j) {
T sum{};
for (size_t k = 0; k < m_cols; ++k) {
sum += (*this)(i, k) * other(k, j);
}
result(i, j) = sum;
}
}
return result;
}
// Get matrix dimensions
size_t rows() const { return m_rows; }
size_t cols() const { return m_cols; }
// Get raw data access
std::vector<T> &data() { return m_data; }
// Transpose the matrix
Matrix transpose() const {
Matrix result(m_cols, m_rows, T{});
for (size_t i = 0; i < m_rows; ++i) {
for (size_t j = 0; j < m_cols; ++j) {
result(j, i) = (*this)(i, j);
}
}
return result;
}
private:
void check_bounds(size_t row, size_t col) const {
if (row >= m_rows || col >= m_cols) {
throw std::out_of_range("Matrix index out of bounds");
}
}
size_t m_rows;
size_t m_cols;
std::vector<T> m_data;
};
#endif // MATRIX_H
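A quick usage sketch for the new Matrix class (hypothetical snippet, not part of the commit; the values are illustrative):

#include "matrix.hpp"
#include <iostream>
#include <vector>

int main() {
  // 2x3 matrix of ones times a 3x1 column vector
  Matrix<float> W(2, 3, 1.0f);
  Matrix<float> x(3, 1, std::vector<float>{1.0f, 2.0f, 3.0f});
  Matrix<float> y = W * x;           // 2x1; each entry is 1 + 2 + 3 = 6
  std::cout << y(0, 0) << ", " << y(1, 0) << "\n";

  Matrix<float> Wt = W.transpose();  // 3x2
  std::cout << Wt.rows() << "x" << Wt.cols() << "\n";
  return 0;
}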

src/neural_net.cpp (deleted)

@@ -1,65 +0,0 @@
#include "neural_net.hpp"
#include "utility.hpp"
#include <functional>
#include <numeric>
#include <random>
#include <vector>
template <class ActivationFunction>
NeuralNet<ActivationFunction>::NeuralNet(std::vector<size_t> &layer_sizes)
: m_sizes(layer_sizes) {
int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(), 0,
std::plus<size_t>());
// Initialize the activation function
m_activation_func = ActivationFunction();
// Create random sampling device
std::random_device rd{};
std::mt19937 gen{rd()};
std::normal_distribution<float> dist{0.0, 1.0};
// Initialize the weights
m_weights.reserve(total_neurons);
int start_idx = 0;
for (auto size : m_sizes) {
for (int i = 0; i < size; i++) {
m_weights[i + start_idx] =
dist(gen) * m_activation_func.init_stddev(size);
}
start_idx += size;
}
}
/** Pass input vector through the neural network.
* This is a fully connected neural network geometry.
* @param x Input vector
* @return output of feed forward phase
*/
template <class ActivationFunction>
std::vector<float>
NeuralNet<ActivationFunction>::feed_forward(std::vector<float> &x) {
std::vector<float> A = x;
int start_idx = 0;
// Feed each layer forward except the last layer using the user specified
// activation function
for (auto size = m_sizes.begin(); size < m_sizes.end() - 1; size++) {
// Get the iterator range for the current layer
auto layer_start = m_weights.begin() + start_idx;
auto layer_end = m_weights.end() + start_idx + *size;
std::vector<float> Anew = Utilities::feed_layer<ActivationFunction>(
layer_start, layer_end, &A, m_activation_func);
if (Anew.size() > A.capacity()) {
A.reserve(Anew.size());
}
std::move(Anew.begin(), Anew.end(), A.begin());
start_idx += *size;
}
// Always use soft max for the final layer
auto last_layer_start = m_weights.begin() + start_idx;
auto output = Utilities::feed_layer<SoftMax>(last_layer_start,
m_weights.end(), A, m_soft_max);
return output;
}
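(Note: the deleted implementation computed each layer's end iterator as m_weights.end() + start_idx + *size rather than m_weights.begin() + start_idx + *size, and passed &A where feed_layer takes a reference; this is the breakage that the Matrix-based rewrite below replaces.)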

src/neural_net.hpp

@@ -2,14 +2,13 @@
#define NEURAL_NET_H
#include "activation_function.hpp"
#include "utility.hpp"
#include "matrix.hpp"
#include <random>
#include <vector>
template <class ActivationFunction> class NeuralNet {
public:
NeuralNet(std::vector<size_t> &layer_sizes) : m_sizes(layer_sizes) {
int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(),
0, std::plus<size_t>());
// Initialize the activation function
m_activation_func = ActivationFunction();
@@ -18,54 +17,87 @@ public:
std::mt19937 gen{rd()};
std::normal_distribution<float> dist{0.0, 1.0};
// Initialize the weights
m_weights.reserve(total_neurons);
int start_idx = 0;
for (auto size : m_sizes) {
for (int i = 0; i < size; i++) {
m_weights[i + start_idx] =
dist(gen) * m_activation_func.init_stddev(size);
// Initialize weights for each layer connection
for (size_t i = 0; i < m_sizes.size() - 1; i++) {
size_t rows = m_sizes[i + 1]; // neurons in next layer
size_t cols = m_sizes[i]; // neurons in current layer
// Create and initialize weight matrix
Matrix<float> W(rows, cols, 0.0);
for (size_t j = 0; j < rows; j++) {
for (size_t k = 0; k < cols; k++) {
W(j, k) = dist(gen) * m_activation_func.init_stddev(cols);
}
}
start_idx += size;
m_weights.push_back(W);
}
};
private:
ActivationFunction m_activation_func;
SoftMax m_soft_max;
std::vector<size_t> m_sizes;
std::vector<float> m_weights;
// Set new weights for the network
void set_weights(const std::vector<Matrix<float>> &new_weights) {
// Validate new weights
if (new_weights.empty()) {
throw std::invalid_argument("Weights vector cannot be empty");
}
// Validate layer sizes match
if (new_weights.size() != m_weights.size()) {
throw std::invalid_argument(
"Number of weight matrices doesn't match network architecture");
}
// Validate layer connectivity
for (size_t i = 0; i < new_weights.size(); i++) {
if (new_weights[i].rows() != m_weights[i].rows()) {
throw std::invalid_argument(
"New weight matrix rows don't match existing architecture");
}
if (new_weights[i].cols() != m_weights[i].cols()) {
throw std::invalid_argument(
"New weight matrix columns don't match existing architecture");
}
}
// Update weights
m_weights = new_weights;
};
/** Pass input vector through the neural network.
* This is a fully connected neural network geometry.
* @param x Input vector
* @return output of feed forward phase
*/
std::vector<float> feed_forward(std::vector<float> &x) {
std::vector<float> A = x;
int start_idx = 0;
std::vector<float> feed_forward(const std::vector<float> &x) {
// Convert input vector to matrix
Matrix<float> A = Matrix<float>(x.size(), 1, x);
// Feed each layer forward except the last layer using the user specified
// activation function
for (auto size = m_sizes.begin(); size < m_sizes.end() - 1; size++) {
// Get the iterator range for the current layer
auto layer_start = m_weights.begin() + start_idx;
auto layer_end = m_weights.end() + start_idx + *size;
for (size_t i = 0; i < m_sizes.size() - 2; i++) {
// Calculate Z = W * A
Matrix Z = m_weights[i] * A;
std::vector<float> Anew = Utilities::feed_layer<ActivationFunction>(
layer_start, layer_end, &A, m_activation_func);
if (Anew.size() > A.capacity()) {
A.reserve(Anew.size());
}
std::move(Anew.begin(), Anew.end(), A.begin());
start_idx += *size;
// Apply activation function
m_activation_func(Z.data());
A = Z;
}
// Always use soft max for the final layer
auto last_layer_start = m_weights.begin() + start_idx;
auto output = Utilities::feed_layer<SoftMax>(
last_layer_start, m_weights.end(), A, m_soft_max);
Matrix Z = m_weights.back() * A;
m_soft_max(Z.data());
// Convert final output to vector
std::vector<float> output(Z.rows());
for (size_t i = 0; i < Z.rows(); i++) {
output[i] = Z(i, 0);
}
return output;
};
private:
ActivationFunction m_activation_func;
SoftMax m_soft_max;
std::vector<size_t> m_sizes;
std::vector<Matrix<float>> m_weights;
};
#endif
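An end-to-end sketch of the rewritten feed-forward path (hypothetical driver; the layer sizes and inputs are illustrative only):

#include "neural_net.hpp"
#include <iostream>
#include <vector>

int main() {
  // 3 inputs -> 4 hidden neurons (sigmoid) -> 2 outputs (softmax)
  std::vector<size_t> layer_sizes = {3, 4, 2};
  NeuralNet<Sigmoid> net(layer_sizes);

  std::vector<float> input = {0.1f, 0.2f, 0.3f};
  std::vector<float> output = net.feed_forward(input);

  // The final softmax layer yields positive outputs summing to ~1
  float sum = 0.0f;
  for (float v : output) sum += v;
  std::cout << "outputs: " << output[0] << ", " << output[1]
            << " (sum = " << sum << ")\n";
  return 0;
}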

src/utility.hpp

@@ -1,31 +1,5 @@
#ifndef UTILITY_H
#define UTILITY_H
#include <algorithm>
#include <iterator>
#include <numeric>
#include <vector>
namespace Utilities {
template <class ActivationFunction>
std::vector<float> feed_layer(std::vector<float>::iterator weight_start,
std::vector<float>::iterator weight_end,
std::vector<float> &A,
ActivationFunction activation_func) {
// Calculate the new A vector from the current weights
std::vector<float> Anew;
Anew.reserve(std::distance(weight_start, weight_end));
std::transform(
weight_start, weight_end, std::back_inserter(Anew), [&A](float weight) {
float summed_weight = std::accumulate(
A.begin(), A.end(), 0.0f,
[&weight](float acc, float a) { return acc + a * weight; });
return summed_weight;
});
activation_func(Anew);
return Anew;
};
} // namespace Utilities
namespace Utilities {} // namespace Utilities
#endif

tests/CMakeLists.txt

@@ -1,8 +1,12 @@
include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
add_executable(Unit_Tests_run
set(TEST_SOURCES
test_activation_functions.cpp
test_utility.cpp
test_neural_net.cpp
)
add_executable(Unit_Tests_run
${TEST_SOURCES}
)
target_link_libraries(Unit_Tests_run gtest gtest_main)

tests/test_neural_net.cpp (new file)

@@ -0,0 +1,123 @@
#include "../src/activation_function.hpp"
#include "../src/neural_net.hpp"
#include <cmath>
#include <gtest/gtest.h>
#include <stdexcept>
#include <vector>
class NeuralNetTest : public ::testing::Test {
protected:
void SetUp() override {
// Create a simple neural network with 2 input neurons, 2 hidden neurons,
// and 2 output neurons
std::vector<size_t> layer_sizes = {2, 2, 2};
net = std::make_unique<NeuralNet<Sigmoid>>(layer_sizes);
}
std::unique_ptr<NeuralNet<Sigmoid>> net;
};
TEST_F(NeuralNetTest, FeedForward_SimpleNetwork) {
// Test a simple network with known weights and inputs
std::vector<float> input = {0.5f, 0.5f};
// Set known weights for testing
std::vector<Matrix<float>> weights = {
Matrix<float>(2, 2, 0.5f), // First layer weights
Matrix<float>(2, 2, 0.5f) // Output layer weights
};
// Replace the network's weights with our test weights
net->set_weights(weights);
// Calculate expected output manually
// First layer: Z1 = W1 * X
Matrix<float> X(2, 1, 0.0);
X(0, 0) = input[0];
X(1, 0) = input[1];
Matrix<float> Z1 = weights[0] * X;
// Apply sigmoid activation
Sigmoid sigmoid;
sigmoid(Z1.data());
// Second layer: Z2 = W2 * A1
Matrix<float> Z2 = weights[1] * Z1;
SoftMax softmax;
softmax(Z2.data());
// Convert to output vector
std::vector<float> expected_output(Z2.rows());
for (size_t i = 0; i < Z2.rows(); i++) {
expected_output[i] = Z2(i, 0);
}
// Get actual output from feed_forward
std::vector<float> output = net->feed_forward(input);
// Compare actual and expected outputs
for (size_t i = 0; i < output.size(); i++) {
EXPECT_NEAR(output[i], expected_output[i], 1e-6);
}
}
TEST_F(NeuralNetTest, FeedForward_DifferentLayerSizes) {
// Create a network with different layer sizes
std::vector<size_t> layer_sizes = {3, 4, 2};
NeuralNet<Sigmoid> net2(layer_sizes);
std::vector<float> input = {0.1f, 0.2f, 0.3f};
std::vector<float> output = net2.feed_forward(input);
// Output should have 2 elements (size of last layer)
EXPECT_EQ(output.size(), 2);
}
TEST_F(NeuralNetTest, FeedForward_InvalidInputSize) {
std::vector<float> input = {0.1f}; // Only 1 input, but network expects 2
// This should throw an exception since input size doesn't match first layer
// size
EXPECT_THROW(net->feed_forward(input), std::invalid_argument);
}
TEST_F(NeuralNetTest, FeedForward_IdentityTest) {
// Create a single-layer network with every weight set to 1.0 and no bias
std::vector<size_t> layer_sizes = {2, 2};
NeuralNet<Sigmoid> net2(layer_sizes);
// Set all weights to 1.0
std::vector<Matrix<float>> weights = {Matrix<float>(2, 2, 1.0f)};
net2.set_weights(weights);
std::vector<float> input = {0.5f, 0.5f};
std::vector<float> output = net2.feed_forward(input);
// With all-ones weights, Z = W * x = [1.0, 1.0]; this single-layer network
// applies only the final softmax, so each output should be 0.5
SoftMax softmax;
std::vector<float> expected_output = input;
softmax(expected_output);
for (float val : output) {
EXPECT_NEAR(val, expected_output[0], 1e-6);
}
}
TEST_F(NeuralNetTest, FeedForward_SoftmaxOutput) {
std::vector<float> input = {1.0f, -1.0f};
std::vector<float> output = net->feed_forward(input);
// Verify that the output sums to 1 (property of softmax)
float sum = 0.0f;
for (float val : output) {
sum += val;
}
EXPECT_NEAR(sum, 1.0f, 1e-6);
// Verify that all outputs are positive
for (float val : output) {
EXPECT_GT(val, 0.0f);
}
}

tests/test_utility.cpp

@@ -1,82 +0,0 @@
#include "activation_function.hpp"
#include "utility.hpp"
#include <cmath>
#include <gtest/gtest.h>
// Simple identity activation function for testing
struct Identity {
void operator()(std::vector<float> &x) const {
// Identity function - no change to values
// Cast to void to suppress the unused-parameter warning
(void)x;
}
};
TEST(UtilityTest, FeedLayerIdentityTest) {
// Test with identity activation function for simple verification
// Input: [1, 2]
// Weights: [0.5, -0.5, 1.0, -1.0]
std::vector<float> weights = {0.5, -0.5, 1.0, -1.0};
std::vector<float> input = {1.0, 2.0};
Identity identity;
auto output = Utilities::feed_layer<Identity>(weights.begin(), weights.end(),
input, identity);
ASSERT_EQ(output.size(), 4);
EXPECT_NEAR(output[0], 1.5f, 1e-5); // 1.0 * 0.5 + 2.0 * 0.5
EXPECT_NEAR(output[1], -1.5f, 1e-5); // 1.0 * -0.5 + 2.0 * -0.5
EXPECT_NEAR(output[2], 3.0f, 1e-5); // 1.0 * 1.0 + 2.0 * 1.0
EXPECT_NEAR(output[3], -3.0f, 1e-5); // 1.0 * -1.0 + 2.0 * -1.0
}
TEST(UtilityTest, FeedLayerSigmoidTest) {
// Test with sigmoid activation
// Input: [1]
// Weights: [2, -2]
std::vector<float> weights = {2.0, -2.0};
std::vector<float> input = {1.0};
Sigmoid sigmoid;
auto output = Utilities::feed_layer<Sigmoid>(weights.begin(), weights.end(),
input, sigmoid);
ASSERT_EQ(output.size(), 2);
// Note: Sigmoid is applied to the whole vector after matrix multiplication
float expected0 = 2.0; // 1.0 * 2.0
float expected1 = -2.0; // 1.0 * -2.0
EXPECT_NEAR(output[0], 1.0 / (1.0 + std::exp(-expected0)), 1e-5);
EXPECT_NEAR(output[1], 1.0 / (1.0 + std::exp(-expected1)), 1e-5);
}
TEST(UtilityTest, FeedLayerSoftMaxTest) {
// Test with softmax activation
// Input: [1]
// Weights: [2, 2]
std::vector<float> weights = {2.0, 2.0};
std::vector<float> input = {1.0};
SoftMax softmax;
auto output = Utilities::feed_layer<SoftMax>(weights.begin(), weights.end(),
input, softmax);
ASSERT_EQ(output.size(), 2);
// Both outputs should be 0.5 since inputs to softmax are equal (both 2.0)
EXPECT_NEAR(output[0], 0.5, 1e-5);
EXPECT_NEAR(output[1], 0.5, 1e-5);
}
TEST(UtilityTest, FeedLayerEmptyInput) {
std::vector<float> weights = {1.0, 1.0};
std::vector<float> input = {};
Identity identity;
auto output = Utilities::feed_layer<Identity>(weights.begin(), weights.end(),
input, identity);
ASSERT_EQ(output.size(), 2);
EXPECT_NEAR(output[0], 0.0f, 1e-5);
EXPECT_NEAR(output[1], 0.0f, 1e-5);
}