Refactor forward propagation code again

Refactor to make activation functions static
2025-03-30 00:40:12 -04:00 · 2025-03-29 23:25:29 -04:00
10 changed files with 207 additions and 180 deletions
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -4,6 +4,7 @@ set(HEADER_FILES
    ./activation_function.hpp
    ./neural_net.hpp
    ./utility.hpp
+    ./forward_feed.hpp
 )
 set(SOURCE_FILES
 )
--- a/src/activation_function.hpp
+++ b/src/activation_function.hpp
@ -8,13 +8,13 @@
 * Functor to set the activation function as a Sigmoid function
 */
 struct Sigmoid {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
    for (size_t i = 0; i < z.size(); i++) {
      z[i] = 1 / (1 + exp(-z[i]));
    };
  };
-  float init_stddev(int n) { return sqrt(1.0 / n); };
-  float derivative(float x) {
+  float static init_stddev(int n) { return sqrt(1.0 / n); };
+  float static derivative(float x) {
    float exp_x = exp(-x);
    return exp_x / pow(exp_x + 1.0, 2.0);
  }
@ -24,13 +24,13 @@ struct Sigmoid {
 * Functor to set the activation function as Rectified Linear Unit
 */
 struct ReLU {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
    for (size_t i = 0; i < z.size(); i++) {
      z[i] = std::max(0.0f, z[i]);
    };
  };
-  float init_stddev(int n) { return sqrt(2.0 / n); };
-  float derivative(float x) {
+  float static init_stddev(int n) { return sqrt(2.0 / n); };
+  float static derivative(float x) {
    if (x < 0) {
      return 0;
    } else {
@ -44,7 +44,7 @@ struct ReLU {
 * This is generally used in the final output layer
 */
 struct SoftMax {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
    float zmax = *std::max_element(z.begin(), z.end());
    float sum = 0.0;
    for (size_t i = 0; i < z.size(); i++) {
--- a/src/cost_function.hpp
+++ b/src/cost_function.hpp
@ -0,0 +1,16 @@
+#include <cmath>
+#include <vector>
+
+/** Categorical cross entropy loss function for multi-category classification
+ * tasks.
+ *
+ * The loss for one sample is L = -sum_i y[i] * log(yhat[i]).
+ */
+struct CategoricalCrossEntropy {
+  /** Evaluate the loss for a single sample.
+   * @param y Target values, one per category
+   * @param yhat Predicted values; must hold at least y.size() entries, since
+   *             it is indexed with the same positions as y
+   * @return The cross entropy between y and yhat
+   */
+  float static loss(const std::vector<float> &y,
+                    const std::vector<float> &yhat) {
+    float loss = 0;
+    for (size_t i = 0; i < y.size(); i++) {
+      // A zero target contributes nothing to the sum; skipping it also keeps
+      // 0 * log(0) from turning the whole result into NaN
+      if (y[i] != 0) {
+        // Subtract: cross entropy is the negated sum of y * log(yhat)
+        loss -= y[i] * log(yhat[i]);
+      }
+    }
+    return loss;
+  }
+};
--- a/src/forward_feed.hpp
+++ b/src/forward_feed.hpp
@ -0,0 +1,38 @@
+/** Apply forward feeding to a fully connected neural network.
+ * This struct stores the final output, as well as the activations that occur,
+ * for use in backpropagation.
+ *
+ */
+#include "activation_function.hpp"
+#include "matrix.hpp"
+#include <stdexcept>
+#include <vector>
+
+/** Forward pass through a fully connected network.
+ *
+ * Every layer except the last is passed through ActivationFunction::apply;
+ * the last layer is always passed through SoftMax::apply. The results are
+ * kept in public members so they can be read after construction.
+ *
+ * @tparam ActivationFunction Type providing a static
+ *         apply(std::vector<float> &) that transforms the values in place
+ */
+template <class ActivationFunction> struct ForwardFeed {
+  // Input column fed into each layer except the last, in layer order, so
+  // m_activations[0] is x itself.
+  // NOTE(review): the output of the last hidden layer (the input to the
+  // softmax layer) is not stored here - confirm backpropagation does not
+  // need it.
+  std::vector<Matrix<float>> m_activations;
+  // Values of the final layer after SoftMax has been applied
+  std::vector<float> m_yhat;
+
+  /** Run the forward pass and record its results.
+   * @param x Input vector, used as an x.size() by 1 column matrix
+   * @param weights One weight matrix per layer, applied in order
+   * @throws std::invalid_argument if weights is empty
+   */
+  ForwardFeed(const std::vector<float> &x,
+              const std::vector<Matrix<float>> &weights) {
+    // An empty list would make weights.size() - 1 wrap around and
+    // weights.back() undefined, so reject it up front
+    if (weights.empty()) {
+      throw std::invalid_argument(
+          "ForwardFeed requires at least one weight matrix");
+    }
+
+    // Convert input vector to matrix
+    Matrix<float> A = Matrix<float>(x.size(), 1, x);
+
+    // Feed each layer forward except the last layer using the user specified
+    // activation function
+    const size_t hidden_layers = weights.size() - 1;
+    m_activations.reserve(hidden_layers);
+    for (size_t i = 0; i < hidden_layers; i++) {
+      // Calculate Z = W * A
+      Matrix<float> Z = weights[i] * A;
+
+      // Apply activation function in place on the layer's values
+      ActivationFunction::apply(Z.data());
+
+      // Record what went into this layer, then feed its output onward
+      m_activations.push_back(A);
+      A = Z;
+    }
+
+    // Always use soft max for the final layer
+    Matrix<float> Z = weights.back() * A;
+    SoftMax::apply(Z.data());
+
+    m_yhat = Z.data();
+  }
+};
--- a/src/matrix.hpp
+++ b/src/matrix.hpp
@ -13,7 +13,7 @@ public:
  Matrix(size_t rows, size_t cols, T value)
      : m_rows(rows), m_cols(cols), m_data(rows * cols, value) {}

-  // Create a matrix from a 1d vector using move semantics
+  // Create a matrix from a 1d vector
  Matrix(size_t rows, size_t cols, std::vector<T> data)
      : m_rows(rows), m_cols(cols), m_data(data) {

--- a/src/neural_net.hpp
+++ b/src/neural_net.hpp
@ -1,17 +1,13 @@
 #ifndef NEURAL_NET_H
 #define NEURAL_NET_H

-#include "activation_function.hpp"
 #include "matrix.hpp"
 #include <random>
 #include <vector>

-template <class ActivationFunction> class NeuralNet {
+template <class ActivationFunction, class LossFunction> class NeuralNet {
 public:
  NeuralNet(std::vector<size_t> &layer_sizes) : m_sizes(layer_sizes) {
-    // Initialize the activation function
-    m_activation_func = ActivationFunction();
-
    // Create random sampling device
    std::random_device rd{};
    std::mt19937 gen{rd()};
@ -26,7 +22,7 @@ public:
      Matrix<float> W(rows, cols, 0.0);
      for (size_t j = 0; j < rows; j++) {
        for (size_t k = 0; k < cols; k++) {
-          W(j, k) = dist(gen) * m_activation_func.init_stddev(cols);
+          W(j, k) = dist(gen) * ActivationFunction::init_stddev(cols);
        }
      }
      m_weights.push_back(W);
@ -62,42 +58,9 @@ public:
    m_weights = new_weights;
  };

-  /** Pass input vector through the neural network.
-   * This is a fully connected neural network geometry.
-   * @param x Input vector
-   * @return output of feed forward phase
-   */
-  std::vector<float> feed_forward(const std::vector<float> &x) {
-    // Convert input vector to matrix
-    Matrix<float> A = Matrix<float>(x.size(), 1, x);
-
-    // Feed each layer forward except the last layer using the user specified
-    // activation function
-    for (size_t i = 0; i < m_sizes.size() - 2; i++) {
-      // Calculate Z = W * A
-      Matrix Z = m_weights[i] * A;
-
-      // Apply activation function
-      m_activation_func(Z.data());
-      A = Z;
-    }
-
-    // Always use soft max for the final layer
-    Matrix Z = m_weights.back() * A;
-    m_soft_max(Z.data());
-
-    // Convert final output to vector
-    std::vector<float> output(Z.rows());
-    for (size_t i = 0; i < Z.rows(); i++) {
-      output[i] = Z(i, 0);
-    }
-    return output;
-  };
-
 private:
-  ActivationFunction m_activation_func;
-  SoftMax m_soft_max;
  std::vector<size_t> m_sizes;
  std::vector<Matrix<float>> m_weights;
 };
+
 #endif
--- a/tests/unit_tests/CMakeLists.txt
+++ b/tests/unit_tests/CMakeLists.txt
@ -1,8 +1,8 @@
 include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})

 set(TEST_SOURCES
-    test_activation_functions.cpp
-    test_neural_net.cpp
+    ./test_activation_functions.cpp
+    ./test_feed_forward.cpp
 )

 add_executable(Unit_Tests_run
--- a/tests/unit_tests/test_activation_functions.cpp
+++ b/tests/unit_tests/test_activation_functions.cpp
@ -4,14 +4,13 @@
 #include <vector>

 TEST(ActivationFunctionTest, SigmoidTest) {
-  Sigmoid sigmoid;
  std::vector<float> input = {0.0, 10.0, -10.0, 1.0, -1.0};
  std::vector<float> expected = {0.5, 0.9999546, 0.0000454,
                                 static_cast<float>(1.0 / (1.0 + exp(-1.0))),
                                 static_cast<float>(1.0 / (1.0 + exp(1.0)))};

  std::vector<float> test = input;
-  sigmoid(test);
+  Sigmoid::apply(test);

  ASSERT_EQ(test.size(), expected.size());
  for (size_t i = 0; i < test.size(); i++) {
@ -19,16 +18,15 @@ TEST(ActivationFunctionTest, SigmoidTest) {
  }

  // Test initialization standard deviation
-  EXPECT_NEAR(sigmoid.init_stddev(100), sqrt(1.0 / 100), 1e-6);
+  EXPECT_NEAR(Sigmoid::init_stddev(100), sqrt(1.0 / 100), 1e-6);
 }

 TEST(ActivationFunctionTest, ReLUTest) {
-  ReLU relu;
  std::vector<float> input = {0.0, 5.0, -5.0, 0.0001, -0.0001};
  std::vector<float> expected = {0.0, 5.0, 0.0, 0.0001, 0.0};

  std::vector<float> test = input;
-  relu(test);
+  ReLU::apply(test);

  ASSERT_EQ(test.size(), expected.size());
  for (size_t i = 0; i < test.size(); i++) {
@ -36,7 +34,7 @@ TEST(ActivationFunctionTest, ReLUTest) {
  }

  // Test initialization standard deviation
-  EXPECT_NEAR(relu.init_stddev(100), sqrt(2.0 / 100), 1e-6);
+  EXPECT_NEAR(ReLU::init_stddev(100), sqrt(2.0 / 100), 1e-6);
 }

 TEST(ActivationFunctionTest, SoftMaxTest) {
@ -44,7 +42,7 @@ TEST(ActivationFunctionTest, SoftMaxTest) {
  std::vector<float> input = {1.0, 2.0, 3.0, 4.0, 1.0};
  std::vector<float> test = input;

-  softmax(test);
+  SoftMax::apply(test);

  // Test properties of softmax
  ASSERT_EQ(test.size(), input.size());
--- a/tests/unit_tests/test_feed_forward.cpp
+++ b/tests/unit_tests/test_feed_forward.cpp
@ -0,0 +1,134 @@
+#include "../../src/activation_function.hpp"
+#include "../../src/forward_feed.hpp"
+#include "../../src/matrix.hpp"
+#include <gtest/gtest.h>
+#include <stdexcept>
+#include <vector>
+
+/** Fixture shared by the ForwardFeed tests.
+ * Before each test it rebuilds `weights` as two 2x2 matrices whose entries
+ * are all 0.5.
+ */
+class ForwardFeedTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    // Both layers are identical, so build one and copy it twice
+    Matrix<float> half_layer(2, 2, {0.5, 0.5, 0.5, 0.5});
+    weights = {half_layer, half_layer};
+  }
+
+  std::vector<Matrix<float>> weights;
+};
+
+TEST_F(ForwardFeedTest, BasicForwardFeed) {
+  // Run a two-element sample through the fixture network with ReLU
+  std::vector<float> sample = {1.0, 2.0};
+  ForwardFeed<ReLU> pass(sample, weights);
+
+  // The final weight matrix has two rows, so two outputs are expected
+  EXPECT_EQ(pass.m_yhat.size(), 2);
+
+  // Two weight matrices leave exactly one stored activation
+  EXPECT_EQ(pass.m_activations.size(), 1);
+
+  // That stored activation is the input itself as a 2x1 column
+  Matrix<float> &first = pass.m_activations[0];
+  EXPECT_EQ(first.rows(), 2);
+  EXPECT_EQ(first.cols(), 1);
+  EXPECT_FLOAT_EQ(first(0, 0), 1.0);
+  EXPECT_FLOAT_EQ(first(1, 0), 2.0);
+}
+
+TEST_F(ForwardFeedTest, DifferentActivationFunctions) {
+  // The output size should be the same whichever activation the hidden
+  // layer uses and whatever the sign of the inputs
+  std::vector<float> positive = {1.0, 2.0};
+  std::vector<float> negative = {-1.0, -2.0};
+
+  // Sigmoid hidden layer
+  ForwardFeed<Sigmoid> with_sigmoid(positive, weights);
+  EXPECT_EQ(with_sigmoid.m_yhat.size(), 2);
+
+  // ReLU hidden layer
+  ForwardFeed<ReLU> with_relu(positive, weights);
+  EXPECT_EQ(with_relu.m_yhat.size(), 2);
+
+  // ReLU hidden layer fed negative values
+  ForwardFeed<ReLU> with_relu_negative(negative, weights);
+  EXPECT_EQ(with_relu_negative.m_yhat.size(), 2);
+}
+
+TEST_F(ForwardFeedTest, ActivationStorage) {
+  // Check both what is recorded during the pass and the final prediction
+  std::vector<float> sample = {1.0, 2.0};
+  ForwardFeed<ReLU> pass(sample, weights);
+
+  // First recorded activation: the input as a 2x1 column
+  Matrix<float> &recorded = pass.m_activations[0];
+  EXPECT_EQ(recorded.rows(), 2);
+  EXPECT_EQ(recorded.cols(), 1);
+  EXPECT_FLOAT_EQ(recorded(0, 0), 1.0);
+  EXPECT_FLOAT_EQ(recorded(1, 0), 2.0);
+
+  // Final prediction: two values produced by the softmax layer
+  EXPECT_EQ(pass.m_yhat.size(), 2);
+  float total = 0.0;
+  for (size_t i = 0; i < pass.m_yhat.size(); i++) {
+    total += pass.m_yhat[i];
+  }
+  EXPECT_NEAR(total, 1.0, 1e-6); // Softmax outputs should sum to 1
+}
+
+TEST_F(ForwardFeedTest, EdgeCases) {
+  // Inputs that sit at the extremes; each must still yield two outputs
+  const std::vector<std::vector<float>> samples = {
+      {0.0, 0.0},     // zero input
+      {-1.0, -2.0},   // negative input
+      {100.0, 200.0}, // large input
+  };
+
+  for (const auto &sample : samples) {
+    ForwardFeed<ReLU> pass(sample, weights);
+    EXPECT_EQ(pass.m_yhat.size(), 2);
+  }
+}
+
+TEST_F(ForwardFeedTest, DifferentNetworkSizes) {
+  // Single hidden layer: 2x2 followed by 2x2
+  std::vector<Matrix<float>> shallow = {
+      Matrix<float>(2, 2, {0.5, 0.5, 0.5, 0.5}),
+      Matrix<float>(2, 2, {0.5, 0.5, 0.5, 0.5})};
+
+  // Multiple hidden layers: 3x2, then 2x3, then 2x2
+  std::vector<Matrix<float>> deep = {
+      Matrix<float>(3, 2, {0.5, 0.5, 0.5, 0.5, 0.5, 0.5}),
+      Matrix<float>(2, 3, {0.5, 0.5, 0.5, 0.5, 0.5, 0.5}),
+      Matrix<float>(2, 2, {0.5, 0.5, 0.5, 0.5})};
+
+  std::vector<std::vector<Matrix<float>>> architectures = {shallow, deep};
+
+  for (size_t n = 0; n < architectures.size(); n++) {
+    const std::vector<Matrix<float>> &layers = architectures[n];
+    std::vector<float> ones(2, 1.0);
+    ForwardFeed<ReLU> pass(ones, layers);
+
+    // One activation is stored per layer except the last
+    EXPECT_EQ(pass.m_activations.size(), layers.size() - 1);
+
+    // The output has one entry per row of the last weight matrix
+    EXPECT_EQ(pass.m_yhat.size(), layers.back().rows());
+  }
+}
+
+TEST_F(ForwardFeedTest, WeightMatrixDimensions) {
+  // A two-element input paired with a first layer that has three columns.
+  // NOTE(review): the exception is presumably raised by the Matrix
+  // multiplication, which is not visible here - confirm.
+  std::vector<float> sample = {1.0, 2.0};
+
+  std::vector<Matrix<float>> mismatched = {
+      Matrix<float>(2, 3, {0.5, 0.5, 0.5, 0.5, 0.5, 0.5}), // 2x3 instead of 2x2
+      Matrix<float>(2, 2, {0.5, 0.5, 0.5, 0.5})};
+
+  EXPECT_THROW(ForwardFeed<ReLU> pass(sample, mismatched),
+               std::invalid_argument);
+}
+
+// Test runner entry point: passes the command line to GoogleTest, runs every
+// registered test and returns the combined result as the exit status.
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  const int status = RUN_ALL_TESTS();
+  return status;
+}
--- a/tests/unit_tests/test_neural_net.cpp
+++ b/tests/unit_tests/test_neural_net.cpp
@ -1,123 +0,0 @@
-#include "../src/activation_function.hpp"
-#include "../src/neural_net.hpp"
-#include <cmath>
-#include <gtest/gtest.h>
-#include <stdexcept>
-#include <vector>
-
-class NeuralNetTest : public ::testing::Test {
-protected:
-  void SetUp() override {
-    // Create a simple neural network with 2 input neurons, 2 hidden neurons,
-    // and 2 output neurons
-    std::vector<size_t> layer_sizes = {2, 2, 2};
-    net = std::make_unique<NeuralNet<Sigmoid>>(layer_sizes);
-  }
-
-  std::unique_ptr<NeuralNet<Sigmoid>> net;
-};
-
-TEST_F(NeuralNetTest, FeedForward_SimpleNetwork) {
-  // Test a simple network with known weights and inputs
-  std::vector<float> input = {0.5f, 0.5f};
-
-  // Set known weights for testing
-  std::vector<Matrix<float>> weights = {
-      Matrix<float>(2, 2, 0.5f), // First layer weights
-      Matrix<float>(2, 2, 0.5f)  // Output layer weights
-  };
-
-  // Replace the network's weights with our test weights
-  net->set_weights(weights);
-
-  // Calculate expected output manually
-  // First layer: Z1 = W1 * X
-  Matrix<float> X(2, 1, 0.0);
-  X(0, 0) = input[0];
-  X(1, 0) = input[1];
-
-  Matrix<float> Z1 = weights[0] * X;
-  // Apply sigmoid activation
-  Sigmoid sigmoid;
-  sigmoid(Z1.data());
-
-  // Second layer: Z2 = W2 * A1
-  Matrix<float> Z2 = weights[1] * Z1;
-  SoftMax softmax;
-  softmax(Z2.data());
-
-  // Convert to output vector
-  std::vector<float> expected_output(Z2.cols());
-  for (size_t i = 0; i < Z2.rows(); i++) {
-    expected_output[i] = Z2(i, 0);
-  }
-
-  // Get actual output from feed_forward
-  std::vector<float> output = net->feed_forward(input);
-
-  // Compare actual and expected outputs
-  for (size_t i = 0; i < output.size(); i++) {
-    EXPECT_NEAR(output[i], expected_output[i], 1e-6);
-  }
-}
-
-TEST_F(NeuralNetTest, FeedForward_DifferentLayerSizes) {
-  // Create a network with different layer sizes
-  std::vector<size_t> layer_sizes = {3, 4, 2};
-  NeuralNet<Sigmoid> net2(layer_sizes);
-
-  std::vector<float> input = {0.1f, 0.2f, 0.3f};
-  std::vector<float> output = net2.feed_forward(input);
-
-  // Output should have 2 elements (size of last layer)
-  EXPECT_EQ(output.size(), 2);
-}
-
-TEST_F(NeuralNetTest, FeedForward_InvalidInputSize) {
-  std::vector<float> input = {0.1f}; // Only 1 input, but network expects 2
-
-  // This should throw an exception since input size doesn't match first layer
-  // size
-  EXPECT_THROW(net->feed_forward(input), std::invalid_argument);
-}
-
-TEST_F(NeuralNetTest, FeedForward_IdentityTest) {
-  // Create a network with identity weights (1.0) and no bias
-  std::vector<size_t> layer_sizes = {2, 2};
-  NeuralNet<Sigmoid> net2(layer_sizes);
-
-  // Set weights to identity matrix
-  std::vector<Matrix<float>> weights = {Matrix<float>(2, 2, 1.0f)};
-
-  net2.set_weights(weights);
-
-  std::vector<float> input = {0.5f, 0.5f};
-  std::vector<float> output = net2.feed_forward(input);
-
-  // Since we're using sigmoid activation, the output should be
-  // sigmoid(0.5 + 0.5) = sigmoid(1.0) for each neuron
-  SoftMax softmax;
-  std::vector<float> expected_output = input;
-  softmax(expected_output);
-
-  for (float val : output) {
-    EXPECT_NEAR(val, expected_output[0], 1e-6);
-  }
-}
-
-TEST_F(NeuralNetTest, FeedForward_SoftmaxOutput) {
-  std::vector<float> input = {1.0f, -1.0f};
-  std::vector<float> output = net->feed_forward(input);
-
-  // Verify that the output sums to 1 (property of softmax)
-  float sum = 0.0f;
-  for (float val : output) {
-    sum += val;
-  }
-  EXPECT_NEAR(sum, 1.0f, 1e-6);
-
-  // Verify that all outputs are positive
-  for (float val : output) {
-    EXPECT_GT(val, 0.0f);
-  }
-}
Author	SHA1	Message	Date
Alex Selimov	92e9a4a719	Refactor forward propagation code again	2025-03-30 00:40:12 -04:00
Alex Selimov	47ef7c25d7	Refactor to make activation functions static	2025-03-29 23:25:29 -04:00