From 47ef7c25d7e1f15d359a74636b2d4dc7ada45ee5 Mon Sep 17 00:00:00 2001
From: Alex Selimov
Date: Sat, 29 Mar 2025 23:25:29 -0400
Subject: [PATCH] Refactor to make activation functions static

---
 src/activation_function.hpp                    | 14 +++++++-------
 src/neural_net.hpp                             | 11 +++--------
 tests/unit_tests/test_activation_functions.cpp | 12 +++++-------
 tests/unit_tests/test_neural_net.cpp           | 10 +++-------
 4 files changed, 18 insertions(+), 29 deletions(-)

diff --git a/src/activation_function.hpp b/src/activation_function.hpp
index 3a748f2..a1ec821 100644
--- a/src/activation_function.hpp
+++ b/src/activation_function.hpp
@@ -8,13 +8,13 @@
  * Functor to set the activation function as a Sigmoid function
  */
 struct Sigmoid {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
     for (size_t i = 0; i < z.size(); i++) {
       z[i] = 1 / (1 + exp(-z[i]));
     };
   };
-  float init_stddev(int n) { return sqrt(1.0 / n); };
-  float derivative(float x) {
+  float static init_stddev(int n) { return sqrt(1.0 / n); };
+  float static derivative(float x) {
     float exp_x = exp(-x);
     return exp_x / pow(exp_x + 1.0, 2.0);
   }
@@ -24,13 +24,13 @@ struct Sigmoid {
  * Functor to set the activation function as Rectified Linear Unit
  */
 struct ReLU {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
     for (size_t i = 0; i < z.size(); i++) {
       z[i] = std::max(0.0f, z[i]);
     };
   };
-  float init_stddev(int n) { return sqrt(2.0 / n); };
-  float derivative(float x) {
+  float static init_stddev(int n) { return sqrt(2.0 / n); };
+  float static derivative(float x) {
     if (x < 0) {
       return 0;
     } else {
@@ -44,7 +44,7 @@ struct ReLU {
  * This is generally used in the final output layer
  */
 struct SoftMax {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
     float zmax = *std::max_element(z.begin(), z.end());
     float sum = 0.0;
     for (size_t i = 0; i < z.size(); i++) {
diff --git a/src/neural_net.hpp b/src/neural_net.hpp
index 2aae21a..40759eb 100644
--- a/src/neural_net.hpp
+++ b/src/neural_net.hpp
@@ -9,9 +9,6 @@ template <class ActivationFunction> class NeuralNet {
 public:
   NeuralNet(std::vector<size_t> &layer_sizes) : m_sizes(layer_sizes) {
-    // Initialize the activation function
-    m_activation_func = ActivationFunction();
-
     // Create random sampling device
     std::random_device rd{};
     std::mt19937 gen{rd()};
@@ -26,7 +23,7 @@ public:
       Matrix<float> W(rows, cols, 0.0);
       for (size_t j = 0; j < rows; j++) {
         for (size_t k = 0; k < cols; k++) {
-          W(j, k) = dist(gen) * m_activation_func.init_stddev(cols);
+          W(j, k) = dist(gen) * ActivationFunction::init_stddev(cols);
         }
       }
       m_weights.push_back(W);
@@ -78,13 +75,13 @@ public:
       Matrix<float> Z = m_weights[i] * A;

       // Apply activation function
-      m_activation_func(Z.data());
+      ActivationFunction::apply(Z.data());
       A = Z;
     }

     // Always use soft max for the final layer
     Matrix<float> Z = m_weights.back() * A;
-    m_soft_max(Z.data());
+    SoftMax::apply(Z.data());

     // Convert final output to vector
     std::vector<float> output(Z.rows());
@@ -95,8 +92,6 @@ public:
   };

 private:
-  ActivationFunction m_activation_func;
-  SoftMax m_soft_max;
   std::vector<size_t> m_sizes;
   std::vector<Matrix<float>> m_weights;
 };
diff --git a/tests/unit_tests/test_activation_functions.cpp b/tests/unit_tests/test_activation_functions.cpp
index 82b00a0..d18ef52 100644
--- a/tests/unit_tests/test_activation_functions.cpp
+++ b/tests/unit_tests/test_activation_functions.cpp
@@ -4,14 +4,13 @@
 #include

 TEST(ActivationFunctionTest, SigmoidTest) {
-  Sigmoid sigmoid;
   std::vector<float> input = {0.0, 10.0, -10.0, 1.0, -1.0};
   std::vector<float> expected = {0.5, 0.9999546, 0.0000454,
                                  static_cast<float>(1.0 / (1.0 + exp(-1.0))),
                                  static_cast<float>(1.0 / (1.0 + exp(1.0)))};

   std::vector<float> test = input;
-  sigmoid(test);
+  Sigmoid::apply(test);

   ASSERT_EQ(test.size(), expected.size());
   for (size_t i = 0; i < test.size(); i++) {
@@ -19,16 +18,15 @@ TEST(ActivationFunctionTest, SigmoidTest) {
   }

   // Test initialization standard deviation
-  EXPECT_NEAR(sigmoid.init_stddev(100), sqrt(1.0 / 100), 1e-6);
+  EXPECT_NEAR(Sigmoid::init_stddev(100), sqrt(1.0 / 100), 1e-6);
 }

 TEST(ActivationFunctionTest, ReLUTest) {
-  ReLU relu;
   std::vector<float> input = {0.0, 5.0, -5.0, 0.0001, -0.0001};
   std::vector<float> expected = {0.0, 5.0, 0.0, 0.0001, 0.0};

   std::vector<float> test = input;
-  relu(test);
+  ReLU::apply(test);

   ASSERT_EQ(test.size(), expected.size());
   for (size_t i = 0; i < test.size(); i++) {
@@ -36,7 +34,7 @@ TEST(ActivationFunctionTest, ReLUTest) {
   }

   // Test initialization standard deviation
-  EXPECT_NEAR(relu.init_stddev(100), sqrt(2.0 / 100), 1e-6);
+  EXPECT_NEAR(ReLU::init_stddev(100), sqrt(2.0 / 100), 1e-6);
 }

 TEST(ActivationFunctionTest, SoftMaxTest) {
@@ -44,7 +42,7 @@ TEST(ActivationFunctionTest, SoftMaxTest) {
   std::vector<float> input = {1.0, 2.0, 3.0, 4.0, 1.0};

   std::vector<float> test = input;
-  softmax(test);
+  SoftMax::apply(test);

   // Test properties of softmax
   ASSERT_EQ(test.size(), input.size());
diff --git a/tests/unit_tests/test_neural_net.cpp b/tests/unit_tests/test_neural_net.cpp
index d959903..f74d83b 100644
--- a/tests/unit_tests/test_neural_net.cpp
+++ b/tests/unit_tests/test_neural_net.cpp
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include

 class NeuralNetTest : public ::testing::Test {
 protected:
@@ -38,13 +37,11 @@ TEST_F(NeuralNetTest, FeedForward_SimpleNetwork) {
   Matrix<float> Z1 = weights[0] * X;

   // Apply sigmoid activation
-  Sigmoid sigmoid;
-  sigmoid(Z1.data());
+  Sigmoid::apply(Z1.data());

   // Second layer: Z2 = W2 * A1
   Matrix<float> Z2 = weights[1] * Z1;
-  SoftMax softmax;
-  softmax(Z2.data());
+  SoftMax::apply(Z2.data());

   // Convert to output vector
   std::vector<float> expected_output(Z2.cols());
@@ -96,9 +93,8 @@ TEST_F(NeuralNetTest, FeedForward_IdentityTest) {
   // Since we're using sigmoid activation, the output should be
   // sigmoid(0.5 + 0.5) = sigmoid(1.0) for each neuron
-  SoftMax softmax;
   std::vector<float> expected_output = input;
-  softmax(expected_output);
+  SoftMax::apply(expected_output);

   for (float val : output) {
     EXPECT_NEAR(val, expected_output[0], 1e-6);
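
Usage note (not part of the patch): after this refactor the activation functors are never instantiated; callers reach them through the static members. Below is a minimal sketch of the new call pattern, assuming activation_function.hpp is on the include path and that the element type is float as in the diff above.

#include "activation_function.hpp"

#include <cstdio>
#include <vector>

int main() {
  std::vector<float> z = {-1.0f, 0.0f, 2.0f};
  Sigmoid::apply(z);                  // in-place sigmoid, no functor object needed
  float sd = ReLU::init_stddev(128);  // weight-init stddev for a layer with 128 inputs

  std::vector<float> p = {1.0f, 2.0f, 3.0f};
  SoftMax::apply(p);                  // normalizes p so its entries sum to 1

  std::printf("sigmoid(0) = %f, relu stddev = %f, softmax[2] = %f\n", z[1], sd, p[2]);
  return 0;
}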