Refactor to make activation functions static

Alex Selimov 2025-03-29 23:25:29 -04:00
parent a578cc0c5b
commit 47ef7c25d7
4 changed files with 18 additions and 29 deletions

View File

@@ -8,13 +8,13 @@
  * Functor to set the activation function as a Sigmoid function
  */
 struct Sigmoid {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
     for (size_t i = 0; i < z.size(); i++) {
       z[i] = 1 / (1 + exp(-z[i]));
     };
   };
-  float init_stddev(int n) { return sqrt(1.0 / n); };
-  float derivative(float x) {
+  float static init_stddev(int n) { return sqrt(1.0 / n); };
+  float static derivative(float x) {
     float exp_x = exp(-x);
     return exp_x / pow(exp_x + 1.0, 2.0);
   }
@@ -24,13 +24,13 @@ struct Sigmoid {
  * Functor to set the activation function as Rectified Linear Unit
  */
 struct ReLU {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
     for (size_t i = 0; i < z.size(); i++) {
       z[i] = std::max(0.0f, z[i]);
     };
   };
-  float init_stddev(int n) { return sqrt(2.0 / n); };
-  float derivative(float x) {
+  float static init_stddev(int n) { return sqrt(2.0 / n); };
+  float static derivative(float x) {
     if (x < 0) {
       return 0;
     } else {
@@ -44,7 +44,7 @@ struct ReLU {
  * This is generally used in the final output layer
  */
 struct SoftMax {
-  void operator()(std::vector<float> &z) {
+  void static apply(std::vector<float> &z) {
     float zmax = *std::max_element(z.begin(), z.end());
     float sum = 0.0;
     for (size_t i = 0; i < z.size(); i++) {
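With the functors converted to static members, callers no longer construct an activation object; the functions are invoked directly on the type. A minimal sketch of the new call style (the include path "activation_function.hpp" is an assumption for illustration, not taken from the diff):

// Sketch only: exercises the static activation interface from the diff above.
// The include path "activation_function.hpp" is assumed for illustration.
#include "activation_function.hpp"

#include <cstdio>
#include <vector>

int main() {
  std::vector<float> z = {-1.0f, 0.0f, 2.0f};
  ReLU::apply(z);    // clamps negatives to zero; no ReLU instance is created
  Sigmoid::apply(z); // squashes each element into (0, 1)
  for (float v : z)
    std::printf("%f\n", v);
  // The initialization scale is likewise queried through the type.
  std::printf("sigmoid init stddev for 64 inputs: %f\n",
              Sigmoid::init_stddev(64));
  return 0;
}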

View File

@@ -9,9 +9,6 @@
 template <class ActivationFunction> class NeuralNet {
 public:
   NeuralNet(std::vector<size_t> &layer_sizes) : m_sizes(layer_sizes) {
-    // Initialize the activation function
-    m_activation_func = ActivationFunction();
     // Create random sampling device
     std::random_device rd{};
     std::mt19937 gen{rd()};
@@ -26,7 +23,7 @@ public:
       Matrix<float> W(rows, cols, 0.0);
       for (size_t j = 0; j < rows; j++) {
         for (size_t k = 0; k < cols; k++) {
-          W(j, k) = dist(gen) * m_activation_func.init_stddev(cols);
+          W(j, k) = dist(gen) * ActivationFunction::init_stddev(cols);
         }
       }
       m_weights.push_back(W);
@@ -78,13 +75,13 @@ public:
       Matrix Z = m_weights[i] * A;
       // Apply activation function
-      m_activation_func(Z.data());
+      ActivationFunction::apply(Z.data());
       A = Z;
     }
     // Always use soft max for the final layer
     Matrix Z = m_weights.back() * A;
-    m_soft_max(Z.data());
+    SoftMax::apply(Z.data());
     // Convert final output to vector
     std::vector<float> output(Z.rows());
@@ -95,8 +92,6 @@ public:
   };

 private:
-  ActivationFunction m_activation_func;
-  SoftMax m_soft_max;
   std::vector<size_t> m_sizes;
   std::vector<Matrix<float>> m_weights;
 };
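The NeuralNet template now reaches its activation policy entirely through static calls, so no ActivationFunction or SoftMax members need to be stored. A minimal, self-contained sketch of this compile-time policy pattern (the Identity policy and Layer wrapper below are illustrative names, not part of the repository):

// Sketch of the static-policy dispatch used by NeuralNet<ActivationFunction>.
// "Identity" and "Layer" are hypothetical names used only for this example.
#include <cmath>
#include <vector>

struct Identity {
  static void apply(std::vector<float> &) {} // leave values unchanged
  static float init_stddev(int n) { return std::sqrt(1.0f / n); }
  static float derivative(float) { return 1.0f; }
};

template <class Activation> struct Layer {
  // No Activation member is stored; the call resolves at compile time.
  static void forward(std::vector<float> &z) { Activation::apply(z); }
};

int main() {
  std::vector<float> z = {0.5f, -0.5f};
  Layer<Identity>::forward(z); // dispatches straight to Identity::apply
  return 0;
}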

View File

@@ -4,14 +4,13 @@
 #include <vector>

 TEST(ActivationFunctionTest, SigmoidTest) {
-  Sigmoid sigmoid;
   std::vector<float> input = {0.0, 10.0, -10.0, 1.0, -1.0};
   std::vector<float> expected = {0.5, 0.9999546, 0.0000454,
                                  static_cast<float>(1.0 / (1.0 + exp(-1.0))),
                                  static_cast<float>(1.0 / (1.0 + exp(1.0)))};
   std::vector<float> test = input;
-  sigmoid(test);
+  Sigmoid::apply(test);

   ASSERT_EQ(test.size(), expected.size());
   for (size_t i = 0; i < test.size(); i++) {
@@ -19,16 +18,15 @@ TEST(ActivationFunctionTest, SigmoidTest) {
   }

   // Test initialization standard deviation
-  EXPECT_NEAR(sigmoid.init_stddev(100), sqrt(1.0 / 100), 1e-6);
+  EXPECT_NEAR(Sigmoid::init_stddev(100), sqrt(1.0 / 100), 1e-6);
 }

 TEST(ActivationFunctionTest, ReLUTest) {
-  ReLU relu;
   std::vector<float> input = {0.0, 5.0, -5.0, 0.0001, -0.0001};
   std::vector<float> expected = {0.0, 5.0, 0.0, 0.0001, 0.0};
   std::vector<float> test = input;
-  relu(test);
+  ReLU::apply(test);

   ASSERT_EQ(test.size(), expected.size());
   for (size_t i = 0; i < test.size(); i++) {
@@ -36,7 +34,7 @@ TEST(ActivationFunctionTest, ReLUTest) {
   }

   // Test initialization standard deviation
-  EXPECT_NEAR(relu.init_stddev(100), sqrt(2.0 / 100), 1e-6);
+  EXPECT_NEAR(ReLU::init_stddev(100), sqrt(2.0 / 100), 1e-6);
 }

 TEST(ActivationFunctionTest, SoftMaxTest) {
@@ -44,7 +42,7 @@ TEST(ActivationFunctionTest, SoftMaxTest) {
   std::vector<float> input = {1.0, 2.0, 3.0, 4.0, 1.0};
   std::vector<float> test = input;
-  softmax(test);
+  SoftMax::apply(test);

   // Test properties of softmax
   ASSERT_EQ(test.size(), input.size());
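Because the tests now call the activation functors through the type, further assertions can be added without any local objects. As an illustration, a sketch of one more check in the same style (the test name and tolerance below are not from the commit):

// Illustrative follow-on test, not part of the commit: verifies that
// SoftMax::apply produces a probability distribution (outputs sum to one).
TEST(ActivationFunctionTest, SoftMaxSumsToOne) {
  std::vector<float> test = {1.0, 2.0, 3.0, 4.0, 1.0};
  SoftMax::apply(test);
  float sum = 0.0f;
  for (float v : test) {
    sum += v;
  }
  EXPECT_NEAR(sum, 1.0f, 1e-6);
}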

View File

@@ -3,7 +3,6 @@
 #include <cmath>
 #include <gtest/gtest.h>
 #include <stdexcept>
-#include <vector>

 class NeuralNetTest : public ::testing::Test {
 protected:
@@ -38,13 +37,11 @@ TEST_F(NeuralNetTest, FeedForward_SimpleNetwork) {
   Matrix<float> Z1 = weights[0] * X;

   // Apply sigmoid activation
-  Sigmoid sigmoid;
-  sigmoid(Z1.data());
+  Sigmoid::apply(Z1.data());

   // Second layer: Z2 = W2 * A1
   Matrix<float> Z2 = weights[1] * Z1;
-  SoftMax softmax;
-  softmax(Z2.data());
+  SoftMax::apply(Z2.data());

   // Convert to output vector
   std::vector<float> expected_output(Z2.cols());
@@ -96,9 +93,8 @@ TEST_F(NeuralNetTest, FeedForward_IdentityTest) {
   // Since we're using sigmoid activation, the output should be
   // sigmoid(0.5 + 0.5) = sigmoid(1.0) for each neuron
-  SoftMax softmax;
   std::vector<float> expected_output = input;
-  softmax(expected_output);
+  SoftMax::apply(expected_output);

   for (float val : output) {
     EXPECT_NEAR(val, expected_output[0], 1e-6);