diff --git a/src/activation_function.hpp b/src/activation_function.hpp
index 79a69b8..fd20741 100644
--- a/src/activation_function.hpp
+++ b/src/activation_function.hpp
@@ -3,17 +3,49 @@
 #include <algorithm>
 #include <cmath>
+#include <cstddef>
+#include <vector>

 /**
  * Functor to set the activation function as a Sigmoid function
  */
 struct Sigmoid {
-  double operator()(double z) { return 1 / (1 + exp(-z)); };
+  void operator()(std::vector<float> &z) {
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = 1 / (1 + exp(-z[i]));
+    };
+  };
+  double init_stddev(int n) { return sqrt(1.0 / n); };
 };

 /**
  * Functor to set the activation function as Rectified Linear Unit
  */
 struct ReLU {
-  double operator()(double z) { return std::max(z, 0.0); };
+  void operator()(std::vector<float> &z) {
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = std::max(0.0f, z[i]);
+    };
+  };
+  double init_stddev(int n) { return sqrt(2.0 / n); };
 };
+
+/**
+ * SoftMax Activation function.
+ * This is generally used in the final output layer
+ */
+struct SoftMax {
+  void operator()(std::vector<float> &z) {
+    float zmax = *std::max_element(z.begin(), z.end());
+    float sum = 0.0;
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = exp(z[i] - zmax);
+      sum += z[i];
+    };
+    for (size_t i = 0; i < z.size(); i++) {
+      z[i] = z[i] / sum;
+    };
+  };
+  double init_stddev(int n) { return sqrt(1.0 / n); };
+};
+
 #endif
diff --git a/src/neural_net.cpp b/src/neural_net.cpp
new file mode 100644
index 0000000..c2ba86c
--- /dev/null
+++ b/src/neural_net.cpp
@@ -0,0 +1,30 @@
+#include "neural_net.hpp"
+#include <functional>
+#include <numeric>
+#include <random>
+#include <vector>
+
+template <class ActivationFunction>
+NeuralNet<ActivationFunction>::NeuralNet(std::vector<int> &layer_sizes)
+    : m_sizes(layer_sizes) {
+  int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(), 0,
+                                      std::plus<int>());
+  // Initialize the activation function
+  m_activation_func = ActivationFunction();
+
+  // Create random sampling device
+  std::random_device rd{};
+  std::mt19937 gen{rd()};
+  std::normal_distribution<double> dist{0.0, 1.0};
+
+  // Initialize the weights (resize first so every index is valid)
+  m_weights.resize(total_neurons);
+  int start_idx = 0;
+  for (auto size : m_sizes) {
+    for (int i = 0; i < size; i++) {
+      m_weights[i + start_idx] =
+          dist(gen) * m_activation_func.init_stddev(size);
+    }
+    start_idx += size;
+  }
+}
diff --git a/src/neural_net.hpp b/src/neural_net.hpp
index 32bb5fb..4e8cf34 100644
--- a/src/neural_net.hpp
+++ b/src/neural_net.hpp
@@ -1,5 +1,15 @@
 #ifndef NEURAL_NET_H
 #define NEURAL_NET_H
-template <class ActivationFunction> class NeuralNet : ActivationFunction {};
+#include <vector>
+template <class ActivationFunction> class NeuralNet {
+public:
+  NeuralNet(std::vector<int> &layer_sizes);
+
+private:
+  ActivationFunction m_activation_func;
+  std::vector<int> m_sizes;
+  std::vector<float> m_weights;
+  std::vector<float> feed_forward(std::vector<float> x);
+};

 #endif
diff --git a/tests/unit_tests/test_activation_functions.cpp b/tests/unit_tests/test_activation_functions.cpp
index c444f78..2e47fa7 100644
--- a/tests/unit_tests/test_activation_functions.cpp
+++ b/tests/unit_tests/test_activation_functions.cpp
@@ -1,41 +1,75 @@
 #include <gtest/gtest.h>
 #include "../../src/activation_function.hpp"
 #include <cmath>
+#include <vector>

 TEST(ActivationFunctionTest, SigmoidTest) {
   Sigmoid sigmoid;
+  std::vector<float> input = {0.0, 10.0, -10.0, 1.0, -1.0};
+  std::vector<double> expected = {
+      0.5,
+      0.9999546,
+      0.0000454,
+      1.0 / (1.0 + exp(-1.0)),
+      1.0 / (1.0 + exp(1.0))
+  };

-  // Test sigmoid at x = 0 (should be 0.5)
-  EXPECT_NEAR(sigmoid(0.0), 0.5, 1e-6);
+  std::vector<float> test = input;
+  sigmoid(test);

-  // Test sigmoid at large positive value (should approach 1)
-  EXPECT_NEAR(sigmoid(10.0), 1.0, 1e-4);
+  ASSERT_EQ(test.size(), expected.size());
+  for (size_t i = 0; i < test.size(); i++) {
+    EXPECT_NEAR(test[i], expected[i], 1e-6);
+  }

-  // Test sigmoid at large negative value (should approach 0)
-  EXPECT_NEAR(sigmoid(-10.0), 0.0, 1e-4);
-
-  // Test sigmoid at x = 1
-  EXPECT_NEAR(sigmoid(1.0), 1.0 / (1.0 + exp(-1.0)), 1e-6);
-
-  // Test sigmoid at x = -1
-  EXPECT_NEAR(sigmoid(-1.0), 1.0 / (1.0 + exp(1.0)), 1e-6);
+  // Test initialization standard deviation
+  EXPECT_NEAR(sigmoid.init_stddev(100), sqrt(1.0/100), 1e-6);
 }

 TEST(ActivationFunctionTest, ReLUTest) {
   ReLU relu;
+  std::vector<float> input = {0.0, 5.0, -5.0, 0.0001, -0.0001};
+  std::vector<float> expected = {0.0, 5.0, 0.0, 0.0001, 0.0};

-  // Test ReLU at x = 0 (should be 0)
-  EXPECT_DOUBLE_EQ(relu(0.0), 0.0);
+  std::vector<float> test = input;
+  relu(test);

-  // Test ReLU at positive value (should be same value)
-  EXPECT_DOUBLE_EQ(relu(5.0), 5.0);
+  ASSERT_EQ(test.size(), expected.size());
+  for (size_t i = 0; i < test.size(); i++) {
+    EXPECT_FLOAT_EQ(test[i], expected[i]);
+  }

-  // Test ReLU at negative value (should be 0)
-  EXPECT_DOUBLE_EQ(relu(-5.0), 0.0);
-
-  // Test ReLU at very small positive value
-  EXPECT_DOUBLE_EQ(relu(0.0001), 0.0001);
-
-  // Test ReLU at very small negative value
-  EXPECT_DOUBLE_EQ(relu(-0.0001), 0.0);
+  // Test initialization standard deviation
+  EXPECT_NEAR(relu.init_stddev(100), sqrt(2.0/100), 1e-6);
+}
+
+TEST(ActivationFunctionTest, SoftMaxTest) {
+  SoftMax softmax;
+  std::vector<float> input = {1.0, 2.0, 3.0, 4.0, 1.0};
+  std::vector<float> test = input;
+
+  softmax(test);
+
+  // Test properties of softmax
+  ASSERT_EQ(test.size(), input.size());
+
+  // Sum should be approximately 1
+  float sum = 0.0;
+  for (float val : test) {
+    sum += val;
+    // All values should be between 0 and 1
+    EXPECT_GE(val, 0.0);
+    EXPECT_LE(val, 1.0);
+  }
+  EXPECT_NEAR(sum, 1.0, 1e-6);
+
+  // Higher input should lead to higher output
+  for (size_t i = 0; i < test.size() - 1; i++) {
+    if (input[i] < input[i + 1]) {
+      EXPECT_LT(test[i], test[i + 1]);
+    }
+  }
+
+  // Test initialization standard deviation
+  EXPECT_NEAR(softmax.init_stddev(100), sqrt(1.0/100), 1e-6);
+}
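For context, a minimal usage sketch of the in-place activation functors added by this patch. It is illustrative only and not part of the diff; the include path and the sample values are assumptions, and the program only exercises what activation_function.hpp itself defines.

#include <iostream>
#include <vector>

#include "src/activation_function.hpp"  // path assumed relative to the repo root

int main() {
  // Each functor now transforms a whole vector of pre-activations in place.
  std::vector<float> logits = {1.0f, -2.0f, 0.5f};

  SoftMax softmax;
  softmax(logits);  // logits now holds a probability distribution

  float sum = 0.0f;
  for (float p : logits) {
    std::cout << p << ' ';
    sum += p;  // accumulates to approximately 1.0
  }
  std::cout << "\nsum = " << sum << '\n';

  // init_stddev(n) returns the weight-initialization scale for a layer with
  // n inputs: sqrt(1/n) for Sigmoid/SoftMax, sqrt(2/n) for ReLU.
  ReLU relu;
  std::cout << "ReLU init stddev for n=100: " << relu.init_stddev(100) << '\n';
  return 0;
}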