diff --git a/CMakeLists.txt b/CMakeLists.txt
index 37d8dc3..466773a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,12 @@ add_subdirectory(tests)
 
 target_link_libraries(${CMAKE_PROJECT_NAME}_run ${CMAKE_PROJECT_NAME}_lib)
 
+find_package(OpenMP)
+if(OpenMP_CXX_FOUND)
+  target_link_libraries(${CMAKE_PROJECT_NAME}_run OpenMP::OpenMP_CXX)
+endif()
+
+
 # Doxygen Build
 option(BUILD_DOC "Build Documentation" ON)
 
diff --git a/src/activation_function.hpp b/src/activation_function.hpp
index 34269cf..3a748f2 100644
--- a/src/activation_function.hpp
+++ b/src/activation_function.hpp
@@ -13,7 +13,11 @@ struct Sigmoid {
       z[i] = 1 / (1 + exp(-z[i]));
     };
   };
-  double init_stddev(int n) { return sqrt(1.0 / n); };
+  float init_stddev(int n) { return sqrt(1.0 / n); };
+  float derivative(float x) {
+    float exp_x = exp(-x);
+    return exp_x / pow(exp_x + 1.0, 2.0);
+  }
 };
 
 /**
@@ -25,7 +29,14 @@ struct ReLU {
       z[i] = std::max(0.0f, z[i]);
     };
   };
-  double init_stddev(int n) { return sqrt(2.0 / n); };
+  float init_stddev(int n) { return sqrt(2.0 / n); };
+  float derivative(float x) {
+    if (x < 0) {
+      return 0;
+    } else {
+      return 1;
+    }
+  };
 };
 
 /**
@@ -44,7 +55,7 @@ struct SoftMax {
       z[i] = z[i] / sum;
     };
   };
-  double init_stddev(int n) { return sqrt(1.0 / n); };
+  float init_stddev(int n) { return sqrt(1.0 / n); };
 };
 
 #endif
diff --git a/src/matrix.hpp b/src/matrix.hpp
new file mode 100644
index 0000000..10362ed
--- /dev/null
+++ b/src/matrix.hpp
@@ -0,0 +1,111 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <stdexcept>
+#include <vector>
+
+template <typename T> class Matrix {
+public:
+  // Create an empty matrix
+  Matrix() : m_rows(0), m_cols(0) {}
+
+  // Create a matrix with specified dimensions and initialize with value
+  Matrix(size_t rows, size_t cols, T value)
+      : m_rows(rows), m_cols(cols), m_data(rows * cols, value) {}
+
+  // Create a matrix from a 1d vector using move semantics
+  Matrix(size_t rows, size_t cols, std::vector<T> data)
+      : m_rows(rows), m_cols(cols), m_data(data) {
+
+    if (m_rows * m_cols != m_data.size()) {
+      throw std::invalid_argument(
+          "The size of input data doesn't match the provided dimensions");
+    }
+  }
+
+  // Create a matrix from a vector of vectors
+  Matrix(const std::vector<std::vector<T>> &data) {
+    if (data.empty()) {
+      m_rows = 0;
+      m_cols = 0;
+      return;
+    }
+
+    m_rows = data.size();
+    m_cols = data[0].size();
+    m_data.resize(m_rows * m_cols);
+
+    for (size_t i = 0; i < m_rows; ++i) {
+      if (data[i].size() != m_cols) {
+        throw std::invalid_argument(
+            "All rows must have the same number of columns");
+      }
+      for (size_t j = 0; j < m_cols; ++j) {
+        m_data[i * m_cols + j] = data[i][j];
+      }
+    }
+  }
+
+  // Access element (row, col)
+  float &operator()(size_t row, size_t col) {
+    check_bounds(row, col);
+    return m_data[row * m_cols + col];
+  }
+
+  const float &operator()(size_t row, size_t col) const {
+    check_bounds(row, col);
+    return m_data[row * m_cols + col];
+  }
+
+  // Matrix multiplication
+  Matrix operator*(const Matrix &other) const {
+    if (m_cols != other.m_rows) {
+      throw std::invalid_argument(
+          "Matrix dimensions do not match for multiplication");
+    }
+
+    Matrix result(m_rows, other.m_cols, 0.0);
+#pragma omp parallel for
+    for (size_t i = 0; i < m_rows; ++i) {
+      for (size_t j = 0; j < other.m_cols; ++j) {
+        float sum = 0.0f;
+        for (size_t k = 0; k < m_cols; ++k) {
+          sum += (*this)(i, k) * other(k, j);
+        }
+        result(i, j) = sum;
+      }
+    }
+    return result;
+  }
+
+  // Get matrix dimensions
+  size_t rows() const { return m_rows; }
+  size_t cols() const { return m_cols; }
+
+  // Get raw data access
+  std::vector<T> &data() { return m_data; }
+
+  // Transpose the matrix
+  Matrix transpose() const {
+    Matrix result(m_cols, m_rows, T{});
+    for (size_t i = 0; i < m_rows; ++i) {
+      for (size_t j = 0; j < m_cols; ++j) {
+        result(j, i) = (*this)(i, j);
+      }
+    }
+    return result;
+  }
+
+private:
+  void check_bounds(size_t row, size_t col) const {
+    if (row >= m_rows || col >= m_cols) {
+      throw std::out_of_range("Matrix index out of bounds");
+    }
+  }
+
+  size_t m_rows;
+  size_t m_cols;
+  std::vector<T> m_data;
+};
+
+#endif // MATRIX_H
diff --git a/src/neural_net.cpp b/src/neural_net.cpp
deleted file mode 100644
index c4a302d..0000000
--- a/src/neural_net.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-#include "neural_net.hpp"
-#include "utility.hpp"
-#include <algorithm>
-#include <numeric>
-#include <random>
-#include <vector>
-
-template <class ActivationFunction>
-NeuralNet<ActivationFunction>::NeuralNet(std::vector<size_t> &layer_sizes)
-    : m_sizes(layer_sizes) {
-  int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(), 0,
-                                      std::plus());
-  // Initialize the activation function
-  m_activation_func = ActivationFunction();
-
-  // Create random sampling device
-  std::random_device rd{};
-  std::mt19937 gen{rd()};
-  std::normal_distribution dist{0.0, 1.0};
-
-  // Initialize the weights
-  m_weights.reserve(total_neurons);
-  int start_idx = 0;
-  for (auto size : m_sizes) {
-    for (int i = 0; i < size; i++) {
-      m_weights[i + start_idx] =
-          dist(gen) * m_activation_func.init_stddev(size);
-    }
-    start_idx += size;
-  }
-}
-
-/** Pass input vector through the neural network.
- * This is a fully connected neural network geometry.
- * @param x Input vector
- * @return output of feed forward phase
- */
-template <class ActivationFunction>
-std::vector<float>
-NeuralNet<ActivationFunction>::feed_forward(std::vector<float> &x) {
-  std::vector<float> A = x;
-  int start_idx = 0;
-
-  // Feed each layer forward except the last layer using the user specified
-  // activation function
-  for (auto size = m_sizes.begin(); size < m_sizes.end() - 1; size++) {
-    // Get the iterator range for the current layer
-    auto layer_start = m_weights.begin() + start_idx;
-    auto layer_end = m_weights.end() + start_idx + *size;
-
-    std::vector<float> Anew = Utilities::feed_layer(
-        layer_start, layer_end, &A, m_activation_func);
-    if (Anew.size() > A.capacity()) {
-      A.reserve(Anew.size());
-    }
-    std::move(Anew.begin(), Anew.end(), A.begin());
-    start_idx += *size;
-  }
-
-  // Always use soft max for the final layer
-  auto last_layer_start = m_weights.begin() + start_idx;
-  auto output = Utilities::feed_layer(last_layer_start,
-                                      m_weights.end(), A, m_soft_max);
-  return output;
-}
diff --git a/src/neural_net.hpp b/src/neural_net.hpp
index c4f7f48..2aae21a 100644
--- a/src/neural_net.hpp
+++ b/src/neural_net.hpp
@@ -2,14 +2,13 @@
 #define NEURAL_NET_H
 
 #include "activation_function.hpp"
-#include "utility.hpp"
+#include "matrix.hpp"
 #include <random>
 #include <vector>
+
 template <class ActivationFunction> class NeuralNet {
 public:
   NeuralNet(std::vector<size_t> &layer_sizes) : m_sizes(layer_sizes) {
-    int total_neurons = std::accumulate(layer_sizes.begin(), layer_sizes.end(),
-                                        0, std::plus());
     // Initialize the activation function
     m_activation_func = ActivationFunction();
 
@@ -18,54 +17,87 @@ public:
     std::mt19937 gen{rd()};
     std::normal_distribution dist{0.0, 1.0};
 
-    // Initialize the weights
-    m_weights.reserve(total_neurons);
-    int start_idx = 0;
-    for (auto size : m_sizes) {
-      for (int i = 0; i < size; i++) {
-        m_weights[i + start_idx] =
-            dist(gen) * m_activation_func.init_stddev(size);
+    // Initialize weights for each layer connection
+    for (size_t i = 0; i < m_sizes.size() - 1; i++) {
+      size_t rows = m_sizes[i + 1]; // neurons in next layer
+      size_t cols = m_sizes[i];     // neurons in current layer
+
+      // Create and initialize weight matrix
+      Matrix<float> W(rows, cols, 0.0);
+      for (size_t j = 0; j < rows; j++) {
+        for (size_t k = 0; k < cols; k++) {
+          W(j, k) = dist(gen) * m_activation_func.init_stddev(cols);
+        }
       }
-      start_idx += size;
+      m_weights.push_back(W);
     }
   };
 
-private:
-  ActivationFunction m_activation_func;
-  SoftMax m_soft_max;
-  std::vector<size_t> m_sizes;
-  std::vector<float> m_weights;
+  // Set new weights for the network
+  void set_weights(const std::vector<Matrix<float>> &new_weights) {
+    // Validate new weights
+    if (new_weights.empty()) {
+      throw std::invalid_argument("Weights vector cannot be empty");
+    }
+
+    // Validate layer sizes match
+    if (new_weights.size() != m_weights.size()) {
+      throw std::invalid_argument(
+          "Number of weight matrices doesn't match network architecture");
+    }
+
+    // Validate layer connectivity
+    for (size_t i = 0; i < new_weights.size(); i++) {
+      if (new_weights[i].rows() != m_weights[i].rows()) {
+        throw std::invalid_argument(
+            "New weight matrix rows don't match existing architecture");
+      }
+      if (new_weights[i].cols() != m_weights[i].cols()) {
+        throw std::invalid_argument(
+            "New weight matrix columns don't match existing architecture");
+      }
+    }
+
+    // Update weights
+    m_weights = new_weights;
+  };
 
   /** Pass input vector through the neural network.
    * This is a fully connected neural network geometry.
    * @param x Input vector
    * @return output of feed forward phase
    */
-  std::vector<float> feed_forward(std::vector<float> &x) {
-    std::vector<float> A = x;
-    int start_idx = 0;
+  std::vector<float> feed_forward(const std::vector<float> &x) {
+    // Convert input vector to matrix
+    Matrix<float> A = Matrix<float>(x.size(), 1, x);
 
     // Feed each layer forward except the last layer using the user specified
    // activation function
-    for (auto size = m_sizes.begin(); size < m_sizes.end() - 1; size++) {
-      // Get the iterator range for the current layer
-      auto layer_start = m_weights.begin() + start_idx;
-      auto layer_end = m_weights.end() + start_idx + *size;
+    for (size_t i = 0; i < m_sizes.size() - 2; i++) {
+      // Calculate Z = W * A
+      Matrix<float> Z = m_weights[i] * A;
 
-      std::vector<float> Anew = Utilities::feed_layer(
-          layer_start, layer_end, &A, m_activation_func);
-      if (Anew.size() > A.capacity()) {
-        A.reserve(Anew.size());
-      }
-      std::move(Anew.begin(), Anew.end(), A.begin());
-      start_idx += *size;
+      // Apply activation function
+      m_activation_func(Z.data());
+      A = Z;
     }
 
     // Always use soft max for the final layer
-    auto last_layer_start = m_weights.begin() + start_idx;
-    auto output = Utilities::feed_layer(
-        last_layer_start, m_weights.end(), A, m_soft_max);
+    Matrix<float> Z = m_weights.back() * A;
+    m_soft_max(Z.data());
+
+    // Convert final output to vector
+    std::vector<float> output(Z.rows());
+    for (size_t i = 0; i < Z.rows(); i++) {
+      output[i] = Z(i, 0);
+    }
     return output;
   };
+
+private:
+  ActivationFunction m_activation_func;
+  SoftMax m_soft_max;
+  std::vector<size_t> m_sizes;
+  std::vector<Matrix<float>> m_weights;
 };
 
 #endif
diff --git a/src/utility.hpp b/src/utility.hpp
index 16cdede..a5d8b4d 100644
--- a/src/utility.hpp
+++ b/src/utility.hpp
@@ -1,31 +1,5 @@
 #ifndef UTILITY_H
 #define UTILITY_H
 
-#include <algorithm>
-#include <iterator>
-#include <numeric>
-#include <vector>
-
-namespace Utilities {
-
-template <class ActivationFunction>
-std::vector<float> feed_layer(std::vector<float>::iterator weight_start,
-                              std::vector<float>::iterator weight_end,
-                              std::vector<float> &A,
-                              ActivationFunction activation_func) {
-  // Calculate the new A vector from the current weights
-  std::vector<float> Anew;
-  Anew.reserve(std::distance(weight_start, weight_end));
-  std::transform(
-      weight_start, weight_end, std::back_inserter(Anew), [&A](float weight) {
-        float summed_weight = std::accumulate(
-            A.begin(), A.end(), 0.0f,
-            [&weight](float acc, float a) { return acc + a * weight; });
-        return summed_weight;
-      });
-  activation_func(Anew);
-  return Anew;
-};
-
-} // namespace Utilities
+namespace Utilities {} // namespace Utilities
 
 #endif
diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt
index 8f1605f..a6bdde9 100644
--- a/tests/unit_tests/CMakeLists.txt
+++ b/tests/unit_tests/CMakeLists.txt
@@ -1,8 +1,12 @@
 include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
 
-add_executable(Unit_Tests_run
+set(TEST_SOURCES
   test_activation_functions.cpp
-  test_utility.cpp
+  test_neural_net.cpp
+)
+
+add_executable(Unit_Tests_run
+  ${TEST_SOURCES}
 )
 
 target_link_libraries(Unit_Tests_run gtest gtest_main)
diff --git a/tests/unit_tests/test_neural_net.cpp b/tests/unit_tests/test_neural_net.cpp
new file mode 100644
index 0000000..d959903
--- /dev/null
+++ b/tests/unit_tests/test_neural_net.cpp
@@ -0,0 +1,123 @@
+#include "../src/activation_function.hpp"
+#include "../src/neural_net.hpp"
+#include <cmath>
+#include <gtest/gtest.h>
+#include <memory>
+#include <vector>
+
+class NeuralNetTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    // Create a simple neural network with 2 input neurons, 2 hidden neurons,
+    // and 2 output neurons
+    std::vector<size_t> layer_sizes = {2, 2, 2};
+    net = std::make_unique<NeuralNet<Sigmoid>>(layer_sizes);
+  }
+
+  std::unique_ptr<NeuralNet<Sigmoid>> net;
+};
+
+TEST_F(NeuralNetTest, FeedForward_SimpleNetwork) {
+  // Test a simple network with known weights and inputs
+  std::vector<float> input = {0.5f, 0.5f};
+
+  // Set known weights for testing
+  std::vector<Matrix<float>> weights = {
+      Matrix<float>(2, 2, 0.5f), // First layer weights
+      Matrix<float>(2, 2, 0.5f)  // Output layer weights
+  };
+
+  // Replace the network's weights with our test weights
+  net->set_weights(weights);
+
+  // Calculate expected output manually
+  // First layer: Z1 = W1 * X
+  Matrix<float> X(2, 1, 0.0);
+  X(0, 0) = input[0];
+  X(1, 0) = input[1];
+
+  Matrix<float> Z1 = weights[0] * X;
+  // Apply sigmoid activation
+  Sigmoid sigmoid;
+  sigmoid(Z1.data());
+
+  // Second layer: Z2 = W2 * A1
+  Matrix<float> Z2 = weights[1] * Z1;
+  SoftMax softmax;
+  softmax(Z2.data());
+
+  // Convert to output vector
+  std::vector<float> expected_output(Z2.rows());
+  for (size_t i = 0; i < Z2.rows(); i++) {
+    expected_output[i] = Z2(i, 0);
+  }
+
+  // Get actual output from feed_forward
+  std::vector<float> output = net->feed_forward(input);
+
+  // Compare actual and expected outputs
+  for (size_t i = 0; i < output.size(); i++) {
+    EXPECT_NEAR(output[i], expected_output[i], 1e-6);
+  }
+}
+
+TEST_F(NeuralNetTest, FeedForward_DifferentLayerSizes) {
+  // Create a network with different layer sizes
+  std::vector<size_t> layer_sizes = {3, 4, 2};
+  NeuralNet<Sigmoid> net2(layer_sizes);
+
+  std::vector<float> input = {0.1f, 0.2f, 0.3f};
+  std::vector<float> output = net2.feed_forward(input);
+
+  // Output should have 2 elements (size of last layer)
+  EXPECT_EQ(output.size(), 2);
+}
+
+TEST_F(NeuralNetTest, FeedForward_InvalidInputSize) {
+  std::vector<float> input = {0.1f}; // Only 1 input, but network expects 2
+
+  // This should throw an exception since input size doesn't match first layer
+  // size
+  EXPECT_THROW(net->feed_forward(input), std::invalid_argument);
+}
+
+TEST_F(NeuralNetTest, FeedForward_IdentityTest) {
+  // Create a network with identity weights (1.0) and no bias
+  std::vector<size_t> layer_sizes = {2, 2};
+  NeuralNet<Sigmoid> net2(layer_sizes);
+
+  // Set weights to identity matrix
+  std::vector<Matrix<float>> weights = {Matrix<float>(2, 2, 1.0f)};
+
+  net2.set_weights(weights);
+
+  std::vector<float> input = {0.5f, 0.5f};
+  std::vector<float> output = net2.feed_forward(input);
+
+  // With a single weight layer only softmax is applied: both pre-activations
+  // equal 0.5 + 0.5 = 1.0, so softmax yields 0.5 for each neuron
+  SoftMax softmax;
+  std::vector<float> expected_output = input;
+  softmax(expected_output);
+
+  for (float val : output) {
+    EXPECT_NEAR(val, expected_output[0], 1e-6);
+  }
+}
+
+TEST_F(NeuralNetTest, FeedForward_SoftmaxOutput) {
+  std::vector<float> input = {1.0f, -1.0f};
+  std::vector<float> output = net->feed_forward(input);
+
+  // Verify that the output sums to 1 (property of softmax)
+  float sum = 0.0f;
+  for (float val : output) {
+    sum += val;
+  }
+  EXPECT_NEAR(sum, 1.0f, 1e-6);
+
+  // Verify that all outputs are positive
+  for (float val : output) {
+    EXPECT_GT(val, 0.0f);
+  }
+}
diff --git a/tests/unit_tests/test_utility.cpp b/tests/unit_tests/test_utility.cpp
deleted file mode 100644
index 07cca8b..0000000
--- a/tests/unit_tests/test_utility.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-#include "activation_function.hpp"
-#include "utility.hpp"
-#include <cmath>
-#include <gtest/gtest.h>
-
-// Simple identity activation function for testing
-struct Identity {
-  void operator()(std::vector<float> &x) const {
-    // Identity function - no change to values
-    // Below statement is needed to remove compiler warning about unused var
-    // warning
-    (void)x;
-  }
-};
-
-TEST(UtilityTest, FeedLayerIdentityTest) {
-  // Test with identity activation function for simple verification
-  // Input: [1, 2]
-  // Weights: [0.5, -0.5, 1.0, -1.0]
-
-  std::vector<float> weights = {0.5, -0.5, 1.0, -1.0};
-  std::vector<float> input = {1.0, 2.0};
-  Identity identity;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, identity);
-
-  ASSERT_EQ(output.size(), 4);
-  EXPECT_NEAR(output[0], 1.5f, 1e-5);  // 1.0 * 0.5 + 2.0 * 0.5
-  EXPECT_NEAR(output[1], -1.5f, 1e-5); // 1.0 * -0.5 + 2.0 * -0.5
-  EXPECT_NEAR(output[2], 3.0f, 1e-5);  // 1.0 * 1.0 + 2.0 * 1.0
-  EXPECT_NEAR(output[3], -3.0f, 1e-5); // 1.0 * -1.0 + 2.0 * -1.0
-}
-
-TEST(UtilityTest, FeedLayerSigmoidTest) {
-  // Test with sigmoid activation
-  // Input: [1]
-  // Weights: [2, -2]
-  std::vector<float> weights = {2.0, -2.0};
-  std::vector<float> input = {1.0};
-  Sigmoid sigmoid;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, sigmoid);
-
-  ASSERT_EQ(output.size(), 2);
-  // Note: Sigmoid is applied to the whole vector after matrix multiplication
-  float expected0 = 2.0;  // 1.0 * 2.0
-  float expected1 = -2.0; // 1.0 * -2.0
-  EXPECT_NEAR(output[0], 1.0 / (1.0 + std::exp(-expected0)), 1e-5);
-  EXPECT_NEAR(output[1], 1.0 / (1.0 + std::exp(-expected1)), 1e-5);
-}
-
-TEST(UtilityTest, FeedLayerSoftMaxTest) {
-  // Test with softmax activation
-  // Input: [1]
-  // Weights: [2, 2]
-  std::vector<float> weights = {2.0, 2.0};
-  std::vector<float> input = {1.0};
-  SoftMax softmax;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, softmax);
-
-  ASSERT_EQ(output.size(), 2);
-  // Both outputs should be 0.5 since inputs to softmax are equal (both 2.0)
-  EXPECT_NEAR(output[0], 0.5, 1e-5);
-  EXPECT_NEAR(output[1], 0.5, 1e-5);
-}
-
-TEST(UtilityTest, FeedLayerEmptyInput) {
-  std::vector<float> weights = {1.0, 1.0};
-  std::vector<float> input = {};
-  Identity identity;
-
-  auto output = Utilities::feed_layer(weights.begin(), weights.end(),
-                                      input, identity);
-
-  ASSERT_EQ(output.size(), 2);
-  EXPECT_NEAR(output[0], 0.0f, 1e-5);
-  EXPECT_NEAR(output[1], 0.0f, 1e-5);
-}