// Vec3/tests/cuda_unit_tests/vec3_test.cu

#include "vec3.h"
#include <cmath>
#include <cuda_runtime.h>
#include <gtest/gtest.h>

// Kernel that exercises one Vec3 operation per thread.
//
// The operation is selected by threadIdx.x alone (the block index is not
// consulted), and thread i stores its result in results[i]:
//
//   0: a + b              4: a.cross(b)
//   1: a - b              5: a.squared_norm2()  (stored in .x)
//   2: a.scale(scalar)    6: a.norm2()          (stored in .x)
//   3: a.dot(b) (in .x)   7: a.normalized()
//
// For the scalar-valued operations (3, 5 and 6) the y and z components of the
// slot are set to 0. Threads with any other index write nothing, so `results`
// needs at least 8 elements when 8 or more threads are launched.
template <typename T>
__global__ void testVec3Operations(Vec3<T> *results, Vec3<T> a, Vec3<T> b,
                                   T scalar) {
  const int op = threadIdx.x;

  if (op == 0) {
    // Addition
    results[op] = a + b;
  } else if (op == 1) {
    // Subtraction
    results[op] = a - b;
  } else if (op == 2) {
    // Scale
    results[op] = a.scale(scalar);
  } else if (op == 3) {
    // Dot product, carried in the x component
    results[op].x = a.dot(b);
    results[op].y = 0;
    results[op].z = 0;
  } else if (op == 4) {
    // Cross product
    results[op] = a.cross(b);
  } else if (op == 5) {
    // Squared norm, carried in the x component
    results[op].x = a.squared_norm2();
    results[op].y = 0;
    results[op].z = 0;
  } else if (op == 6) {
    // Norm, carried in the x component
    results[op].x = a.norm2();
    results[op].y = 0;
    results[op].z = 0;
  } else if (op == 7) {
    // Normalized
    results[op] = a.normalized();
  }
}

// Test fixture for Vec3 CUDA tests.
//
// Owns a device buffer with NUM_TESTS Vec3<float> slots (one per kernel
// thread) plus a host array of the same size for reading results back.
class Vec3CudaTest : public ::testing::Test {
protected:
  // Allocates the device result buffer. A failed allocation is a fatal test
  // failure, so the test body never launches a kernel on an invalid pointer.
  void SetUp() override {
    const cudaError_t status =
        cudaMalloc(&d_results, NUM_TESTS * sizeof(Vec3<float>));
    ASSERT_EQ(status, cudaSuccess)
        << "cudaMalloc failed: " << cudaGetErrorString(status);
  }

  // Releases the device result buffer. d_results starts out as nullptr, and
  // cudaFree(nullptr) performs no operation, so this is safe even when SetUp
  // failed before allocating anything.
  void TearDown() override {
    const cudaError_t status = cudaFree(d_results);
    d_results = nullptr;
    EXPECT_EQ(status, cudaSuccess)
        << "cudaFree failed: " << cudaGetErrorString(status);
  }

  // Number of operations to test
  static const int NUM_TESTS = 8;

  // Pointer to device memory for results (nullptr until SetUp succeeds)
  Vec3<float> *d_results = nullptr;

  // Host memory for results
  Vec3<float> h_results[NUM_TESTS];

  // Test with a reasonable epsilon for floating point comparisons
  float epsilon = 1e-5f;
};

// Runs every kernel operation on a = (1, 2, 3), b = (4, 5, 6), scalar = 2 and
// compares each result slot against hand-computed expectations.
TEST_F(Vec3CudaTest, BasicOperations) {
  // Define test vectors
  Vec3<float> a{1.0f, 2.0f, 3.0f};
  Vec3<float> b{4.0f, 5.0f, 6.0f};
  float scalar = 2.0f;

  // Launch kernel with 8 threads to test different operations
  testVec3Operations<<<1, NUM_TESTS>>>(d_results, a, b, scalar);

  // Launch-time errors (e.g. a bad configuration) show up here
  cudaError_t cudaStatus = cudaGetLastError();
  ASSERT_EQ(cudaStatus, cudaSuccess)
      << "Kernel launch failed: " << cudaGetErrorString(cudaStatus);

  // Wait for the kernel before copying, so an error raised while the kernel
  // runs is reported by this check rather than as a cudaMemcpy failure
  cudaStatus = cudaDeviceSynchronize();
  ASSERT_EQ(cudaStatus, cudaSuccess)
      << "cudaDeviceSynchronize failed: " << cudaGetErrorString(cudaStatus);

  // Copy results back to host
  cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3<float>),
                          cudaMemcpyDeviceToHost);
  ASSERT_EQ(cudaStatus, cudaSuccess)
      << "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus);

  // Test addition
  EXPECT_NEAR(h_results[0].x, 5.0f, epsilon);
  EXPECT_NEAR(h_results[0].y, 7.0f, epsilon);
  EXPECT_NEAR(h_results[0].z, 9.0f, epsilon);

  // Test subtraction
  EXPECT_NEAR(h_results[1].x, -3.0f, epsilon);
  EXPECT_NEAR(h_results[1].y, -3.0f, epsilon);
  EXPECT_NEAR(h_results[1].z, -3.0f, epsilon);

  // Test scale
  EXPECT_NEAR(h_results[2].x, 2.0f, epsilon);
  EXPECT_NEAR(h_results[2].y, 4.0f, epsilon);
  EXPECT_NEAR(h_results[2].z, 6.0f, epsilon);

  // Test dot product: 1*4 + 2*5 + 3*6
  EXPECT_NEAR(h_results[3].x, 32.0f, epsilon);

  // Test cross product
  EXPECT_NEAR(h_results[4].x, -3.0f, epsilon);
  EXPECT_NEAR(h_results[4].y, 6.0f, epsilon);
  EXPECT_NEAR(h_results[4].z, -3.0f, epsilon);

  // Test squared norm: 1 + 4 + 9
  EXPECT_NEAR(h_results[5].x, 14.0f, epsilon);

  // Host-side reference length of a, shared by the two checks below
  const float norm = std::sqrt(14.0f);

  // Test norm
  EXPECT_NEAR(h_results[6].x, norm, epsilon);

  // Test normalized
  EXPECT_NEAR(h_results[7].x, 1.0f / norm, epsilon);
  EXPECT_NEAR(h_results[7].y, 2.0f / norm, epsilon);
  EXPECT_NEAR(h_results[7].z, 3.0f / norm, epsilon);
}

// Runs the kernel with a zero vector as the first operand and checks the
// results that have a well-defined expectation for that input.
TEST_F(Vec3CudaTest, EdgeCases) {
  // Test with zero vector
  Vec3<float> zero{0.0f, 0.0f, 0.0f};
  Vec3<float> nonZero{1.0f, 2.0f, 3.0f};
  float scalar = 5.0f;

  // Launch kernel with 8 threads to test different operations
  testVec3Operations<<<1, NUM_TESTS>>>(d_results, zero, nonZero, scalar);

  // Launch-time errors (e.g. a bad configuration) show up here
  cudaError_t cudaStatus = cudaGetLastError();
  ASSERT_EQ(cudaStatus, cudaSuccess)
      << "Kernel launch failed: " << cudaGetErrorString(cudaStatus);

  // Wait for the kernel before copying, so an error raised while the kernel
  // runs is reported by this check rather than as a cudaMemcpy failure
  cudaStatus = cudaDeviceSynchronize();
  ASSERT_EQ(cudaStatus, cudaSuccess)
      << "cudaDeviceSynchronize failed: " << cudaGetErrorString(cudaStatus);

  // Copy results back to host
  cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3<float>),
                          cudaMemcpyDeviceToHost);
  ASSERT_EQ(cudaStatus, cudaSuccess)
      << "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus);

  // Test normalized with zero vector: the result must not be NaN.
  // NOTE(review): this presumes Vec3::normalized() guards the division (e.g.
  // with an epsilon); confirm against vec3.h.
  EXPECT_FALSE(std::isnan(h_results[7].x));
  EXPECT_FALSE(std::isnan(h_results[7].y));
  EXPECT_FALSE(std::isnan(h_results[7].z));

  // Test dot product with zero vector (should be zero)
  EXPECT_NEAR(h_results[3].x, 0.0f, epsilon);

  // Test cross product with zero vector (should be zero)
  EXPECT_NEAR(h_results[4].x, 0.0f, epsilon);
  EXPECT_NEAR(h_results[4].y, 0.0f, epsilon);
  EXPECT_NEAR(h_results[4].z, 0.0f, epsilon);
}

// Test-runner entry point: passes the command line to GoogleTest and uses the
// value returned by RUN_ALL_TESTS() as the process exit code.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exitCode = RUN_ALL_TESTS();
  return exitCode;
}