# Patch summary: adds optional CUDA support to the Vec3 project.
#   CMakeLists.txt                       - probes for a CUDA compiler with check_language(CUDA) and, when one
#                                          is found, enables the CUDA language so .cu sources can be compiled.
#   README.md                            - lists CUDA/C++ compatibility as a feature.
#   include/vec3.h                       - defines CUDA_CALLABLE (__host__ __device__ when __CUDACC__ is defined,
#                                          empty otherwise) and applies it to every Vec3 member function.
#   tests/CMakeLists.txt                 - adds tests/cuda_unit_tests only when CMAKE_CUDA_COMPILER is set.
#   tests/cuda_unit_tests/               - new GoogleTest executable; one kernel launch of <<<1, NUM_TESTS>>>
#                                          runs a different Vec3 operation per thread, results are copied back
#                                          and checked on the host.
#   tests/unit_tests/CMakeLists.txt      - fixes the ${NAME_tests} -> ${NAME}_tests typo.
# NOTE(review): include targets and template arguments look stripped from this copy of the patch
#   (e.g. "#include #include", "template struct Vec3", "std::numeric_limits::epsilon()") - restore before applying.
# NOTE(review): normalized() calls std::max and std::numeric_limits::epsilon from a __host__ __device__ function;
#   confirm nvcc accepts this without --expt-relaxed-constexpr.
# NOTE(review): set(CUDA_SEPARABLE_COMPILATION ON) sets a plain variable; presumably
#   CMAKE_CUDA_SEPARABLE_COMPILATION was intended - confirm before changing, it alters how the target links.
# NOTE(review): the cudaMalloc result in SetUp() is not checked; a failed allocation would only surface later
#   as a kernel/memcpy error.
diff --git a/CMakeLists.txt b/CMakeLists.txt index d2bebc5..d2b2491 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,14 @@ cmake_minimum_required(VERSION 3.9) set(NAME "Vec3") project(${NAME}) +# Check for CUDA +include(CheckLanguage) +check_language(CUDA) +# check_language() only detects the compiler; enable CUDA so .cu targets can build +if(CMAKE_CUDA_COMPILER) +  enable_language(CUDA) +endif() + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) add_compile_options(-Wall -Wextra -Wpedantic) diff --git a/README.md b/README.md index 9cfb835..b68a789 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,5 @@ endif() ## Features * Guards all testing code to only be run when Vec3 is the main project +* Compatible with both CUDA and C++ diff --git a/include/vec3.h b/include/vec3.h index 6e37d1e..54dcf4e 100644 --- a/include/vec3.h +++ b/include/vec3.h @@ -3,35 +3,44 @@ #include #include + +#ifdef __CUDACC__ +#define CUDA_CALLABLE __host__ __device__ +#else +#define CUDA_CALLABLE +#endif + template struct Vec3 { T x; T y; T z; - inline Vec3 operator+(Vec3 other) const { + CUDA_CALLABLE inline Vec3 operator+(Vec3 other) const { return {x + other.x, y + other.y, z + other.z}; }; - inline Vec3 operator-(Vec3 other) const { + CUDA_CALLABLE inline Vec3 operator-(Vec3 other) const { return {x - other.x, y - other.y, z - other.z}; }; - inline Vec3 scale(T scalar) { return {x * scalar, y * scalar, z * scalar}; }; + CUDA_CALLABLE inline Vec3 scale(T scalar) { + return {x * scalar, y * scalar, z * scalar}; + }; - inline T dot(Vec3 other) const { + CUDA_CALLABLE inline T dot(Vec3 other) const { return x * other.x + y * other.y + z * other.z; } - inline Vec3 cross(Vec3 other) const { + CUDA_CALLABLE inline Vec3 cross(Vec3 other) const { return {y * other.z - z * other.y, z * other.x - x * other.z, x * other.y - y * other.x}; } - inline T squared_norm2() const { return x * x + y * y + z * z; } + CUDA_CALLABLE inline T squared_norm2() const { return x * x + y * y + z * z; } - inline T norm2() const { return std::sqrt(squared_norm2()); } + CUDA_CALLABLE inline T norm2() const { return std::sqrt(squared_norm2()); } - inline Vec3 
normalized() { + CUDA_CALLABLE inline Vec3 normalized() { // Add epsilon to the norm for stability when the norm is 0 T norm = std::max(norm2(), std::numeric_limits::epsilon()); return {x / norm, y / norm, z / norm}; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 85a8157..ae6b103 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,4 +10,16 @@ if(NOT EXISTS ${GOOGLETEST_DIR}) endif() add_subdirectory(lib/googletest) -add_subdirectory(unit_tests) \ No newline at end of file +add_subdirectory(unit_tests) + +# Only run Cuda tests if cuda is available +if (CMAKE_CUDA_COMPILER) + set(CMAKE_CUDA_ARCHITECTURES 61) + set(CUDA_SEPARABLE_COMPILATION ON) + + add_subdirectory(cuda_unit_tests) + message(STATUS "CUDA found. CUDA tests will be built") +else() + message(STATUS "CUDA not found. Skipping CUDA tests") +endif() + diff --git a/tests/cuda_unit_tests/CMakeLists.txt b/tests/cuda_unit_tests/CMakeLists.txt new file mode 100644 index 0000000..344a8e8 --- /dev/null +++ b/tests/cuda_unit_tests/CMakeLists.txt @@ -0,0 +1,10 @@ +include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR}) + +add_executable(${NAME}_cuda_tests + vec3_test.cu +) + +target_link_libraries(${NAME}_cuda_tests gtest gtest_main) +target_link_libraries(${NAME}_cuda_tests ${NAME}) + +add_test(NAME Vec3CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests) diff --git a/tests/cuda_unit_tests/vec3_test.cu b/tests/cuda_unit_tests/vec3_test.cu new file mode 100644 index 0000000..f4d7b0f --- /dev/null +++ b/tests/cuda_unit_tests/vec3_test.cu @@ -0,0 +1,177 @@ +#include "vec3.h" +#include +#include + +// Define kernel function to test Vec3 operations +template +__global__ void testVec3Operations(Vec3 *results, Vec3 a, Vec3 b, + T scalar) { + int idx = threadIdx.x; + + // Test different operations based on thread index + switch (idx) { + case 0: // Addition + results[idx] = a + b; + break; + case 1: // Subtraction + results[idx] = a - b; + break; + 
case 2: // Scale + results[idx] = a.scale(scalar); + break; + case 3: // Dot product - store in x component + results[idx].x = a.dot(b); + results[idx].y = 0; + results[idx].z = 0; + break; + case 4: // Cross product + results[idx] = a.cross(b); + break; + case 5: // Squared norm - store in x component + results[idx].x = a.squared_norm2(); + results[idx].y = 0; + results[idx].z = 0; + break; + case 6: // Norm - store in x component + results[idx].x = a.norm2(); + results[idx].y = 0; + results[idx].z = 0; + break; + case 7: // Normalized + results[idx] = a.normalized(); + break; + } +} + +// Test fixture for Vec3 CUDA tests +class Vec3CudaTest : public ::testing::Test { +protected: + void SetUp() override { + // Allocate device memory for results + cudaMalloc(&d_results, NUM_TESTS * sizeof(Vec3)); + } + + void TearDown() override { + // Free device memory + cudaFree(d_results); + } + + // Number of operations to test + static const int NUM_TESTS = 8; + + // Pointer to device memory for results + Vec3 *d_results; + + // Host memory for results + Vec3 h_results[NUM_TESTS]; + + // Test with a reasonable epsilon for floating point comparisons + float epsilon = 1e-5f; +}; + +TEST_F(Vec3CudaTest, BasicOperations) { + // Define test vectors + Vec3 a{1.0f, 2.0f, 3.0f}; + Vec3 b{4.0f, 5.0f, 6.0f}; + float scalar = 2.0f; + + // Launch kernel with 8 threads to test different operations + testVec3Operations<<<1, NUM_TESTS>>>(d_results, a, b, scalar); + + // Check for kernel execution errors + cudaError_t cudaStatus = cudaGetLastError(); + ASSERT_EQ(cudaStatus, cudaSuccess) + << "Kernel launch failed: " << cudaGetErrorString(cudaStatus); + + // Copy results back to host + cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3), + cudaMemcpyDeviceToHost); + ASSERT_EQ(cudaStatus, cudaSuccess) + << "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus); + + // Wait for GPU to finish + cudaStatus = cudaDeviceSynchronize(); + ASSERT_EQ(cudaStatus, cudaSuccess) + << 
"cudaDeviceSynchronize failed: " << cudaGetErrorString(cudaStatus); + + // Test addition + EXPECT_NEAR(h_results[0].x, 5.0f, epsilon); + EXPECT_NEAR(h_results[0].y, 7.0f, epsilon); + EXPECT_NEAR(h_results[0].z, 9.0f, epsilon); + + // Test subtraction + EXPECT_NEAR(h_results[1].x, -3.0f, epsilon); + EXPECT_NEAR(h_results[1].y, -3.0f, epsilon); + EXPECT_NEAR(h_results[1].z, -3.0f, epsilon); + + // Test scale + EXPECT_NEAR(h_results[2].x, 2.0f, epsilon); + EXPECT_NEAR(h_results[2].y, 4.0f, epsilon); + EXPECT_NEAR(h_results[2].z, 6.0f, epsilon); + + // Test dot product + EXPECT_NEAR(h_results[3].x, 32.0f, epsilon); + + // Test cross product + EXPECT_NEAR(h_results[4].x, -3.0f, epsilon); + EXPECT_NEAR(h_results[4].y, 6.0f, epsilon); + EXPECT_NEAR(h_results[4].z, -3.0f, epsilon); + + // Test squared norm + EXPECT_NEAR(h_results[5].x, 14.0f, epsilon); + + // Test norm + EXPECT_NEAR(h_results[6].x, std::sqrt(14.0f), epsilon); + + // Test normalized + float norm = std::sqrt(14.0f); + EXPECT_NEAR(h_results[7].x, 1.0f / norm, epsilon); + EXPECT_NEAR(h_results[7].y, 2.0f / norm, epsilon); + EXPECT_NEAR(h_results[7].z, 3.0f / norm, epsilon); +} + +TEST_F(Vec3CudaTest, EdgeCases) { + // Test with zero vector + Vec3 zero{0.0f, 0.0f, 0.0f}; + Vec3 nonZero{1.0f, 2.0f, 3.0f}; + float scalar = 5.0f; + + // Launch kernel with 8 threads to test different operations + testVec3Operations<<<1, NUM_TESTS>>>(d_results, zero, nonZero, scalar); + + // Check for kernel execution errors + cudaError_t cudaStatus = cudaGetLastError(); + ASSERT_EQ(cudaStatus, cudaSuccess) + << "Kernel launch failed: " << cudaGetErrorString(cudaStatus); + + // Copy results back to host + cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3), + cudaMemcpyDeviceToHost); + ASSERT_EQ(cudaStatus, cudaSuccess) + << "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus); + + // Wait for GPU to finish + cudaStatus = cudaDeviceSynchronize(); + ASSERT_EQ(cudaStatus, cudaSuccess) + << "cudaDeviceSynchronize 
failed: " << cudaGetErrorString(cudaStatus); + + // Test normalized with zero vector (should handle epsilon) + // Normalized of zero vector should be very small but not NaN + EXPECT_FALSE(isnan(h_results[7].x)); + EXPECT_FALSE(isnan(h_results[7].y)); + EXPECT_FALSE(isnan(h_results[7].z)); + + // Test dot product with zero vector (should be zero) + EXPECT_NEAR(h_results[3].x, 0.0f, epsilon); + + // Test cross product with zero vector (should be zero) + EXPECT_NEAR(h_results[4].x, 0.0f, epsilon); + EXPECT_NEAR(h_results[4].y, 0.0f, epsilon); + EXPECT_NEAR(h_results[4].z, 0.0f, epsilon); +} + +// Main function to run all tests +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index 305a7e6..b0bc0d8 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -5,6 +5,6 @@ add_executable(${NAME}_tests ) target_link_libraries(${NAME}_tests gtest gtest_main) -target_link_libraries(${NAME_tests} ${NAME}) +target_link_libraries(${NAME}_tests ${NAME}) add_test(NAME Vec3Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)