generated from aselimov/cpp_project_template
Add CUDA support
This commit is contained in:
parent
fbc34a0bdd
commit
80d3b6276e
@ -2,6 +2,10 @@ cmake_minimum_required(VERSION 3.9)
|
||||
set(NAME "Vec3")
|
||||
project(${NAME})
|
||||
|
||||
# Check for CUDA
|
||||
include(CheckLanguage)
|
||||
check_language(CUDA)
|
||||
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
add_compile_options(-Wall -Wextra -Wpedantic)
|
||||
|
@ -21,4 +21,5 @@ endif()
|
||||
## Features
|
||||
|
||||
* Guards all testing code to only be run when Vec3 is the main project
|
||||
* Compatible with both CUDA and C++
|
||||
|
||||
|
@ -3,35 +3,44 @@
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
|
||||
#ifdef __CUDACC__
|
||||
#define CUDA_CALLABLE __host__ __device__
|
||||
#else
|
||||
#define CUDA_CALLABLE
|
||||
#endif
|
||||
|
||||
template <typename T> struct Vec3 {
|
||||
T x;
|
||||
T y;
|
||||
T z;
|
||||
|
||||
inline Vec3<T> operator+(Vec3<T> other) const {
|
||||
CUDA_CALLABLE inline Vec3<T> operator+(Vec3<T> other) const {
|
||||
return {x + other.x, y + other.y, z + other.z};
|
||||
};
|
||||
|
||||
inline Vec3<T> operator-(Vec3<T> other) const {
|
||||
CUDA_CALLABLE inline Vec3<T> operator-(Vec3<T> other) const {
|
||||
return {x - other.x, y - other.y, z - other.z};
|
||||
};
|
||||
|
||||
inline Vec3 scale(T scalar) { return {x * scalar, y * scalar, z * scalar}; };
|
||||
CUDA_CALLABLE inline Vec3 scale(T scalar) {
|
||||
return {x * scalar, y * scalar, z * scalar};
|
||||
};
|
||||
|
||||
inline T dot(Vec3<T> other) const {
|
||||
CUDA_CALLABLE inline T dot(Vec3<T> other) const {
|
||||
return x * other.x + y * other.y + z * other.z;
|
||||
}
|
||||
|
||||
inline Vec3<T> cross(Vec3<T> other) const {
|
||||
CUDA_CALLABLE inline Vec3<T> cross(Vec3<T> other) const {
|
||||
return {y * other.z - z * other.y, z * other.x - x * other.z,
|
||||
x * other.y - y * other.x};
|
||||
}
|
||||
|
||||
inline T squared_norm2() const { return x * x + y * y + z * z; }
|
||||
CUDA_CALLABLE inline T squared_norm2() const { return x * x + y * y + z * z; }
|
||||
|
||||
inline T norm2() const { return std::sqrt(squared_norm2()); }
|
||||
CUDA_CALLABLE inline T norm2() const { return std::sqrt(squared_norm2()); }
|
||||
|
||||
inline Vec3<T> normalized() {
|
||||
CUDA_CALLABLE inline Vec3<T> normalized() {
|
||||
// Add epsilon to the norm for stability when the norm is 0
|
||||
T norm = std::max(norm2(), std::numeric_limits<T>::epsilon());
|
||||
return {x / norm, y / norm, z / norm};
|
||||
|
@ -11,3 +11,15 @@ endif()
|
||||
|
||||
add_subdirectory(lib/googletest)
|
||||
add_subdirectory(unit_tests)
|
||||
|
||||
# Only run Cuda tests if cuda is available
|
||||
if (CMAKE_CUDA_COMPILER)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 61)
|
||||
set(CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
add_subdirectory(cuda_unit_tests)
|
||||
message(STATUS "CUDA found. CUDA tests will be build")
|
||||
else()
|
||||
message(STATUS "CUDA not found. Skipping CUDA tests")
|
||||
endif()
|
||||
|
||||
|
10
tests/cuda_unit_tests/CMakeLists.txt
Normal file
10
tests/cuda_unit_tests/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
|
||||
|
||||
add_executable(${NAME}_cuda_tests
|
||||
vec3_test.cu
|
||||
)
|
||||
|
||||
target_link_libraries(${NAME}_cuda_tests gtest gtest_main)
|
||||
target_link_libraries(${NAME}_cuda_tests ${NAME})
|
||||
|
||||
add_test(NAME Vec3CudaTests COMMAND ${CMAKE_BINARY_DIR}/tests/cuda_unit_tests/${NAME}_cuda_tests)
|
177
tests/cuda_unit_tests/vec3_test.cu
Normal file
177
tests/cuda_unit_tests/vec3_test.cu
Normal file
@ -0,0 +1,177 @@
|
||||
#include "vec3.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
// Define kernel function to test Vec3 operations
|
||||
template <typename T>
|
||||
__global__ void testVec3Operations(Vec3<T> *results, Vec3<T> a, Vec3<T> b,
|
||||
T scalar) {
|
||||
int idx = threadIdx.x;
|
||||
|
||||
// Test different operations based on thread index
|
||||
switch (idx) {
|
||||
case 0: // Addition
|
||||
results[idx] = a + b;
|
||||
break;
|
||||
case 1: // Subtraction
|
||||
results[idx] = a - b;
|
||||
break;
|
||||
case 2: // Scale
|
||||
results[idx] = a.scale(scalar);
|
||||
break;
|
||||
case 3: // Dot product - store in x component
|
||||
results[idx].x = a.dot(b);
|
||||
results[idx].y = 0;
|
||||
results[idx].z = 0;
|
||||
break;
|
||||
case 4: // Cross product
|
||||
results[idx] = a.cross(b);
|
||||
break;
|
||||
case 5: // Squared norm - store in x component
|
||||
results[idx].x = a.squared_norm2();
|
||||
results[idx].y = 0;
|
||||
results[idx].z = 0;
|
||||
break;
|
||||
case 6: // Norm - store in x component
|
||||
results[idx].x = a.norm2();
|
||||
results[idx].y = 0;
|
||||
results[idx].z = 0;
|
||||
break;
|
||||
case 7: // Normalized
|
||||
results[idx] = a.normalized();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Test fixture for Vec3 CUDA tests
|
||||
class Vec3CudaTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() override {
|
||||
// Allocate device memory for results
|
||||
cudaMalloc(&d_results, NUM_TESTS * sizeof(Vec3<float>));
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
// Free device memory
|
||||
cudaFree(d_results);
|
||||
}
|
||||
|
||||
// Number of operations to test
|
||||
static const int NUM_TESTS = 8;
|
||||
|
||||
// Pointer to device memory for results
|
||||
Vec3<float> *d_results;
|
||||
|
||||
// Host memory for results
|
||||
Vec3<float> h_results[NUM_TESTS];
|
||||
|
||||
// Test with a reasonable epsilon for floating point comparisons
|
||||
float epsilon = 1e-5f;
|
||||
};
|
||||
|
||||
TEST_F(Vec3CudaTest, BasicOperations) {
|
||||
// Define test vectors
|
||||
Vec3<float> a{1.0f, 2.0f, 3.0f};
|
||||
Vec3<float> b{4.0f, 5.0f, 6.0f};
|
||||
float scalar = 2.0f;
|
||||
|
||||
// Launch kernel with 8 threads to test different operations
|
||||
testVec3Operations<<<1, NUM_TESTS>>>(d_results, a, b, scalar);
|
||||
|
||||
// Check for kernel execution errors
|
||||
cudaError_t cudaStatus = cudaGetLastError();
|
||||
ASSERT_EQ(cudaStatus, cudaSuccess)
|
||||
<< "Kernel launch failed: " << cudaGetErrorString(cudaStatus);
|
||||
|
||||
// Copy results back to host
|
||||
cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3<float>),
|
||||
cudaMemcpyDeviceToHost);
|
||||
ASSERT_EQ(cudaStatus, cudaSuccess)
|
||||
<< "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus);
|
||||
|
||||
// Wait for GPU to finish
|
||||
cudaStatus = cudaDeviceSynchronize();
|
||||
ASSERT_EQ(cudaStatus, cudaSuccess)
|
||||
<< "cudaDeviceSynchronize failed: " << cudaGetErrorString(cudaStatus);
|
||||
|
||||
// Test addition
|
||||
EXPECT_NEAR(h_results[0].x, 5.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[0].y, 7.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[0].z, 9.0f, epsilon);
|
||||
|
||||
// Test subtraction
|
||||
EXPECT_NEAR(h_results[1].x, -3.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[1].y, -3.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[1].z, -3.0f, epsilon);
|
||||
|
||||
// Test scale
|
||||
EXPECT_NEAR(h_results[2].x, 2.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[2].y, 4.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[2].z, 6.0f, epsilon);
|
||||
|
||||
// Test dot product
|
||||
EXPECT_NEAR(h_results[3].x, 32.0f, epsilon);
|
||||
|
||||
// Test cross product
|
||||
EXPECT_NEAR(h_results[4].x, -3.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[4].y, 6.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[4].z, -3.0f, epsilon);
|
||||
|
||||
// Test squared norm
|
||||
EXPECT_NEAR(h_results[5].x, 14.0f, epsilon);
|
||||
|
||||
// Test norm
|
||||
EXPECT_NEAR(h_results[6].x, std::sqrt(14.0f), epsilon);
|
||||
|
||||
// Test normalized
|
||||
float norm = std::sqrt(14.0f);
|
||||
EXPECT_NEAR(h_results[7].x, 1.0f / norm, epsilon);
|
||||
EXPECT_NEAR(h_results[7].y, 2.0f / norm, epsilon);
|
||||
EXPECT_NEAR(h_results[7].z, 3.0f / norm, epsilon);
|
||||
}
|
||||
|
||||
TEST_F(Vec3CudaTest, EdgeCases) {
|
||||
// Test with zero vector
|
||||
Vec3<float> zero{0.0f, 0.0f, 0.0f};
|
||||
Vec3<float> nonZero{1.0f, 2.0f, 3.0f};
|
||||
float scalar = 5.0f;
|
||||
|
||||
// Launch kernel with 8 threads to test different operations
|
||||
testVec3Operations<<<1, NUM_TESTS>>>(d_results, zero, nonZero, scalar);
|
||||
|
||||
// Check for kernel execution errors
|
||||
cudaError_t cudaStatus = cudaGetLastError();
|
||||
ASSERT_EQ(cudaStatus, cudaSuccess)
|
||||
<< "Kernel launch failed: " << cudaGetErrorString(cudaStatus);
|
||||
|
||||
// Copy results back to host
|
||||
cudaStatus = cudaMemcpy(h_results, d_results, NUM_TESTS * sizeof(Vec3<float>),
|
||||
cudaMemcpyDeviceToHost);
|
||||
ASSERT_EQ(cudaStatus, cudaSuccess)
|
||||
<< "cudaMemcpy failed: " << cudaGetErrorString(cudaStatus);
|
||||
|
||||
// Wait for GPU to finish
|
||||
cudaStatus = cudaDeviceSynchronize();
|
||||
ASSERT_EQ(cudaStatus, cudaSuccess)
|
||||
<< "cudaDeviceSynchronize failed: " << cudaGetErrorString(cudaStatus);
|
||||
|
||||
// Test normalized with zero vector (should handle epsilon)
|
||||
// Normalized of zero vector should be very small but not NaN
|
||||
EXPECT_FALSE(isnan(h_results[7].x));
|
||||
EXPECT_FALSE(isnan(h_results[7].y));
|
||||
EXPECT_FALSE(isnan(h_results[7].z));
|
||||
|
||||
// Test dot product with zero vector (should be zero)
|
||||
EXPECT_NEAR(h_results[3].x, 0.0f, epsilon);
|
||||
|
||||
// Test cross product with zero vector (should be zero)
|
||||
EXPECT_NEAR(h_results[4].x, 0.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[4].y, 0.0f, epsilon);
|
||||
EXPECT_NEAR(h_results[4].z, 0.0f, epsilon);
|
||||
}
|
||||
|
||||
// Main function to run all tests
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
@ -5,6 +5,6 @@ add_executable(${NAME}_tests
|
||||
)
|
||||
|
||||
target_link_libraries(${NAME}_tests gtest gtest_main)
|
||||
target_link_libraries(${NAME_tests} ${NAME})
|
||||
target_link_libraries(${NAME}_tests ${NAME})
|
||||
|
||||
add_test(NAME Vec3Tests COMMAND ${CMAKE_BINARY_DIR}/tests/unit_tests/${NAME}_tests)
|
||||
|
Loading…
x
Reference in New Issue
Block a user