generated from aselimov/cpp_project_template
47 lines
1.4 KiB
Plaintext
47 lines
1.4 KiB
Plaintext
#include <cuda_runtime.h>
|
|
#include <stdio.h>
|
|
|
|
/**
 * Kernel: every thread prints one greeting line tagged with its x-index
 * inside the block (threadIdx.x), using device-side printf.
 *
 * Takes no arguments. Only threadIdx.x is read, so the printed index is
 * relative to the thread's own block.
 */
__global__ void hello_cuda() {
  const unsigned int tid = threadIdx.x;
  printf("Hello CUDA from thread %d\n", tid);
}
|
|
|
|
/**
 * Reports a failed CUDA runtime call on stdout.
 *
 * @param status  Result of the CUDA runtime call.
 * @param step    Short label of the call, included in the message.
 * @return true when status is cudaSuccess, false otherwise.
 */
static bool hello_cuda_step_ok(cudaError_t status, const char *step) {
  if (status == cudaSuccess) {
    return true;
  }
  printf("CUDA error in %s: %s\n", step, cudaGetErrorString(status));
  return false;
}

/**
 * Host entry point (C linkage): prints the name and compute capability of
 * the current CUDA device, launches hello_cuda with 1 block of 10 threads,
 * waits for the device to finish, and flushes stdout.
 *
 * Every CUDA runtime call is checked. Failures are reported on stdout; the
 * function returns void, so no status is propagated to the caller.
 */
extern "C" void launch_hello_cuda() {
  // Ask the runtime which device is current instead of assuming an ordinal:
  // a hard-coded index may not exist (e.g. index 1 on a single-GPU machine),
  // and it is not necessarily the device the kernel below runs on.
  int device = 0;
  cudaDeviceProp prop;
  if (hello_cuda_step_ok(cudaGetDevice(&device), "cudaGetDevice") &&
      hello_cuda_step_ok(cudaGetDeviceProperties(&prop, device),
                         "cudaGetDeviceProperties")) {
    // Only print the banner when prop was actually filled in.
    printf("Using device: %s with compute capability %d.%d\n", prop.name,
           prop.major, prop.minor);
  }

  hello_cuda<<<1, 10>>>();

  // A launch does not return a status itself: configuration errors are
  // picked up with cudaGetLastError(), execution errors at the sync.
  if (hello_cuda_step_ok(cudaGetLastError(), "hello_cuda launch")) {
    hello_cuda_step_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
  }

  fflush(stdout);
}
|
|
|
|
/**
 * Host diagnostic (C linkage): prints the number of CUDA devices and, for
 * each device, its name, compute capability, total global memory,
 * multiprocessor count, and maximum block/grid dimensions.
 *
 * If the device count cannot be obtained, the error is printed and the
 * function returns without listing anything. A device whose properties
 * cannot be queried is reported and skipped.
 */
extern "C" void check_cuda() {
  int deviceCount = 0;
  cudaError_t error = cudaGetDeviceCount(&deviceCount);

  if (error != cudaSuccess) {
    printf("CUDA error: %s\n", cudaGetErrorString(error));
    // The count is not trustworthy after a failure; do not report or
    // iterate over it.
    return;
  }

  printf("Found %d CUDA devices\n", deviceCount);

  // Divisor for the "GB" line below (1024^3 bytes), kept in double so the
  // byte count is not rounded to float precision before dividing.
  const double bytesPerGiB = 1024.0 * 1024.0 * 1024.0;

  for (int i = 0; i < deviceCount; i++) {
    cudaDeviceProp prop;
    error = cudaGetDeviceProperties(&prop, i);
    if (error != cudaSuccess) {
      // prop is not filled in on failure; skip instead of printing garbage.
      printf("Device %d: CUDA error: %s\n", i, cudaGetErrorString(error));
      continue;
    }

    printf("Device %d: %s\n", i, prop.name);
    printf(" Compute capability: %d.%d\n", prop.major, prop.minor);
    printf(" Total global memory: %.2f GB\n",
           static_cast<double>(prop.totalGlobalMem) / bytesPerGiB);
    printf(" Multiprocessors: %d\n", prop.multiProcessorCount);
    printf(" Max threads per block: %d\n", prop.maxThreadsPerBlock);
    printf(" Max threads dimensions: (%d, %d, %d)\n", prop.maxThreadsDim[0],
           prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
    printf(" Max grid dimensions: (%d, %d, %d)\n", prop.maxGridSize[0],
           prop.maxGridSize[1], prop.maxGridSize[2]);
    printf("\n");
  }
}
|