Here is a CUDA C program to query the properties of the available GPUs on your system using the cudaGetDeviceCount and cudaGetDeviceProperties functions:
#include <stdio.h>
#include <cuda_runtime.h>
/*
 * Enumerates every CUDA device visible to the runtime and prints a summary
 * of its hardware properties to stdout.
 *
 * Returns 0 when every device was queried successfully, and -1 when the
 * device count could not be obtained or when any per-device query failed.
 * Diagnostics are written to stderr so they do not mix with the report.
 */
int main() {
    int deviceCount = 0;

    // Get the number of available GPUs
    cudaError_t err = cudaGetDeviceCount(&deviceCount);
    if (err != cudaSuccess) {
        fprintf(stderr, "Error querying the number of GPUs: %s\n", cudaGetErrorString(err));
        return -1;
    }

    printf("Number of CUDA-capable GPUs: %d\n\n", deviceCount);

    int exitStatus = 0;
    for (int i = 0; i < deviceCount; i++) {
        cudaDeviceProp deviceProp;
        int coreClockKHz = 0;
        int memoryClockKHz = 0;

        // Never print a cudaDeviceProp that was not filled in: on failure the
        // struct is uninitialised, so report the error and skip this device.
        err = cudaGetDeviceProperties(&deviceProp, i);

        // The clock rates are read through cudaDeviceGetAttribute rather than
        // deviceProp.clockRate / deviceProp.memoryClockRate: those struct
        // fields are deprecated in recent toolkits and no longer present in
        // cudaDeviceProp as of CUDA 13. Both attributes are reported in kHz.
        if (err == cudaSuccess) {
            err = cudaDeviceGetAttribute(&coreClockKHz, cudaDevAttrClockRate, i);
        }
        if (err == cudaSuccess) {
            err = cudaDeviceGetAttribute(&memoryClockKHz, cudaDevAttrMemoryClockRate, i);
        }
        if (err != cudaSuccess) {
            fprintf(stderr, "Error querying properties of GPU #%d: %s\n\n",
                    i, cudaGetErrorString(err));
            exitStatus = -1;
            continue;
        }

        printf("GPU #%d: %s\n", i, deviceProp.name);
        printf(" Compute Capability: %d.%d\n", deviceProp.major, deviceProp.minor);
        printf(" Total Global Memory: %.2f GB\n",
               deviceProp.totalGlobalMem / (1024.0 * 1024.0 * 1024.0));
        printf(" Shared Memory Per Block: %.2f KB\n", deviceProp.sharedMemPerBlock / 1024.0);
        printf(" Registers Per Block: %d\n", deviceProp.regsPerBlock);
        printf(" Warp Size: %d\n", deviceProp.warpSize);
        printf(" Maximum Threads Per Block: %d\n", deviceProp.maxThreadsPerBlock);
        printf(" Maximum Block Dimensions: (%d, %d, %d)\n",
               deviceProp.maxThreadsDim[0],
               deviceProp.maxThreadsDim[1],
               deviceProp.maxThreadsDim[2]);
        printf(" Maximum Grid Dimensions: (%d, %d, %d)\n",
               deviceProp.maxGridSize[0],
               deviceProp.maxGridSize[1],
               deviceProp.maxGridSize[2]);
        printf(" Clock Rate: %.2f MHz\n", coreClockKHz / 1000.0);
        printf(" Memory Bus Width: %d bits\n", deviceProp.memoryBusWidth);

        // Theoretical peak bandwidth: 2 transfers per clock (double data rate)
        // * memory clock [kHz] * bus width [bytes], scaled from kB/s to GB/s.
        // The bus width is divided as a double so a width that is not a
        // multiple of 8 bits is not truncated.
        printf(" Memory Bandwidth: %.2f GB/s\n\n",
               2.0 * memoryClockKHz * (deviceProp.memoryBusWidth / 8.0) / 1.0e6);
    }

    return exitStatus;
}
Example output for a single device (a Tesla P100; the initial "Number of CUDA-capable GPUs" line is not shown):
GPU #0: Tesla P100-PCIE-16GB
Compute Capability: 6.0
Total Global Memory: 15.89 GB
Shared Memory Per Block: 48.00 KB
Registers Per Block: 65536
Warp Size: 32
Maximum Threads Per Block: 1024
Maximum Block Dimensions: (1024, 1024, 64)
Maximum Grid Dimensions: (2147483647, 65535, 65535)
Clock Rate: 1328.50 MHz
Memory Bus Width: 4096 bits
Memory Bandwidth: 732.16 GB/s