|
| 1 | +/* |
| 2 | + * CXL Type 2 GPU Test Program |
| 3 | + * Tests the hetGPU backend through CXL Type 2 device |
| 4 | + * |
| 5 | + * Compile: gcc -o guest_gpu_test guest_gpu_test.c -ldl |
| 6 | + * Run: ./guest_gpu_test |
| 7 | + */ |
| 8 | + |
| 9 | +#include <stdio.h> |
| 10 | +#include <stdlib.h> |
| 11 | +#include <string.h> |
| 12 | +#include <fcntl.h> |
| 13 | +#include <unistd.h> |
| 14 | +#include <sys/mman.h> |
| 15 | +#include <sys/ioctl.h> |
| 16 | +#include <stdint.h> |
| 17 | +#include <dlfcn.h> |
| 18 | + |
/* PCI vendor/device ID pair that test_pci_device() scans sysfs for. */
#define CXL_TYPE2_VENDOR_ID 0x8086
#define CXL_TYPE2_DEVICE_ID 0x0d92

/*
 * Minimal CUDA-driver-API-compatible type declarations.
 *
 * The library is loaded at run time with dlopen(), so cuda.h is not
 * included; these typedefs must therefore match the library's ABI.
 *
 * CUdevice is a plain integer ordinal in the driver API, not a pointer.
 * Declaring it as a pointer makes cuDeviceGet() fill only part of the
 * variable and passes a mismatched argument type to every call that takes
 * a CUdevice by value.
 */
typedef int CUresult;
typedef int CUdevice;
typedef void *CUcontext;
typedef void *CUmodule;
typedef void *CUfunction;
typedef uint64_t CUdeviceptr;

/* Status value the driver API returns on success. */
#define CUDA_SUCCESS 0

/* Function pointer types for the entry points resolved with dlsym(). */
typedef CUresult (*cuInit_t)(unsigned int);
typedef CUresult (*cuDeviceGetCount_t)(int *);
typedef CUresult (*cuDeviceGet_t)(CUdevice *, int);
typedef CUresult (*cuDeviceGetName_t)(char *, int, CUdevice);
typedef CUresult (*cuDeviceTotalMem_t)(size_t *, CUdevice);
typedef CUresult (*cuCtxCreate_t)(CUcontext *, unsigned int, CUdevice);
typedef CUresult (*cuMemAlloc_t)(CUdeviceptr *, size_t);
typedef CUresult (*cuMemFree_t)(CUdeviceptr);
typedef CUresult (*cuMemcpyHtoD_t)(CUdeviceptr, const void *, size_t);
typedef CUresult (*cuMemcpyDtoH_t)(void *, CUdeviceptr, size_t);
| 44 | + |
| 45 | +/* Test using direct PCI access */ |
| 46 | +int test_pci_device(void) |
| 47 | +{ |
| 48 | + char path[256]; |
| 49 | + int fd; |
| 50 | + uint16_t vendor, device; |
| 51 | + |
| 52 | + printf("=== Testing PCI Device Access ===\n"); |
| 53 | + |
| 54 | + /* Try to find the CXL Type 2 device */ |
| 55 | + for (int bus = 0; bus < 256; bus++) { |
| 56 | + for (int dev = 0; dev < 32; dev++) { |
| 57 | + snprintf(path, sizeof(path), |
| 58 | + "/sys/bus/pci/devices/0000:%02x:%02x.0/vendor", bus, dev); |
| 59 | + fd = open(path, O_RDONLY); |
| 60 | + if (fd < 0) continue; |
| 61 | + |
| 62 | + char buf[16]; |
| 63 | + if (read(fd, buf, sizeof(buf)) > 0) { |
| 64 | + vendor = strtol(buf, NULL, 16); |
| 65 | + close(fd); |
| 66 | + |
| 67 | + snprintf(path, sizeof(path), |
| 68 | + "/sys/bus/pci/devices/0000:%02x:%02x.0/device", bus, dev); |
| 69 | + fd = open(path, O_RDONLY); |
| 70 | + if (fd >= 0 && read(fd, buf, sizeof(buf)) > 0) { |
| 71 | + device = strtol(buf, NULL, 16); |
| 72 | + close(fd); |
| 73 | + |
| 74 | + if (vendor == CXL_TYPE2_VENDOR_ID && device == CXL_TYPE2_DEVICE_ID) { |
| 75 | + printf("Found CXL Type 2 device at %02x:%02x.0\n", bus, dev); |
| 76 | + printf(" Vendor: 0x%04x, Device: 0x%04x\n", vendor, device); |
| 77 | + |
| 78 | + /* Read resource info */ |
| 79 | + snprintf(path, sizeof(path), |
| 80 | + "/sys/bus/pci/devices/0000:%02x:%02x.0/resource", bus, dev); |
| 81 | + fd = open(path, O_RDONLY); |
| 82 | + if (fd >= 0) { |
| 83 | + char resource[1024]; |
| 84 | + ssize_t n = read(fd, resource, sizeof(resource)-1); |
| 85 | + if (n > 0) { |
| 86 | + resource[n] = '\0'; |
| 87 | + printf(" Resources:\n%s", resource); |
| 88 | + } |
| 89 | + close(fd); |
| 90 | + } |
| 91 | + return 0; |
| 92 | + } |
| 93 | + } |
| 94 | + } |
| 95 | + close(fd); |
| 96 | + } |
| 97 | + } |
| 98 | + |
| 99 | + printf("CXL Type 2 device not found\n"); |
| 100 | + return -1; |
| 101 | +} |
| 102 | + |
| 103 | +/* Test using CUDA driver API (if available) */ |
| 104 | +int test_cuda_api(void) |
| 105 | +{ |
| 106 | + void *handle; |
| 107 | + cuInit_t cuInit; |
| 108 | + cuDeviceGetCount_t cuDeviceGetCount; |
| 109 | + cuDeviceGet_t cuDeviceGet; |
| 110 | + cuDeviceGetName_t cuDeviceGetName; |
| 111 | + cuDeviceTotalMem_t cuDeviceTotalMem; |
| 112 | + cuCtxCreate_t cuCtxCreate; |
| 113 | + cuMemAlloc_t cuMemAlloc; |
| 114 | + cuMemFree_t cuMemFree; |
| 115 | + cuMemcpyHtoD_t cuMemcpyHtoD; |
| 116 | + cuMemcpyDtoH_t cuMemcpyDtoH; |
| 117 | + |
| 118 | + int count; |
| 119 | + CUdevice dev; |
| 120 | + CUcontext ctx; |
| 121 | + CUdeviceptr devPtr; |
| 122 | + char name[256]; |
| 123 | + size_t totalMem; |
| 124 | + CUresult err; |
| 125 | + |
| 126 | + printf("\n=== Testing CUDA API ===\n"); |
| 127 | + |
| 128 | + /* Try to load CUDA library */ |
| 129 | + handle = dlopen("libcuda.so.1", RTLD_NOW); |
| 130 | + if (!handle) { |
| 131 | + handle = dlopen("libcuda.so", RTLD_NOW); |
| 132 | + } |
| 133 | + if (!handle) { |
| 134 | + handle = dlopen("libnvcuda.so", RTLD_NOW); |
| 135 | + } |
| 136 | + if (!handle) { |
| 137 | + printf("Could not load CUDA library: %s\n", dlerror()); |
| 138 | + printf("This is expected if CUDA is not installed in guest\n"); |
| 139 | + return -1; |
| 140 | + } |
| 141 | + |
| 142 | + printf("CUDA library loaded successfully\n"); |
| 143 | + |
| 144 | + /* Load functions */ |
| 145 | + cuInit = (cuInit_t)dlsym(handle, "cuInit"); |
| 146 | + cuDeviceGetCount = (cuDeviceGetCount_t)dlsym(handle, "cuDeviceGetCount"); |
| 147 | + cuDeviceGet = (cuDeviceGet_t)dlsym(handle, "cuDeviceGet"); |
| 148 | + cuDeviceGetName = (cuDeviceGetName_t)dlsym(handle, "cuDeviceGetName"); |
| 149 | + cuDeviceTotalMem = (cuDeviceTotalMem_t)dlsym(handle, "cuDeviceTotalMem_v2"); |
| 150 | + cuCtxCreate = (cuCtxCreate_t)dlsym(handle, "cuCtxCreate_v2"); |
| 151 | + cuMemAlloc = (cuMemAlloc_t)dlsym(handle, "cuMemAlloc_v2"); |
| 152 | + cuMemFree = (cuMemFree_t)dlsym(handle, "cuMemFree_v2"); |
| 153 | + cuMemcpyHtoD = (cuMemcpyHtoD_t)dlsym(handle, "cuMemcpyHtoD_v2"); |
| 154 | + cuMemcpyDtoH = (cuMemcpyDtoH_t)dlsym(handle, "cuMemcpyDtoH_v2"); |
| 155 | + |
| 156 | + if (!cuInit) { |
| 157 | + printf("Could not find cuInit\n"); |
| 158 | + dlclose(handle); |
| 159 | + return -1; |
| 160 | + } |
| 161 | + |
| 162 | + /* Initialize CUDA */ |
| 163 | + err = cuInit(0); |
| 164 | + if (err != CUDA_SUCCESS) { |
| 165 | + printf("cuInit failed: %d\n", err); |
| 166 | + dlclose(handle); |
| 167 | + return -1; |
| 168 | + } |
| 169 | + printf("CUDA initialized\n"); |
| 170 | + |
| 171 | + /* Get device count */ |
| 172 | + if (cuDeviceGetCount) { |
| 173 | + err = cuDeviceGetCount(&count); |
| 174 | + if (err == CUDA_SUCCESS) { |
| 175 | + printf("Device count: %d\n", count); |
| 176 | + } |
| 177 | + } |
| 178 | + |
| 179 | + /* Get device info */ |
| 180 | + if (cuDeviceGet && count > 0) { |
| 181 | + err = cuDeviceGet(&dev, 0); |
| 182 | + if (err == CUDA_SUCCESS) { |
| 183 | + printf("Got device 0\n"); |
| 184 | + |
| 185 | + if (cuDeviceGetName) { |
| 186 | + err = cuDeviceGetName(name, sizeof(name), dev); |
| 187 | + if (err == CUDA_SUCCESS) { |
| 188 | + printf("Device name: %s\n", name); |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | + if (cuDeviceTotalMem) { |
| 193 | + err = cuDeviceTotalMem(&totalMem, dev); |
| 194 | + if (err == CUDA_SUCCESS) { |
| 195 | + printf("Total memory: %zu MB\n", totalMem / (1024*1024)); |
| 196 | + } |
| 197 | + } |
| 198 | + } |
| 199 | + } |
| 200 | + |
| 201 | + /* Test memory allocation */ |
| 202 | + if (cuCtxCreate && cuMemAlloc && cuMemFree && count > 0) { |
| 203 | + printf("\n=== Testing Memory Operations ===\n"); |
| 204 | + |
| 205 | + err = cuCtxCreate(&ctx, 0, dev); |
| 206 | + if (err == CUDA_SUCCESS) { |
| 207 | + printf("Context created\n"); |
| 208 | + |
| 209 | + /* Allocate device memory */ |
| 210 | + size_t size = 1024 * 1024; /* 1 MB */ |
| 211 | + err = cuMemAlloc(&devPtr, size); |
| 212 | + if (err == CUDA_SUCCESS) { |
| 213 | + printf("Allocated %zu bytes at device address 0x%lx\n", size, (unsigned long)devPtr); |
| 214 | + |
| 215 | + /* Test memcpy if available */ |
| 216 | + if (cuMemcpyHtoD && cuMemcpyDtoH) { |
| 217 | + char *hostBuf = malloc(size); |
| 218 | + char *resultBuf = malloc(size); |
| 219 | + |
| 220 | + if (hostBuf && resultBuf) { |
| 221 | + /* Fill with pattern */ |
| 222 | + memset(hostBuf, 0xAB, size); |
| 223 | + |
| 224 | + /* Copy to device */ |
| 225 | + err = cuMemcpyHtoD(devPtr, hostBuf, size); |
| 226 | + if (err == CUDA_SUCCESS) { |
| 227 | + printf("Host to device copy succeeded\n"); |
| 228 | + |
| 229 | + /* Copy back */ |
| 230 | + memset(resultBuf, 0, size); |
| 231 | + err = cuMemcpyDtoH(resultBuf, devPtr, size); |
| 232 | + if (err == CUDA_SUCCESS) { |
| 233 | + printf("Device to host copy succeeded\n"); |
| 234 | + |
| 235 | + /* Verify */ |
| 236 | + if (memcmp(hostBuf, resultBuf, size) == 0) { |
| 237 | + printf("Data verification PASSED!\n"); |
| 238 | + } else { |
| 239 | + printf("Data verification FAILED!\n"); |
| 240 | + } |
| 241 | + } |
| 242 | + } |
| 243 | + |
| 244 | + free(hostBuf); |
| 245 | + free(resultBuf); |
| 246 | + } |
| 247 | + } |
| 248 | + |
| 249 | + cuMemFree(devPtr); |
| 250 | + printf("Memory freed\n"); |
| 251 | + } else { |
| 252 | + printf("Memory allocation failed: %d\n", err); |
| 253 | + } |
| 254 | + } else { |
| 255 | + printf("Context creation failed: %d\n", err); |
| 256 | + } |
| 257 | + } |
| 258 | + |
| 259 | + dlclose(handle); |
| 260 | + return 0; |
| 261 | +} |
| 262 | + |
/*
 * Test CXL memory region directly.
 *
 * Opens BAR0 of the PCI device at the fixed address 0000:0d:00.0 through
 * sysfs, reads the BAR's start/end/flags from the first line of the device's
 * "resource" file, maps the BAR and prints its first four 32-bit registers.
 *
 * NOTE: the device address is hard-coded; it is not taken from the scan done
 * by test_pci_device().
 *
 * Returns 0 if the registers were read; -1 if the files cannot be opened or
 * parsed, the BAR is unassigned or outside the accepted size range
 * (at least 16 bytes, below 1 GiB), or the mapping fails.
 */
int test_cxl_memory(void)
{
    static const char bar0_path[] =
        "/sys/bus/pci/devices/0000:0d:00.0/resource0";
    static const char resource_path[] =
        "/sys/bus/pci/devices/0000:0d:00.0/resource";
    enum { NUM_REGS = 4 };
    const unsigned long long max_map_size = 1024ULL * 1024ULL * 1024ULL;

    /* unsigned long long matches %llx on every ABI, unlike uint64_t with %lx */
    unsigned long long start = 0;
    unsigned long long end = 0;
    unsigned long long flags = 0;
    unsigned long long span;
    size_t size;
    FILE *fp;
    void *map;
    int parsed;
    int fd;
    int rc = -1;

    printf("\n=== Testing CXL Memory Region ===\n");

    /* Find the device's BAR0 */
    fd = open(bar0_path, O_RDWR | O_SYNC);
    if (fd < 0) {
        printf("Could not open resource0: %s\n", bar0_path);
        printf("Try running as root\n");
        return -1;
    }

    /* Get resource size from sysfs (first line describes BAR0) */
    fp = fopen(resource_path, "r");
    if (!fp) {
        printf("Could not open resource file: %s\n", resource_path);
        goto out;
    }
    parsed = fscanf(fp, "0x%llx 0x%llx 0x%llx", &start, &end, &flags);
    fclose(fp);
    if (parsed != 3) {
        printf("Could not parse BAR0 range from %s\n", resource_path);
        goto out;
    }

    /* An unused BAR is reported as all zeros; "end - start + 1" would be 1. */
    if ((start == 0 && end == 0) || end < start) {
        printf("BAR0 is not assigned\n");
        goto out;
    }

    span = end - start + 1;
    printf("BAR0: start=0x%llx end=0x%llx size=%llu\n", start, end, span);

    /* Must hold the registers read below and stay under the 1 GiB cap. */
    if (span < NUM_REGS * sizeof(uint32_t) || span >= max_map_size) {
        printf("BAR0 size out of range, not mapping\n");
        goto out;
    }
    size = (size_t)span;

    map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        perror("mmap failed");
        goto out;
    }
    printf("Mapped BAR0 at %p\n", map);

    /* Read some registers; volatile so each device read is really issued */
    {
        volatile uint32_t *regs = (volatile uint32_t *)map;

        for (int i = 0; i < NUM_REGS; i++) {
            printf("Register[%d]: 0x%08x\n", i, (unsigned int)regs[i]);
        }
    }

    munmap(map, size);
    rc = 0;

out:
    close(fd);
    return rc;
}
| 313 | + |
/*
 * Program entry point.
 *
 * Prints a banner, then runs the PCI scan, the BAR0 mapping test and the
 * CUDA API test in that order. The tests' return values are not inspected,
 * so the process exit status is always 0.
 */
int main(int argc, char *argv[])
{
    /* Command-line arguments are accepted but not used. */
    (void)argc;
    (void)argv;

    fputs("CXL Type 2 GPU Test Program\n", stdout);
    fputs("============================\n\n", stdout);

    /* Step 1: PCI device access */
    test_pci_device();

    /* Step 2: CXL memory region */
    test_cxl_memory();

    /* Step 3: CUDA API */
    test_cuda_api();

    fputs("\n=== Test Complete ===\n", stdout);
    return 0;
}
0 commit comments