|
1 | 1 | #include "cuda.h" |
2 | 2 | #include <dlfcn.h> |
3 | 3 | #include <stdbool.h> |
| 4 | +#include <stdio.h> |
4 | 5 | #include <stdlib.h> |
5 | 6 | #define PY_SSIZE_T_CLEAN |
6 | 7 | #include <Python.h> |
@@ -420,10 +421,53 @@ static PyObject *fillTMADescriptor(PyObject *self, PyObject *args) { |
420 | 421 | static cuTensorMapEncodeTiled_t cuTensorMapEncodeTiled = NULL; |
421 | 422 | INITIALIZE_FUNCTION_POINTER_IF_NULL(cuTensorMapEncodeTiled, |
422 | 423 | getCuTensorMapEncodeTiledHandle); |
423 | | - CUDA_CHECK_AND_RETURN_NULL(cuTensorMapEncodeTiled( |
| 424 | + CUresult res = cuTensorMapEncodeTiled( |
424 | 425 | &desc->tensorMap, elemType, rank, (void *)global_address, shapeInt, |
425 | 426 | stridesLL, blockSizeInt, elementStrides, CU_TENSOR_MAP_INTERLEAVE_NONE, |
426 | | - swizzle, CU_TENSOR_MAP_L2_PROMOTION_L2_128B, fill)); |
| 427 | + swizzle, CU_TENSOR_MAP_L2_PROMOTION_L2_128B, fill); |
| 428 | + if (res != CUDA_SUCCESS) { |
| 429 | + const char *str; |
| 430 | + cuGetErrorString(res, &str); |
| 431 | + char err[4096] = {0}; |
| 432 | + size_t off = 0; |
| 433 | + off += snprintf( |
| 434 | + err + off, sizeof(err) - off, |
| 435 | + "Triton Error [CUDA]: Failed to create tensor map descriptor: %s\n", |
| 436 | + str ? str : "Unknown error"); |
| 437 | + off += snprintf(err + off, sizeof(err) - off, |
| 438 | + "elemType=%d rank=%d global_address=0x%llx elemSize=%d " |
| 439 | + "swizzle=%d padding=%d\n", |
| 440 | + elemType, rank, (unsigned long long)global_address, |
| 441 | + elemSize, swizzle, padding); |
| 442 | + off += snprintf(err + off, sizeof(err) - off, "shape=["); |
| 443 | + for (int i = 0; i < rank; ++i) { |
| 444 | + off += |
| 445 | + snprintf(err + off, sizeof(err) - off, "%llu%s", |
| 446 | + (unsigned long long)shapeInt[i], (i + 1 < rank) ? ", " : ""); |
| 447 | + } |
| 448 | + off += snprintf(err + off, sizeof(err) - off, "]\n"); |
| 449 | + off += snprintf(err + off, sizeof(err) - off, "strides=["); |
| 450 | + for (int i = 0; i < rank; ++i) { |
| 451 | + off += snprintf(err + off, sizeof(err) - off, "%llu%s", |
| 452 | + (unsigned long long)stridesLL[i], |
| 453 | + (i + 1 < rank) ? ", " : ""); |
| 454 | + } |
| 455 | + off += snprintf(err + off, sizeof(err) - off, "]\n"); |
| 456 | + off += snprintf(err + off, sizeof(err) - off, "blockSize=["); |
| 457 | + for (int i = 0; i < rank; ++i) { |
| 458 | + off += snprintf(err + off, sizeof(err) - off, "%u%s", |
| 459 | + (unsigned)blockSizeInt[i], (i + 1 < rank) ? ", " : ""); |
| 460 | + } |
| 461 | + off += snprintf(err + off, sizeof(err) - off, "] elementStrides=["); |
| 462 | + for (int i = 0; i < rank; ++i) { |
| 463 | + off += snprintf(err + off, sizeof(err) - off, "%u%s", |
| 464 | + (unsigned)elementStrides[i], (i + 1 < rank) ? ", " : ""); |
| 465 | + } |
| 466 | + off += snprintf(err + off, sizeof(err) - off, "]\n"); |
| 467 | + PyErr_SetString(PyExc_RuntimeError, err); |
| 468 | + |
| 469 | + goto cleanup; |
| 470 | + } |
427 | 471 |
|
428 | 472 | return (PyObject *)desc; |
429 | 473 |
|
|
0 commit comments