2323#define OCL_RUNTIME_EXPORT GC_DLL_EXPORT
2424
2525namespace {
26+ /* clang-format off */
27+ #define CaseToString (x ) case x: return #x;
28+ const char * opencl_errstr (cl_int err) {
29+ switch (err){
30+ CaseToString (CL_SUCCESS)
31+ CaseToString (CL_DEVICE_NOT_FOUND)
32+ CaseToString (CL_DEVICE_NOT_AVAILABLE)
33+ CaseToString (CL_COMPILER_NOT_AVAILABLE)
34+ CaseToString (CL_MEM_OBJECT_ALLOCATION_FAILURE)
35+ CaseToString (CL_OUT_OF_RESOURCES)
36+ CaseToString (CL_OUT_OF_HOST_MEMORY)
37+ CaseToString (CL_PROFILING_INFO_NOT_AVAILABLE)
38+ CaseToString (CL_MEM_COPY_OVERLAP)
39+ CaseToString (CL_IMAGE_FORMAT_MISMATCH)
40+ CaseToString (CL_IMAGE_FORMAT_NOT_SUPPORTED)
41+ CaseToString (CL_BUILD_PROGRAM_FAILURE)
42+ CaseToString (CL_MAP_FAILURE)
43+ CaseToString (CL_MISALIGNED_SUB_BUFFER_OFFSET)
44+ CaseToString (CL_COMPILE_PROGRAM_FAILURE)
45+ CaseToString (CL_LINKER_NOT_AVAILABLE)
46+ CaseToString (CL_LINK_PROGRAM_FAILURE)
47+ CaseToString (CL_DEVICE_PARTITION_FAILED)
48+ CaseToString (CL_KERNEL_ARG_INFO_NOT_AVAILABLE)
49+ CaseToString (CL_INVALID_VALUE)
50+ CaseToString (CL_INVALID_DEVICE_TYPE)
51+ CaseToString (CL_INVALID_PLATFORM)
52+ CaseToString (CL_INVALID_DEVICE)
53+ CaseToString (CL_INVALID_CONTEXT)
54+ CaseToString (CL_INVALID_QUEUE_PROPERTIES)
55+ CaseToString (CL_INVALID_COMMAND_QUEUE)
56+ CaseToString (CL_INVALID_HOST_PTR)
57+ CaseToString (CL_INVALID_MEM_OBJECT)
58+ CaseToString (CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
59+ CaseToString (CL_INVALID_IMAGE_SIZE)
60+ CaseToString (CL_INVALID_SAMPLER)
61+ CaseToString (CL_INVALID_BINARY)
62+ CaseToString (CL_INVALID_BUILD_OPTIONS)
63+ CaseToString (CL_INVALID_PROGRAM)
64+ CaseToString (CL_INVALID_PROGRAM_EXECUTABLE)
65+ CaseToString (CL_INVALID_KERNEL_NAME)
66+ CaseToString (CL_INVALID_KERNEL_DEFINITION)
67+ CaseToString (CL_INVALID_KERNEL)
68+ CaseToString (CL_INVALID_ARG_INDEX)
69+ CaseToString (CL_INVALID_ARG_VALUE)
70+ CaseToString (CL_INVALID_ARG_SIZE)
71+ CaseToString (CL_INVALID_KERNEL_ARGS)
72+ CaseToString (CL_INVALID_WORK_DIMENSION)
73+ CaseToString (CL_INVALID_WORK_GROUP_SIZE)
74+ CaseToString (CL_INVALID_WORK_ITEM_SIZE)
75+ CaseToString (CL_INVALID_GLOBAL_OFFSET)
76+ CaseToString (CL_INVALID_EVENT_WAIT_LIST)
77+ CaseToString (CL_INVALID_EVENT)
78+ CaseToString (CL_INVALID_OPERATION)
79+ CaseToString (CL_INVALID_GL_OBJECT)
80+ CaseToString (CL_INVALID_BUFFER_SIZE)
81+ CaseToString (CL_INVALID_MIP_LEVEL)
82+ CaseToString (CL_INVALID_GLOBAL_WORK_SIZE)
83+ CaseToString (CL_INVALID_PROPERTY)
84+ CaseToString (CL_INVALID_IMAGE_DESCRIPTOR)
85+ CaseToString (CL_INVALID_COMPILER_OPTIONS)
86+ CaseToString (CL_INVALID_LINKER_OPTIONS)
87+ CaseToString (CL_INVALID_DEVICE_PARTITION_COUNT)
88+ default : return " Unknown OpenCL error code" ;
89+ }
90+ }
91+ /* clang-format on */
2692
// Runs the statement `a`, then inspects `err`. The macro does not declare
// `err` itself, so a variable of that name must already be in scope wherever
// the macro is expanded. When `err` is not CL_SUCCESS, the numeric code, its
// symbolic name, the source line and the text of `a` are written to stderr
// and the process is aborted.
#define CL_SAFE_CALL2(a) \
  do { \
    (a); \
    if (err == CL_SUCCESS) \
      break; /* success: leave the do/while(0) wrapper */ \
    fprintf(stderr, "FAIL: err=%d (%s) @ line=%d (%s)\n", err, \
            opencl_errstr(err), __LINE__, (#a)); \
    abort(); \
  } while (0)
@@ -37,8 +104,8 @@ namespace {
37104 { \
38105 auto status = (call); \
39106 if (status != CL_SUCCESS) { \
40- fprintf (stderr, " CL error %d @ line=%d (%s)\n " , status, __LINE__, \
41- ( #call)); \
107+ fprintf (stderr, " CL error %d (%s) @ line=%d (%s)\n " , status, \
108+ opencl_errstr (status), __LINE__, ( #call)); \
42109 abort (); \
43110 } \
44111 }
@@ -159,7 +226,8 @@ static cl_device_id getDevice(cl_device_type *devtype) {
159226 }
160227
161228 std::vector<cl_device_id> devices (uintValue);
162- clGetDeviceIDs (platform, *devtype, uintValue, devices.data (), nullptr );
229+ CL_SAFE_CALL (
230+ clGetDeviceIDs (platform, *devtype, uintValue, devices.data (), nullptr ));
163231
164232 for (auto &device : devices) {
165233 CL_SAFE_CALL (clGetDeviceInfo (device, CL_DEVICE_VENDOR_ID, sizeof (cl_uint),
@@ -447,29 +515,35 @@ extern "C" OCL_RUNTIME_EXPORT void gpuWait(GPUCLQUEUE *queue) {
447515
448516// a silly workaround for mgpuModuleLoad. OCL needs context and device to load
449517// the module. We remember the last call to any mgpu* APIs
450- static thread_local GPUCLQUEUE *lastQueue;
518+ static thread_local GPUCLQUEUE *lastQueue{ nullptr } ;
451519extern " C" OCL_RUNTIME_EXPORT GPUCLQUEUE *mgpuStreamCreate () {
452520 auto ret =
453521 new GPUCLQUEUE (static_cast <cl_device_id>(nullptr ), nullptr , nullptr );
454522 lastQueue = ret;
455523 return ret;
456524}
457525
526+ GPUCLQUEUE *getOrCreateStaticQueue () {
527+ if (!lastQueue) {
528+ return mgpuStreamCreate ();
529+ }
530+ return lastQueue;
531+ }
532+
458533extern " C" OCL_RUNTIME_EXPORT void mgpuStreamDestroy (GPUCLQUEUE *queue) {
459534 lastQueue = nullptr ;
460535 delete queue;
461536}
462537
463538extern " C" OCL_RUNTIME_EXPORT void *
464539mgpuMemAlloc (uint64_t size, GPUCLQUEUE *queue, bool isShared) {
465- lastQueue = queue;
466- return allocDeviceMemory (queue, size, /* alignment*/ 64 , isShared);
540+ return allocDeviceMemory (queue ? queue : getOrCreateStaticQueue (), size,
541+ /* alignment*/ 64 , isShared);
467542}
468543
469544extern " C" OCL_RUNTIME_EXPORT void mgpuMemFree (void *ptr, GPUCLQUEUE *queue) {
470- lastQueue = queue;
471545 if (ptr) {
472- deallocDeviceMemory (queue, ptr);
546+ deallocDeviceMemory (queue ? queue : getOrCreateStaticQueue () , ptr);
473547 }
474548}
475549
@@ -498,8 +572,8 @@ mgpuLaunchKernel(cl_kernel kernel, size_t gridX, size_t gridY, size_t gridZ,
498572 size_t sharedMemBytes, GPUCLQUEUE *queue, void **params,
499573 void ** /* extra*/ , size_t paramsCount) {
500574 launchKernel (
501- queue, kernel, gridX, gridY, gridZ, blockX, blockY, blockZ ,
502- sharedMemBytes,
575+ queue ? queue : getOrCreateStaticQueue () , kernel, gridX, gridY, gridZ,
576+ blockX, blockY, blockZ, sharedMemBytes,
503577 [&]() {
504578 // todo (yijie): do we need to handle shared mem? If there is dynamic
505579 // shared mem required, which value should paramsCount be?
@@ -512,5 +586,5 @@ mgpuLaunchKernel(cl_kernel kernel, size_t gridX, size_t gridY, size_t gridZ,
512586}
513587
514588extern " C" OCL_RUNTIME_EXPORT void mgpuStreamSynchronize (GPUCLQUEUE *queue) {
515- CL_SAFE_CALL (clFinish (queue->queue_ ));
589+ CL_SAFE_CALL (clFinish (( queue ? queue : getOrCreateStaticQueue ()) ->queue_ ));
516590}
0 commit comments