diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 9fb4239c38..6596a01317 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -30,12 +30,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::vector compiledLocalWorksize; + if (!pLocalWorkSize) { + cl_device_id device = nullptr; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_DEVICE, + sizeof(device), &device, nullptr)); + // This query always returns size_t[3], if nothing was specified it returns + // all zeroes. + size_t queriedLocalWorkSize[3] = {0, 0, 0}; + CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( + cl_adapter::cast(hKernel), device, + CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(size_t[3]), + queriedLocalWorkSize, nullptr)); + if (queriedLocalWorkSize[0] != 0) { + for (uint32_t i = 0; i < workDim; i++) { + compiledLocalWorksize.push_back(queriedLocalWorkSize[i]); + } + } + } CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( cl_adapter::cast(hQueue), cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), + pGlobalWorkSize, + compiledLocalWorksize.empty() ? pLocalWorkSize + : compiledLocalWorksize.data(), + numEventsInWaitList, cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent))); return UR_RESULT_SUCCESS; diff --git a/test/conformance/enqueue/enqueue_adapter_opencl.match b/test/conformance/enqueue/enqueue_adapter_opencl.match index 27ae88c43d..2736de1e46 100644 --- a/test/conformance/enqueue/enqueue_adapter_opencl.match +++ b/test/conformance/enqueue/enqueue_adapter_opencl.match @@ -1,3 +1,2 @@ {{NONDETERMINISTIC}} -urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__OpenCL___{{.*}}_ {{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__OpenCL___{{.*}}_UsePoolEnabled