@@ -7972,9 +7972,55 @@ CL_API_ENTRY cl_int CL_API_CALL clSetPerformanceConfigurationINTEL(
79727972 NULL_FUNCTION_POINTER_RETURN_ERROR ();
79737973}
79747974
7975+ // /////////////////////////////////////////////////////////////////////////////
7976+ //
7977+ // cl_khr_suggested_local_work_size
7978+ CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeKHR (
7979+ cl_command_queue commandQueue,
7980+ cl_kernel kernel,
7981+ cl_uint workDim,
7982+ const size_t *globalWorkOffset,
7983+ const size_t *globalWorkSize,
7984+ size_t *suggestedLocalWorkSize)
7985+ {
7986+ CLIntercept* pIntercept = GetIntercept ();
7987+
7988+ if ( pIntercept )
7989+ {
7990+ auto dispatchX = pIntercept->dispatchX (commandQueue);
7991+ if ( dispatchX.clGetKernelSuggestedLocalWorkSizeKHR )
7992+ {
7993+ GET_ENQUEUE_COUNTER ();
7994+ CALL_LOGGING_ENTER_KERNEL (
7995+ kernel,
7996+ " queue = %p, kernel = %p" ,
7997+ commandQueue,
7998+ kernel );
7999+ CPU_PERFORMANCE_TIMING_START ();
8000+
8001+ cl_int retVal = dispatchX.clGetKernelSuggestedLocalWorkSizeKHR (
8002+ commandQueue,
8003+ kernel,
8004+ workDim,
8005+ globalWorkOffset,
8006+ globalWorkSize,
8007+ suggestedLocalWorkSize );
8008+
8009+ CPU_PERFORMANCE_TIMING_END ();
8010+ CHECK_ERROR ( retVal );
8011+ CALL_LOGGING_EXIT ( retVal );
8012+
8013+ return retVal;
8014+ }
8015+ }
8016+
8017+ NULL_FUNCTION_POINTER_RETURN_ERROR ();
8018+ }
8019+
79758020// /////////////////////////////////////////////////////////////////////////////
79768021//
79778022// Unofficial cl_get_kernel_suggested_local_work_size extension:
8023+ // This function should stay in sync with clGetKernelSuggestedLocalWorkSizeKHR, above.
79788024CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL (
79798025 cl_command_queue commandQueue,
79808026 cl_kernel kernel,
0 commit comments