Skip to content

Commit 8aecda8

Browse files
authored
add tracing and enum map support for new KHR extensions: (#202)
cl_khr_pci_bus_info cl_khr_suggested_local_work_size
1 parent 1b2b344 commit 8aecda8

File tree

5 files changed

+88
-0
lines changed

5 files changed

+88
-0
lines changed

intercept/src/cli_ext.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,18 @@ cl_program CL_API_CALL clCreateProgramWithILKHR(
382382

383383
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
384384

385+
///////////////////////////////////////////////////////////////////////////////
386+
// cl_khr_pci_bus_info
387+
388+
typedef struct _cl_device_pci_bus_info_khr {
389+
cl_uint pci_domain;
390+
cl_uint pci_bus;
391+
cl_uint pci_device;
392+
cl_uint pci_function;
393+
} cl_device_pci_bus_info_khr;
394+
395+
#define CL_DEVICE_PCI_BUS_INFO_KHR 0x410F
396+
385397
///////////////////////////////////////////////////////////////////////////////
386398
// cl_khr_priority_hints
387399

@@ -414,6 +426,19 @@ cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(
414426
void* param_value,
415427
size_t* param_value_size_ret);
416428

429+
///////////////////////////////////////////////////////////////////////////////
430+
// cl_khr_suggested_local_work_size
431+
432+
extern CL_API_ENTRY
433+
cl_int CL_API_CALL
434+
clGetKernelSuggestedLocalWorkSizeKHR(
435+
cl_command_queue command_queue,
436+
cl_kernel kernel,
437+
cl_uint work_dim,
438+
const size_t* global_work_offset,
439+
const size_t* global_work_size,
440+
size_t* suggested_local_work_size);
441+
417442
///////////////////////////////////////////////////////////////////////////////
418443
// cl_khr_terminate_context
419444

intercept/src/dispatch.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7972,9 +7972,55 @@ CL_API_ENTRY cl_int CL_API_CALL clSetPerformanceConfigurationINTEL(
79727972
NULL_FUNCTION_POINTER_RETURN_ERROR();
79737973
}
79747974

7975+
///////////////////////////////////////////////////////////////////////////////
7976+
//
7977+
// cl_khr_suggested_local_work_size
7978+
CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeKHR(
7979+
cl_command_queue commandQueue,
7980+
cl_kernel kernel,
7981+
cl_uint workDim,
7982+
const size_t *globalWorkOffset,
7983+
const size_t *globalWorkSize,
7984+
size_t *suggestedLocalWorkSize)
7985+
{
7986+
CLIntercept* pIntercept = GetIntercept();
7987+
7988+
if( pIntercept )
7989+
{
7990+
auto dispatchX = pIntercept->dispatchX(commandQueue);
7991+
if( dispatchX.clGetKernelSuggestedLocalWorkSizeKHR )
7992+
{
7993+
GET_ENQUEUE_COUNTER();
7994+
CALL_LOGGING_ENTER_KERNEL(
7995+
kernel,
7996+
"queue = %p, kernel = %p",
7997+
commandQueue,
7998+
kernel );
7999+
CPU_PERFORMANCE_TIMING_START();
8000+
8001+
cl_int retVal = dispatchX.clGetKernelSuggestedLocalWorkSizeKHR(
8002+
commandQueue,
8003+
kernel,
8004+
workDim,
8005+
globalWorkOffset,
8006+
globalWorkSize,
8007+
suggestedLocalWorkSize );
8008+
8009+
CPU_PERFORMANCE_TIMING_END();
8010+
CHECK_ERROR( retVal );
8011+
CALL_LOGGING_EXIT( retVal );
8012+
8013+
return retVal;
8014+
}
8015+
}
8016+
8017+
NULL_FUNCTION_POINTER_RETURN_ERROR();
8018+
}
8019+
79758020
///////////////////////////////////////////////////////////////////////////////
79768021
//
79778022
// Unofficial cl_get_kernel_suggested_local_work_size extension:
8023+
// This function should stay in sync with clGetKernelSuggestedLocalWorkSizeKHR, above.
79788024
CL_API_ENTRY cl_int CL_API_CALL clGetKernelSuggestedLocalWorkSizeINTEL(
79798025
cl_command_queue commandQueue,
79808026
cl_kernel kernel,

intercept/src/dispatch.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,15 @@ struct CLdispatchX
197197
void* param_value,
198198
size_t* param_value_size_ret);
199199

200+
// cl_khr_suggested_local_work_size
201+
cl_int (CL_API_CALL *clGetKernelSuggestedLocalWorkSizeKHR) (
202+
cl_command_queue command_queue,
203+
cl_kernel kernel,
204+
cl_uint work_dim,
205+
const size_t* global_work_offset,
206+
const size_t* global_work_size,
207+
size_t* suggested_local_work_size);
208+
200209
// cl_khr_create_command_queue
201210
cl_command_queue (CL_API_CALL *clCreateCommandQueueWithPropertiesKHR) (
202211
cl_context context,

intercept/src/enummap.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,9 @@ CEnumNameMap::CEnumNameMap()
720720
// cl_khr_initialize_memory
721721
ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_MEMORY_INITIALIZE_KHR );
722722

723+
// cl_khr_pci_bus_info extension
724+
ADD_ENUM_NAME( m_cl_int, CL_DEVICE_PCI_BUS_INFO_KHR );
725+
723726
// cl_khr_priority_hints extension
724727
ADD_ENUM_NAME( m_cl_int, CL_QUEUE_PRIORITY_KHR );
725728

intercept/src/intercept.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5083,6 +5083,9 @@ void CLIntercept::addTimingEvent(
50835083
{
50845084
cl_platform_id platform = getPlatform(device);
50855085

5086+
// TODO: Switch to or add support for clGetKernelSuggestedLocalWorkSizeKHR
5087+
// when support is available.
5088+
50865089
if( dispatchX(platform).clGetKernelSuggestedLocalWorkSizeINTEL == NULL )
50875090
{
50885091
getExtensionFunctionAddress(
@@ -10904,6 +10907,8 @@ void* CLIntercept::getExtensionFunctionAddress(
1090410907
CHECK_RETURN_EXTENSION_FUNCTION( clCreateProgramWithILKHR );
1090510908
// cl_khr_subgroups
1090610909
CHECK_RETURN_EXTENSION_FUNCTION( clGetKernelSubGroupInfoKHR );
10910+
// cl_khr_suggested_local_work_size
10911+
CHECK_RETURN_EXTENSION_FUNCTION( clGetKernelSuggestedLocalWorkSizeKHR );
1090710912
// cl_khr_create_command_queue
1090810913
CHECK_RETURN_EXTENSION_FUNCTION( clCreateCommandQueueWithPropertiesKHR );
1090910914

0 commit comments

Comments
 (0)