Skip to content

Commit e4c1ccd

Browse files
authored
Initial version of USMChecking (#124)
* initial version of USM checking When USMChecking is enabled, log the type of USM allocation and its associated device when it is set as a kernel argument. * fix typo
1 parent 6ae4791 commit e4c1ccd

File tree

9 files changed

+218
-22
lines changed

9 files changed

+218
-22
lines changed

docs/controls.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ If set to a nonzero value, the Intercept Layer for OpenCL Applications will chec
207207

208208
If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for leaks of various OpenCL objects, such as memory objects and events.
209209

210+
##### `USMChecking` (bool)
211+
212+
If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for incorrect usage of Unified Shared Memory (USM) pointers.
213+
210214
##### `CLInfoLogging` (bool)
211215

212216
If set to a nonzero value, logs information about the platforms and devices in the system on the first call to clGetPlatformIDs().

intercept/src/cli_ext.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -705,7 +705,7 @@ cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL(
705705
#define CL_INVALID_PAK_REFERENCE_IMAGE_INDEX_INTEL CL_INVALID_VALUE
706706

707707
// cl_intel_unified_shared_memory POC
708-
// These enums are in sync with revision H of the USM spec.
708+
// These enums are in sync with revision O of the USM spec.
709709

710710
/* cl_device_info */
711711
#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190
@@ -714,8 +714,7 @@ cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL(
714714
#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193
715715
#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194
716716

717-
// TODO: should this be cl_device_unified_shared_memory_capabilities_intel?
718-
typedef cl_bitfield cl_unified_shared_memory_capabilities_intel;
717+
typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel;
719718

720719
/* cl_device_unified_shared_memory_capabilities_intel - bitfield */
721720
#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0)
@@ -805,7 +804,7 @@ clSharedMemAllocINTEL(
805804
extern CL_API_ENTRY cl_int CL_API_CALL
806805
clMemFreeINTEL(
807806
cl_context context,
808-
const void* ptr); // TBD: const?
807+
void* ptr);
809808

810809
extern CL_API_ENTRY cl_int CL_API_CALL
811810
clGetMemAllocInfoINTEL(

intercept/src/controls.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ CLI_CONTROL( cl_uint, ContextHintLevel, 0, "If s
5050
CLI_CONTROL( bool, EventCallbackLogging, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will install its own callback for every event callback and log the call to the event callback. The application's event callback will be invoked after the Intercept Layer for OpenCL Applications' event callback." )
5151
CLI_CONTROL( bool, EventChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check and log any events in an event wait list that are invalid or in an error state. This can help to debug complex event dependency issues." )
5252
CLI_CONTROL( bool, LeakChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for leaks of various OpenCL objects, such as memory objects and events." )
53+
CLI_CONTROL( bool, USMChecking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will check for incorrect usage of Unified Shared Memory (USM) pointers." )
5354
CLI_CONTROL( bool, CLInfoLogging, false, "If set to a nonzero value, logs information about the platforms and devices in the system on the first call to clGetPlatformIDs()." )
5455
CLI_CONTROL( std::string, DumpDir, "", "If set, the Intercept Layer for OpenCL Applications will emit logs and dumps to this directory instead of the default directory. The default log and dump directory is \"%SYSTEMDRIVE%\\Intel\\CLIntercept_Dump\\<Process Name>\" on Windows and \"~/CLIntercept_Dump/<Process Name>\" on other operating systems. The log and dump directory must be writeable, otherwise the Intercept Layer for OpenCL Applications will not be able to create or modify log or dump files." )
5556
CLI_CONTROL( bool, AppendPid, false, "If set, the Intercept Layer for OpenCL Applications will append process ID to the log directory name." )

intercept/src/dispatch.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9107,7 +9107,7 @@ clSharedMemAllocINTEL(
91079107
CL_API_ENTRY cl_int CL_API_CALL
91089108
clMemFreeINTEL(
91099109
cl_context context,
9110-
const void* ptr)
9110+
void* ptr)
91119111
{
91129112
CLIntercept* pIntercept = GetIntercept();
91139113

@@ -9196,7 +9196,7 @@ CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgMemPointerINTEL(
91969196
kernel,
91979197
arg_index,
91989198
arg_value );
9199-
SET_KERNEL_ARG_SVM_POINTER( kernel, arg_index, arg_value );
9199+
CHECK_KERNEL_ARG_USM_POINTER( kernel, arg_value );
92009200
CPU_PERFORMANCE_TIMING_START();
92019201

92029202
cl_int retVal = CL_INVALID_OPERATION;

intercept/src/dispatch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1245,7 +1245,7 @@ struct CLdispatch
12451245
// cl_intel_unified_shared_memory
12461246
cl_int (CLI_API_CALL *clMemFreeINTEL) (
12471247
cl_context context,
1248-
const void* ptr); // TBD: const?
1248+
void* ptr);
12491249

12501250
// cl_intel_unified_shared_memory
12511251
cl_int (CLI_API_CALL *clGetMemAllocInfoINTEL) (

intercept/src/enummap.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -806,10 +806,10 @@ CEnumNameMap::CEnumNameMap()
806806
ADD_ENUM_NAME( m_cl_int, CL_COMMAND_MIGRATEMEM_INTEL );
807807
ADD_ENUM_NAME( m_cl_int, CL_COMMAND_MEMADVISE_INTEL );
808808

809-
ADD_ENUM_NAME( m_cl_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL );
810-
ADD_ENUM_NAME( m_cl_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL );
811-
ADD_ENUM_NAME( m_cl_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL );
812-
ADD_ENUM_NAME( m_cl_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL );
809+
ADD_ENUM_NAME( m_cl_device_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL );
810+
ADD_ENUM_NAME( m_cl_device_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL );
811+
ADD_ENUM_NAME( m_cl_device_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL );
812+
ADD_ENUM_NAME( m_cl_device_unified_shared_memory_capabilities_intel, CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL );
813813

814814
ADD_ENUM_NAME( m_cl_mem_alloc_flags_intel, CL_MEM_ALLOC_WRITE_COMBINED_INTEL );
815815

intercept/src/enummap.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ class CEnumNameMap
120120
GENERATE_MAP_AND_FUNC( name_program_binary_type, cl_program_binary_type );
121121
GENERATE_MAP_AND_BITFIELD_FUNC( name_svm_mem_flags, cl_svm_mem_flags );
122122
GENERATE_MAP_AND_FUNC( name_gl, GLenum );
123-
GENERATE_MAP_AND_BITFIELD_FUNC( name_device_usm_capabilities, cl_unified_shared_memory_capabilities_intel );
123+
GENERATE_MAP_AND_BITFIELD_FUNC( name_device_usm_capabilities, cl_device_unified_shared_memory_capabilities_intel );
124124
GENERATE_MAP_AND_BITFIELD_FUNC( name_mem_alloc_flags, cl_mem_alloc_flags_intel );
125125

126126
#undef GENERATE_MAP_AND_FUNC

intercept/src/intercept.cpp

Lines changed: 189 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,8 +1599,7 @@ void CLIntercept::getDeviceInfoString(
15991599
{
16001600
str = "";
16011601

1602-
unsigned int i = 0;
1603-
for( i = 0; i < numDevices; i++ )
1602+
for( cl_uint i = 0; i < numDevices; i++ )
16041603
{
16051604
cl_int errorCode = CL_SUCCESS;
16061605

@@ -2241,8 +2240,7 @@ void CLIntercept::logBuild(
22412240

22422241
if( errorCode == CL_SUCCESS )
22432242
{
2244-
size_t i = 0;
2245-
for( i = 0; i < numDevices; i++ )
2243+
for( cl_uint i = 0; i < numDevices; i++ )
22462244
{
22472245
if( m_Config.BuildLogging )
22482246
{
@@ -2442,8 +2440,7 @@ void CLIntercept::logPreferredWorkGroupSizeMultiple(
24422440
}
24432441
if( errorCode == CL_SUCCESS )
24442442
{
2445-
size_t i = 0;
2446-
for( i = 0; i < numDevices; i++ )
2443+
for( cl_uint i = 0; i < numDevices; i++ )
24472444
{
24482445
size_t kernelPreferredWorkGroupSizeMultiple = 0;
24492446
errorCode = dispatch().clGetKernelWorkGroupInfo(
@@ -5982,6 +5979,190 @@ void CLIntercept::checkEventList(
59825979
}
59835980
}
59845981

5982+
///////////////////////////////////////////////////////////////////////////////
5983+
//
5984+
// Logs what kind of Unified Shared Memory (USM) allocation a kernel argument
// pointer refers to, and the device it is associated with, if any.
//
//  kernel - the kernel the pointer is being set on.  It is used to look up
//           the context for the allocation query and, for pointers of
//           unknown type, the devices of the kernel's program.
//  arg    - the pointer value being set as the kernel argument.
//
// This function only logs; it returns nothing and does not modify the kernel
// or the allocation.  m_Mutex is held for the duration of the call.
void CLIntercept::checkKernelArgUSMPointer(
    cl_kernel kernel,
    const void* arg )
{
    std::lock_guard<std::mutex> lock(m_Mutex);

    if( arg == NULL )
    {
        logf( "mem pointer %p is NULL\n", arg );
        return;
    }

    // clGetMemAllocInfoINTEL is an extension entry point stored as a function
    // pointer in the dispatch table.  Presumably it is unset when the
    // extension function was never resolved (TODO confirm), so guard against
    // calling through a NULL pointer.
    if( dispatch().clGetMemAllocInfoINTEL == NULL )
    {
        logf( "function pointer for clGetMemAllocInfoINTEL is NULL\n" );
        return;
    }

    // The allocation queries need the context, which is taken from the kernel.
    cl_context context = NULL;
    cl_int errorCode = dispatch().clGetKernelInfo(
        kernel,
        CL_KERNEL_CONTEXT,
        sizeof(context),
        &context,
        NULL );
    if( errorCode != CL_SUCCESS )
    {
        logf( "couldn't query context for kernel %p for mem pointer %p!\n",
            kernel,
            arg );
        return;
    }

    // The return values of these two queries are intentionally not checked:
    // if a query fails, the defaults below (unknown type, no device) remain
    // in place and the pointer is reported as an UNKNOWN pointer.
    cl_unified_shared_memory_type_intel memType = CL_MEM_TYPE_UNKNOWN_INTEL;
    cl_device_id associatedDevice = NULL;
    dispatch().clGetMemAllocInfoINTEL(
        context,
        arg,
        CL_MEM_ALLOC_TYPE_INTEL,
        sizeof(memType),
        &memType,
        NULL );
    dispatch().clGetMemAllocInfoINTEL(
        context,
        arg,
        CL_MEM_ALLOC_DEVICE_INTEL,
        sizeof(associatedDevice),
        &associatedDevice,
        NULL );

    // Prefer logging the device name.  deviceName stays NULL when there is no
    // associated device, in which case the raw device handle is logged.
    // The string is released with delete [] below.
    char* deviceName = NULL;
    if( associatedDevice )
    {
        allocateAndGetDeviceInfoString(
            associatedDevice,
            CL_DEVICE_NAME,
            deviceName );
    }

    if( memType == CL_MEM_TYPE_DEVICE_INTEL )
    {
        if( deviceName )
        {
            logf( "mem pointer %p is a DEVICE pointer associated with device %s\n",
                arg,
                deviceName );
        }
        else if( associatedDevice )
        {
            logf( "mem pointer %p is a DEVICE pointer associated with device %p\n",
                arg,
                associatedDevice );
        }
        else
        {
            // A device allocation without an associated device is unexpected.
            CLI_ASSERT( 0 );
            logf( "mem pointer %p is a DEVICE pointer but has no associated device?\n",
                arg );
        }
    }
    else if( memType == CL_MEM_TYPE_HOST_INTEL )
    {
        logf( "mem pointer %p is a HOST pointer\n",
            arg );
    }
    else if( memType == CL_MEM_TYPE_SHARED_INTEL )
    {
        if( deviceName )
        {
            logf( "mem pointer %p is a SHARED pointer associated with device %s\n",
                arg,
                deviceName );
        }
        else if( associatedDevice )
        {
            logf( "mem pointer %p is a SHARED pointer associated with device %p\n",
                arg,
                associatedDevice );
        }
        else
        {
            logf( "mem pointer %p is a SHARED pointer without an associated device\n",
                arg );
        }
    }
    else if( memType == CL_MEM_TYPE_UNKNOWN_INTEL )
    {
        // This could be a system shared USM pointer, or this could be an error.
        // Check the devices associated with this kernel to see if any support
        // system shared USM allocations.
        cl_program program = NULL;
        errorCode = dispatch().clGetKernelInfo(
            kernel,
            CL_KERNEL_PROGRAM,
            sizeof(program),
            &program,
            NULL );

        cl_uint numDevices = 0;
        cl_device_id* deviceList = NULL;
        if( errorCode == CL_SUCCESS )
        {
            errorCode = allocateAndGetProgramDeviceList(
                program,
                numDevices,
                deviceList );
        }

        if( errorCode == CL_SUCCESS )
        {
            // Any nonzero capability bits mean that the device supports
            // shared system allocations in some form.
            bool supportsSystemSharedUSM = false;
            for( cl_uint d = 0; d < numDevices; d++ )
            {
                cl_device_unified_shared_memory_capabilities_intel usmCaps = 0;
                dispatch().clGetDeviceInfo(
                    deviceList[d],
                    CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL,
                    sizeof(usmCaps),
                    &usmCaps,
                    NULL );
                if( usmCaps != 0 )
                {
                    supportsSystemSharedUSM = true;
                    break;
                }
            }

            if( supportsSystemSharedUSM )
            {
                logf( "mem pointer %p is an UNKNOWN pointer and could be a shared system pointer\n",
                    arg );
            }
            else
            {
                logf( "mem pointer %p is an UNKNOWN pointer and no device support shared system pointers!\n",
                    arg );
            }
        }
        else
        {
            logf( "mem pointer %p is an UNKNOWN pointer and additional queries returned an error!\n",
                arg );
        }

        delete [] deviceList;
    }
    else
    {
        // The query returned a memory type value this function does not know.
        CLI_ASSERT( 0 );
        logf( "query for mem pointer %p returned an unknown memory type %08X!\n",
            arg,
            memType );
    }

    delete [] deviceName;
}
6165+
59856166
///////////////////////////////////////////////////////////////////////////////
59866167
//
59876168
void CLIntercept::startAubCapture(
@@ -6302,8 +6483,7 @@ void CLIntercept::initPrecompiledKernelOverrides(
63026483

63036484
if( tempErrorCode == CL_SUCCESS )
63046485
{
6305-
cl_uint i = 0;
6306-
for( i = 0; i < numDevices; i++ )
6486+
for( cl_uint i = 0; i < numDevices; i++ )
63076487
{
63086488
size_t buildLogSize = 0;
63096489
dispatch().clGetProgramBuildInfo(
@@ -6517,8 +6697,7 @@ void CLIntercept::initBuiltinKernelOverrides(
65176697

65186698
if( tempErrorCode == CL_SUCCESS )
65196699
{
6520-
cl_uint i = 0;
6521-
for( i = 0; i < numDevices; i++ )
6700+
for( cl_uint i = 0; i < numDevices; i++ )
65226701
{
65236702
size_t buildLogSize = 0;
65246703
dispatch().clGetProgramBuildInfo(

intercept/src/intercept.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ class CLIntercept
428428
cl_uint numEvents,
429429
const cl_event* eventList,
430430
cl_event* event );
431+
void checkKernelArgUSMPointer(
432+
cl_kernel kernel,
433+
const void* arg );
431434

432435
void startAubCapture(
433436
const std::string& functionName,
@@ -2044,6 +2047,16 @@ inline bool CLIntercept::checkAubCaptureEnqueueLimits() const
20442047
_event ); \
20452048
}
20462049

2050+
///////////////////////////////////////////////////////////////////////////////
2051+
//
2052+
// Calls CLIntercept::checkKernelArgUSMPointer() for the kernel _kernel and
// the argument pointer _arg when the USMChecking control is enabled, and does
// nothing otherwise.  A variable named pIntercept must be in scope where the
// macro is used.
// Note: this expands to a bare if statement, so do not use it as the
// unbraced body of an if / else.
#define CHECK_KERNEL_ARG_USM_POINTER( _kernel, _arg ) \
2053+
if( pIntercept->config().USMChecking ) \
2054+
{ \
2055+
pIntercept->checkKernelArgUSMPointer( \
2056+
_kernel, \
2057+
_arg ); \
2058+
}
2059+
20472060
///////////////////////////////////////////////////////////////////////////////
20482061
//
20492062
inline std::string CLIntercept::getShortKernelName(

0 commit comments

Comments
 (0)