Skip to content

Commit 788c47c

Browse files
authored
add support for enabling a MDAPI queue via queue properties (#251)
1 parent 03b6b68 commit 788c47c

File tree

4 files changed

+189
-50
lines changed

4 files changed

+189
-50
lines changed

intercept/mdapi/intercept_mdapi.cpp

Lines changed: 174 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,12 @@
1414

1515
///////////////////////////////////////////////////////////////////////////////
1616
//
17-
static bool convertPropertiesToOCL1_2(
17+
static bool convertPropertiesArrayToBitfield(
1818
const cl_queue_properties* properties,
19-
cl_command_queue_properties& ocl1_2_properties )
19+
cl_command_queue_properties& propertiesBits )
2020
{
2121
if( properties )
2222
{
23-
// Convert properties from array of pairs (OCL2.0) to bitfield (OCL1.2)
2423
for( int i = 0; properties[ i ] != 0; i += 2 )
2524
{
2625
switch( properties[ i ] )
@@ -32,16 +31,12 @@ static bool convertPropertiesToOCL1_2(
3231
case CL_QUEUE_PROFILING_ENABLE:
3332
case CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE:
3433
case CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE:
35-
ocl1_2_properties |= properties[ i + 1 ];
34+
propertiesBits |= properties[ i + 1 ];
3635
break;
3736
default:
3837
return false;
3938
}
4039
break;
41-
case CL_QUEUE_PRIORITY_KHR:
42-
case CL_QUEUE_THROTTLE_KHR:
43-
// Skip / ignore these properties.
44-
break;
4540
default:
4641
return false;
4742
}
@@ -51,6 +46,95 @@ static bool convertPropertiesToOCL1_2(
5146
return true;
5247
}
5348

49+
///////////////////////////////////////////////////////////////////////////////
50+
//
51+
// Builds a new, zero-terminated array of queue property-value pairs that
// requests an MDAPI queue with the given configuration.
//
//  configuration          value stored for CL_QUEUE_MDAPI_CONFIGURATION_INTEL.
//  properties             optional (may be NULL) zero-terminated array of
//                         property-value pairs to start from.
//  pLocalQueueProperties  receives the newly allocated array.
//
// Every pair from the incoming array is copied.  If the incoming array
// already contains CL_QUEUE_MDAPI_PROPERTIES_INTEL or
// CL_QUEUE_MDAPI_CONFIGURATION_INTEL, its value is overwritten with
// CL_QUEUE_MDAPI_ENABLE_INTEL or the passed-in configuration respectively;
// otherwise the missing pair is appended after the copied pairs.
//
// The array is allocated with new[] and is not freed by this function.
// NOTE(review): presumably the caller is expected to delete[] it - confirm.
static void createMDAPICommandQueueProperties(
    cl_uint configuration,
    const cl_queue_properties* properties,
    cl_queue_properties*& pLocalQueueProperties )
{
    // Assume both MDAPI pairs need to be appended until the incoming array
    // shows that it already carries them.
    bool    addMDAPIProperties = true;
    bool    addMDAPIConfiguration = true;

    // First pass: count the incoming entries (two per pair) and note which
    // MDAPI properties are already present.
    size_t  incomingCount = 0;
    if( properties )
    {
        for( ; properties[ incomingCount ] != 0; incomingCount += 2 )
        {
            const cl_queue_properties key = properties[ incomingCount ];
            if( key == CL_QUEUE_MDAPI_PROPERTIES_INTEL )
            {
                addMDAPIProperties = false;
            }
            else if( key == CL_QUEUE_MDAPI_CONFIGURATION_INTEL )
            {
                addMDAPIConfiguration = false;
            }
        }
    }

    // Two entries for each pair that has to be appended.
    const size_t    totalCount =
        incomingCount +
        ( addMDAPIProperties ? 2 : 0 ) +
        ( addMDAPIConfiguration ? 2 : 0 );

    // One extra entry for the terminating zero.
    pLocalQueueProperties = new cl_queue_properties[ totalCount + 1 ];

    // NOTE(review): a plain new[] expression normally reports failure by
    // throwing rather than by returning null, so this guard is presumably
    // purely defensive - confirm against the project's build settings.
    if( !pLocalQueueProperties )
    {
        return;
    }

    // Second pass: copy the incoming pairs, replacing the value of any
    // MDAPI property that was already specified.
    size_t  writeIndex = 0;
    if( properties )
    {
        for( ; properties[ writeIndex ] != 0; writeIndex += 2 )
        {
            const cl_queue_properties key = properties[ writeIndex ];
            pLocalQueueProperties[ writeIndex ] = key;

            switch( key )
            {
            case CL_QUEUE_MDAPI_PROPERTIES_INTEL:
                {
                    CLI_ASSERT( addMDAPIProperties == false );
                    pLocalQueueProperties[ writeIndex + 1 ] = CL_QUEUE_MDAPI_ENABLE_INTEL;
                }
                break;
            case CL_QUEUE_MDAPI_CONFIGURATION_INTEL:
                {
                    CLI_ASSERT( addMDAPIConfiguration == false );
                    pLocalQueueProperties[ writeIndex + 1 ] = configuration;
                }
                break;
            default:
                {
                    pLocalQueueProperties[ writeIndex + 1 ] =
                        properties[ writeIndex + 1 ];
                }
                break;
            }
        }
    }

    // Append whichever MDAPI pairs were not found in the incoming array.
    if( addMDAPIProperties )
    {
        pLocalQueueProperties[ writeIndex++ ] = CL_QUEUE_MDAPI_PROPERTIES_INTEL;
        pLocalQueueProperties[ writeIndex++ ] = CL_QUEUE_MDAPI_ENABLE_INTEL;
    }
    if( addMDAPIConfiguration )
    {
        pLocalQueueProperties[ writeIndex++ ] = CL_QUEUE_MDAPI_CONFIGURATION_INTEL;
        pLocalQueueProperties[ writeIndex++ ] = configuration;
    }

    // Add the terminating zero.
    pLocalQueueProperties[ writeIndex ] = 0;
}
137+
54138
///////////////////////////////////////////////////////////////////////////////
55139
//
56140
void CLIntercept::initCustomPerfCounters()
@@ -154,41 +238,38 @@ cl_command_queue CLIntercept::createMDAPICommandQueue(
154238
{
155239
log( "Metrics discovery is not initialized!\n" );
156240
}
157-
else
241+
else if( m_pMDHelper->ActivateMetricSet() )
158242
{
159-
if( m_pMDHelper->ActivateMetricSet() )
160-
{
161-
cl_int errorCode = CL_SUCCESS;
162-
cl_uint configuration = m_pMDHelper->GetMetricsConfiguration();
163-
logf( "Calling clCreatePerfCountersCommandQueueINTEL with configuration %u....\n",
164-
configuration);
243+
cl_int errorCode = CL_SUCCESS;
244+
cl_uint configuration = m_pMDHelper->GetMetricsConfiguration();
245+
logf( "Calling clCreatePerfCountersCommandQueueINTEL with configuration %u....\n",
246+
configuration);
165247

166-
retVal = dispatchX.clCreatePerfCountersCommandQueueINTEL(
167-
context,
168-
device,
169-
properties,
170-
configuration,
171-
&errorCode );
172-
if( retVal == NULL )
173-
{
174-
logf( "clCreatePerfCountersCommandQueueINTEL returned %s (%d)!\n",
175-
enumName().name( errorCode ).c_str(),
176-
errorCode );
177-
}
178-
else
179-
{
180-
log( "clCreatePerfCountersCommandQueueINTEL succeeded.\n" );
181-
}
182-
if( errcode_ret )
183-
{
184-
errcode_ret[0] = errorCode;
185-
}
248+
retVal = dispatchX.clCreatePerfCountersCommandQueueINTEL(
249+
context,
250+
device,
251+
properties,
252+
configuration,
253+
&errorCode );
254+
if( retVal == NULL )
255+
{
256+
logf( "clCreatePerfCountersCommandQueueINTEL returned %s (%d)!\n",
257+
enumName().name( errorCode ).c_str(),
258+
errorCode );
186259
}
187260
else
188261
{
189-
log( "Metric Discovery: Couldn't activate metric set!\n" );
262+
log( "clCreatePerfCountersCommandQueueINTEL succeeded.\n" );
263+
}
264+
if( errcode_ret )
265+
{
266+
errcode_ret[0] = errorCode;
190267
}
191268
}
269+
else
270+
{
271+
log( "Metric Discovery: Couldn't activate metric set!\n" );
272+
}
192273

193274
return retVal;
194275
}
@@ -203,24 +284,71 @@ cl_command_queue CLIntercept::createMDAPICommandQueue(
203284
{
204285
cl_command_queue retVal = NULL;
205286

206-
// This is a temporary workaround until we have a
207-
// clCreatePerfCountersCommandQueueWithPropertiesINTEL API.
208-
// It converts the OpenCL 2.0 command queue properties to
209-
// OpenCL 1.2 command queue properties, unless an unsupported
210-
// command queue property is specified. If an unsupported
211-
// property is specified then we cannot create an MDAPI command
212-
// queue.
287+
// Some drivers only support creating MDAPI command queues via
288+
// clCreatePerfCountersCommandQueueINTEL. So, for maximum compatibility,
289+
// first try to convert the passed-in properties array to a properties
290+
// bitfield and use clCreatePerfCountersCommandQueueINTEL. If this
291+
// fails, we will instead try a newer codepath that creates an MDAPI
292+
// command queue using new property-value pairs.
213293

214-
cl_command_queue_properties ocl1_2_properties = 0;
215-
if( convertPropertiesToOCL1_2( properties, ocl1_2_properties ) )
294+
cl_command_queue_properties propertiesBits = 0;
295+
if( convertPropertiesArrayToBitfield( properties, propertiesBits ) )
216296
{
217297
retVal = createMDAPICommandQueue(
218298
context,
219299
device,
220-
ocl1_2_properties,
300+
propertiesBits,
221301
errcode_ret );
222302
}
223303

304+
if( retVal == NULL )
305+
{
306+
std::lock_guard<std::mutex> lock(m_Mutex);
307+
308+
if( m_pMDHelper == NULL )
309+
{
310+
log( "Metrics discovery is not initialized!\n" );
311+
}
312+
else if( m_pMDHelper->ActivateMetricSet() )
313+
{
314+
cl_int errorCode = CL_SUCCESS;
315+
cl_uint configuration = m_pMDHelper->GetMetricsConfiguration();
316+
317+
logf( "Creating MDAPI command queue properties for configuration %u....\n",
318+
configuration);
319+
320+
cl_queue_properties* newProperties = NULL;
321+
createMDAPICommandQueueProperties(
322+
configuration,
323+
properties,
324+
newProperties );
325+
326+
retVal = createCommandQueueWithProperties(
327+
context,
328+
device,
329+
newProperties,
330+
&errorCode );
331+
if( retVal == NULL )
332+
{
333+
logf( "MDAPI clCreateCommandQueueWithProperties returned %s (%d)!\n",
334+
enumName().name( errorCode ).c_str(),
335+
errorCode );
336+
}
337+
else
338+
{
339+
log( "MDAPI clCreateCommandQueueWithProperties succeeded.\n" );
340+
}
341+
if( errcode_ret )
342+
{
343+
errcode_ret[0] = errorCode;
344+
}
345+
}
346+
else
347+
{
348+
log( "Metric Discovery: Couldn't activate metric set!\n" );
349+
}
350+
}
351+
224352
return retVal;
225353
}
226354

intercept/src/cli_ext.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1542,6 +1542,11 @@ cl_int CL_API_CALL clSetPerformanceConfigurationINTEL(
15421542
cl_uint* offsets,
15431543
cl_uint* values );
15441544

1545+
#define CL_QUEUE_MDAPI_PROPERTIES_INTEL 0x425E
1546+
#define CL_QUEUE_MDAPI_CONFIGURATION_INTEL 0x425F
1547+
1548+
#define CL_QUEUE_MDAPI_ENABLE_INTEL (1 << 0)
1549+
15451550
///////////////////////////////////////////////////////////////////////////////
15461551
// Unofficial kernel profiling extension:
15471552

intercept/src/enummap.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,10 @@ CEnumNameMap::CEnumNameMap()
11301130
// cl_qcom_ion_host_ptr extension
11311131
ADD_ENUM_NAME( m_cl_int, CL_MEM_ION_HOST_PTR_QCOM );
11321132

1133+
// Unofficial MDAPI extension:
1134+
ADD_ENUM_NAME( m_cl_int, CL_QUEUE_MDAPI_PROPERTIES_INTEL );
1135+
ADD_ENUM_NAME( m_cl_int, CL_QUEUE_MDAPI_CONFIGURATION_INTEL );
1136+
11331137
// Unofficial kernel profiling extension:
11341138
ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_KERNEL_PROFILING_MODES_COUNT_INTEL );
11351139
ADD_ENUM_NAME( m_cl_int, CL_CONTEXT_KERNEL_PROFILING_MODE_INFO_INTEL );

intercept/src/intercept.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2210,6 +2210,8 @@ void CLIntercept::getCommandQueuePropertiesString(
22102210
case CL_QUEUE_SIZE:
22112211
case CL_QUEUE_FAMILY_INTEL:
22122212
case CL_QUEUE_INDEX_INTEL:
2213+
case CL_QUEUE_MDAPI_PROPERTIES_INTEL:
2214+
case CL_QUEUE_MDAPI_CONFIGURATION_INTEL:
22132215
{
22142216
const cl_uint* pu = (const cl_uint*)( properties + 1);
22152217
CLI_SPRINTF( s, 256, "%u", pu[0] );
@@ -3457,7 +3459,7 @@ void CLIntercept::contextCallbackOverrideInit(
34573459
// the context properties for the performance hint enum. We need to
34583460
// do this anyways to count the number of property pairs.
34593461
bool foundPerformanceHintEnum = false;
3460-
int numProperties = 0;
3462+
size_t numProperties = 0;
34613463
if( properties )
34623464
{
34633465
while( properties[ numProperties ] != 0 )
@@ -5321,7 +5323,7 @@ void CLIntercept::createCommandQueueProperties(
53215323
config().DefaultQueueThrottleHint != 0 &&
53225324
checkDeviceForExtension( device, "cl_khr_throttle_hints" );
53235325

5324-
int numProperties = 0;
5326+
size_t numProperties = 0;
53255327
if( addCommandQueuePropertiesEnum )
53265328
{
53275329
numProperties += 2;
@@ -5394,7 +5396,7 @@ void CLIntercept::createCommandQueuePropertiesOverride(
53945396
config().DefaultQueueThrottleHint != 0 &&
53955397
checkDeviceForExtension( device, "cl_khr_throttle_hints" );
53965398

5397-
int numProperties = 0;
5399+
size_t numProperties = 0;
53985400
if( properties )
53995401
{
54005402
while( properties[ numProperties ] != 0 )
@@ -8001,7 +8003,7 @@ void CLIntercept::usmAllocPropertiesOverride(
80018003

80028004
bool addMemFlagsEnum = config().RelaxAllocationLimits != 0;
80038005

8004-
int numProperties = 0;
8006+
size_t numProperties = 0;
80058007
if( properties )
80068008
{
80078009
while( properties[ numProperties ] != 0 )

0 commit comments

Comments
 (0)