Skip to content

Commit dfc7466

Browse files
authored
add the ability to use an empty kernel for command buffer profiling (#148)
* add the ability to use an empty kernel for command buffer profiling * update README
1 parent 038e82a commit dfc7466

File tree

4 files changed

+108
-12
lines changed

4 files changed

+108
-12
lines changed

layers/10_cmdbufemu/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ The following environment variables can modify the behavior of the command buffe
3333
| Environment Variable | Behavior | Example Format |
3434
|----------------------|----------|-----------------|
3535
| `CMDBUFEMU_EnhancedErrorChecking` | Enables additional error checking when commands are added to a command buffer using a command buffer "test queue". By default, the additional error checking is disabled. | `export CMDBUFEMU_EnhancedErrorChecking=1`<br/><br/>`set CMDBUFEMU_EnhancedErrorChecking=1` |
36+
| `CMDBUFEMU_KernelForProfiling` | Enables use of an empty kernel for event profiling instead of event profiling on a command-queue barrier. By default, to minimize overhead, the empty kernel is not used. | `export CMDBUFEMU_KernelForProfiling=1`<br/><br/>`set CMDBUFEMU_KernelForProfiling=1` |
3637

3738
## Known Limitations
3839

layers/10_cmdbufemu/emulate.cpp

Lines changed: 100 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
3939
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
4040
CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
4141

42+
static cl_int enqueueProfilingKernel(
43+
cl_command_queue queue,
44+
cl_kernel kernel,
45+
cl_uint num_events_in_wait_list,
46+
const cl_event* event_wait_list,
47+
cl_event* event )
48+
{
49+
const size_t one = 1;
50+
return g_pNextDispatch->clEnqueueNDRangeKernel(
51+
queue,
52+
kernel,
53+
1,
54+
nullptr,
55+
&one,
56+
nullptr,
57+
num_events_in_wait_list,
58+
event_wait_list,
59+
event );
60+
}
61+
4262
typedef struct _cl_mutable_command_khr
4363
{
4464
static bool isValid( cl_mutable_command_khr command )
@@ -1229,6 +1249,7 @@ typedef struct _cl_command_buffer_khr
12291249
(props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
12301250

12311251
cmdbuf->setupTestQueue(queue);
1252+
cmdbuf->setupProfilingKernel(queue);
12321253
}
12331254
}
12341255

@@ -1254,6 +1275,11 @@ typedef struct _cl_command_buffer_khr
12541275
{
12551276
g_pNextDispatch->clReleaseCommandQueue(queue);
12561277
}
1278+
1279+
for( auto kernel : ProfilingKernels )
1280+
{
1281+
g_pNextDispatch->clReleaseKernel(kernel);
1282+
}
12571283
}
12581284

12591285
static bool isValid( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1323,17 @@ typedef struct _cl_command_buffer_khr
12971323

12981324
cl_command_queue getQueue() const
12991325
{
1300-
if( Queues.size() > 0 )
1301-
{
1302-
return Queues[0];
1303-
}
1304-
return nullptr;
1326+
return Queues.empty() ? nullptr : Queues[0];
13051327
}
13061328

13071329
cl_command_queue getTestQueue() const
13081330
{
1309-
if( TestQueues.size() > 0 )
1310-
{
1311-
return TestQueues[0];
1312-
}
1313-
return nullptr;
1331+
return TestQueues.empty() ? nullptr : TestQueues[0];
1332+
}
1333+
1334+
cl_kernel getProfilingKernel() const
1335+
{
1336+
return ProfilingKernels.empty() ? nullptr : ProfilingKernels[0];
13141337
}
13151338

13161339
cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts() const
@@ -1674,6 +1697,7 @@ typedef struct _cl_command_buffer_khr
16741697
std::vector<bool> IsInOrder;
16751698
std::vector<cl_command_queue> TestQueues;
16761699
std::vector<cl_event> BlockingEvents;
1700+
std::vector<cl_kernel> ProfilingKernels;
16771701

16781702
std::vector<std::unique_ptr<Command>> Commands;
16791703
std::atomic<uint32_t> NextSyncPoint;
@@ -1750,6 +1774,52 @@ typedef struct _cl_command_buffer_khr
17501774
}
17511775
}
17521776

1777+
void setupProfilingKernel(cl_command_queue queue)
1778+
{
1779+
if( g_KernelForProfiling )
1780+
{
1781+
cl_context context = nullptr;
1782+
g_pNextDispatch->clGetCommandQueueInfo(
1783+
queue,
1784+
CL_QUEUE_CONTEXT,
1785+
sizeof(context),
1786+
&context,
1787+
nullptr );
1788+
1789+
cl_device_id device = nullptr;
1790+
g_pNextDispatch->clGetCommandQueueInfo(
1791+
queue,
1792+
CL_QUEUE_DEVICE,
1793+
sizeof(device),
1794+
&device,
1795+
nullptr );
1796+
1797+
const char* kernelString = "kernel void Empty() {}";
1798+
cl_program program = g_pNextDispatch->clCreateProgramWithSource(
1799+
context,
1800+
1,
1801+
&kernelString,
1802+
nullptr,
1803+
nullptr );
1804+
g_pNextDispatch->clBuildProgram(
1805+
program,
1806+
1,
1807+
&device,
1808+
nullptr,
1809+
nullptr,
1810+
nullptr );
1811+
1812+
cl_kernel kernel = g_pNextDispatch->clCreateKernel(
1813+
program,
1814+
"Empty",
1815+
nullptr );
1816+
g_pNextDispatch->clReleaseProgram(
1817+
program );
1818+
1819+
ProfilingKernels.push_back(kernel);
1820+
}
1821+
}
1822+
17531823
_cl_command_buffer_khr(
17541824
cl_command_buffer_flags_khr flags,
17551825
cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1996,7 +2066,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
19962066
queue,
19972067
num_events_in_wait_list,
19982068
event_wait_list,
1999-
event ? &startEvent : nullptr);
2069+
event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2070+
if( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2071+
{
2072+
errorCode = enqueueProfilingKernel(
2073+
queue,
2074+
cmdbuf->getProfilingKernel(),
2075+
0,
2076+
nullptr,
2077+
&startEvent );
2078+
}
20002079
}
20012080

20022081
if( errorCode == CL_SUCCESS )
@@ -2010,7 +2089,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
20102089
queue,
20112090
0,
20122091
nullptr,
2013-
event );
2092+
g_KernelForProfiling ? nullptr : event );
2093+
if( errorCode == CL_SUCCESS && g_KernelForProfiling )
2094+
{
2095+
errorCode = enqueueProfilingKernel(
2096+
queue,
2097+
cmdbuf->getProfilingKernel(),
2098+
0,
2099+
nullptr,
2100+
event );
2101+
}
20142102
}
20152103

20162104
if( event )

layers/10_cmdbufemu/emulate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <map>
1111

1212
extern bool g_EnhancedErrorChecking;
13+
extern bool g_KernelForProfiling;
1314

1415
extern const struct _cl_icd_dispatch* g_pNextDispatch;
1516

layers/10_cmdbufemu/main.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434

3535
bool g_EnhancedErrorChecking = false;
3636

37+
// Using kernels for profiling can fix issues with some implementations
38+
// that do not properly support event profiling on barrkers.
39+
40+
bool g_KernelForProfiling = false;
41+
3742
const struct _cl_icd_dispatch* g_pNextDispatch = NULL;
3843

3944
static cl_int CL_API_CALL
@@ -283,6 +288,7 @@ CL_API_ENTRY cl_int CL_API_CALL clInitLayer(
283288
_init_dispatch();
284289

285290
getControl("CMDBUFEMU_EnhancedErrorChecking", g_EnhancedErrorChecking);
291+
getControl("CMDBUFEMU_KernelForProfiling", g_KernelForProfiling);
286292

287293
g_pNextDispatch = target_dispatch;
288294

0 commit comments

Comments
 (0)