Skip to content

Commit 0f3c426

Browse files
committed
add the ability to use an empty kernel for command buffer profiling
1 parent 942bf52 commit 0f3c426

File tree

3 files changed

+107
-12
lines changed

3 files changed

+107
-12
lines changed

layers/10_cmdbufemu/emulate.cpp

Lines changed: 100 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
3939
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
4040
CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
4141

42+
static cl_int enqueueProfilingKernel(
43+
cl_command_queue queue,
44+
cl_kernel kernel,
45+
cl_uint num_events_in_wait_list,
46+
const cl_event* event_wait_list,
47+
cl_event* event )
48+
{
49+
const size_t one = 1;
50+
return g_pNextDispatch->clEnqueueNDRangeKernel(
51+
queue,
52+
kernel,
53+
1,
54+
nullptr,
55+
&one,
56+
nullptr,
57+
num_events_in_wait_list,
58+
event_wait_list,
59+
event );
60+
}
61+
4262
typedef struct _cl_mutable_command_khr
4363
{
4464
static bool isValid( cl_mutable_command_khr command )
@@ -1229,6 +1249,7 @@ typedef struct _cl_command_buffer_khr
12291249
(props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
12301250

12311251
cmdbuf->setupTestQueue(queue);
1252+
cmdbuf->setupProfilingKernel(queue);
12321253
}
12331254
}
12341255

@@ -1254,6 +1275,11 @@ typedef struct _cl_command_buffer_khr
12541275
{
12551276
g_pNextDispatch->clReleaseCommandQueue(queue);
12561277
}
1278+
1279+
for( auto kernel : ProfilingKernels )
1280+
{
1281+
g_pNextDispatch->clReleaseKernel(kernel);
1282+
}
12571283
}
12581284

12591285
static bool isValid( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1323,17 @@ typedef struct _cl_command_buffer_khr
12971323

12981324
cl_command_queue getQueue() const
12991325
{
1300-
if( Queues.size() > 0 )
1301-
{
1302-
return Queues[0];
1303-
}
1304-
return nullptr;
1326+
return Queues.empty() ? nullptr : Queues[0];
13051327
}
13061328

13071329
cl_command_queue getTestQueue() const
13081330
{
1309-
if( TestQueues.size() > 0 )
1310-
{
1311-
return TestQueues[0];
1312-
}
1313-
return nullptr;
1331+
return TestQueues.empty() ? nullptr : TestQueues[0];
1332+
}
1333+
1334+
cl_kernel getProfilingKernel() const
1335+
{
1336+
return ProfilingKernels.empty() ? nullptr : ProfilingKernels[0];
13141337
}
13151338

13161339
cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts() const
@@ -1671,6 +1694,7 @@ typedef struct _cl_command_buffer_khr
16711694
std::vector<bool> IsInOrder;
16721695
std::vector<cl_command_queue> TestQueues;
16731696
std::vector<cl_event> BlockingEvents;
1697+
std::vector<cl_kernel> ProfilingKernels;
16741698

16751699
std::vector<std::unique_ptr<Command>> Commands;
16761700
std::atomic<uint32_t> NextSyncPoint;
@@ -1747,6 +1771,52 @@ typedef struct _cl_command_buffer_khr
17471771
}
17481772
}
17491773

1774+
void setupProfilingKernel(cl_command_queue queue)
1775+
{
1776+
if( g_KernelForProfiling )
1777+
{
1778+
cl_context context = nullptr;
1779+
g_pNextDispatch->clGetCommandQueueInfo(
1780+
queue,
1781+
CL_QUEUE_CONTEXT,
1782+
sizeof(context),
1783+
&context,
1784+
nullptr );
1785+
1786+
cl_device_id device = nullptr;
1787+
g_pNextDispatch->clGetCommandQueueInfo(
1788+
queue,
1789+
CL_QUEUE_DEVICE,
1790+
sizeof(device),
1791+
&device,
1792+
nullptr );
1793+
1794+
const char* kernelString = "kernel void Empty() {}";
1795+
cl_program program = g_pNextDispatch->clCreateProgramWithSource(
1796+
context,
1797+
1,
1798+
&kernelString,
1799+
nullptr,
1800+
nullptr );
1801+
g_pNextDispatch->clBuildProgram(
1802+
program,
1803+
1,
1804+
&device,
1805+
nullptr,
1806+
nullptr,
1807+
nullptr );
1808+
1809+
cl_kernel kernel = g_pNextDispatch->clCreateKernel(
1810+
program,
1811+
"Empty",
1812+
nullptr );
1813+
g_pNextDispatch->clReleaseProgram(
1814+
program );
1815+
1816+
ProfilingKernels.push_back(kernel);
1817+
}
1818+
}
1819+
17501820
_cl_command_buffer_khr(
17511821
cl_command_buffer_flags_khr flags,
17521822
cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1993,7 +2063,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
19932063
queue,
19942064
num_events_in_wait_list,
19952065
event_wait_list,
1996-
event ? &startEvent : nullptr);
2066+
event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2067+
if( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2068+
{
2069+
errorCode = enqueueProfilingKernel(
2070+
queue,
2071+
cmdbuf->getProfilingKernel(),
2072+
0,
2073+
nullptr,
2074+
&startEvent );
2075+
}
19972076
}
19982077

19992078
if( errorCode == CL_SUCCESS )
@@ -2007,7 +2086,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
20072086
queue,
20082087
0,
20092088
nullptr,
2010-
event );
2089+
g_KernelForProfiling ? nullptr : event );
2090+
if( errorCode == CL_SUCCESS && g_KernelForProfiling )
2091+
{
2092+
errorCode = enqueueProfilingKernel(
2093+
queue,
2094+
cmdbuf->getProfilingKernel(),
2095+
0,
2096+
nullptr,
2097+
event );
2098+
}
20112099
}
20122100

20132101
if( event )

layers/10_cmdbufemu/emulate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <map>
1111

1212
extern bool g_EnhancedErrorChecking;
13+
extern bool g_KernelForProfiling;
1314

1415
extern const struct _cl_icd_dispatch* g_pNextDispatch;
1516

layers/10_cmdbufemu/main.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434

3535
bool g_EnhancedErrorChecking = false;
3636

37+
// Using kernels for profiling can fix issues with some implementations
38+
// that do not properly support event profiling on barrkers.
39+
40+
bool g_KernelForProfiling = false;
41+
3742
const struct _cl_icd_dispatch* g_pNextDispatch = NULL;
3843

3944
static cl_int CL_API_CALL
@@ -283,6 +288,7 @@ CL_API_ENTRY cl_int CL_API_CALL clInitLayer(
283288
_init_dispatch();
284289

285290
getControl("CMDBUFEMU_EnhancedErrorChecking", g_EnhancedErrorChecking);
291+
getControl("CMDBUFEMU_KernelForProfiling", g_KernelForProfiling);
286292

287293
g_pNextDispatch = target_dispatch;
288294

0 commit comments

Comments
 (0)