Skip to content

Commit 636810b

Browse files
committed
add the ability to use an empty kernel for command buffer profiling
1 parent 942bf52 commit 636810b

File tree

3 files changed

+106
-12
lines changed

3 files changed

+106
-12
lines changed

layers/10_cmdbufemu/emulate.cpp

Lines changed: 99 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
3939
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
4040
CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
4141

42+
static cl_int enqueueProfilingKernel(
43+
cl_command_queue queue,
44+
cl_kernel kernel,
45+
cl_uint num_events_in_wait_list,
46+
const cl_event* event_wait_list,
47+
cl_event* event )
48+
{
49+
const size_t one = 1;
50+
return g_pNextDispatch->clEnqueueNDRangeKernel(
51+
queue,
52+
kernel,
53+
1,
54+
nullptr,
55+
&one,
56+
nullptr,
57+
num_events_in_wait_list,
58+
event_wait_list,
59+
event );
60+
}
61+
4262
typedef struct _cl_mutable_command_khr
4363
{
4464
static bool isValid( cl_mutable_command_khr command )
@@ -1254,6 +1274,11 @@ typedef struct _cl_command_buffer_khr
12541274
{
12551275
g_pNextDispatch->clReleaseCommandQueue(queue);
12561276
}
1277+
1278+
for( auto kernel : ProfilingKernels )
1279+
{
1280+
g_pNextDispatch->clReleaseKernel(kernel);
1281+
}
12571282
}
12581283

12591284
static bool isValid( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1322,17 @@ typedef struct _cl_command_buffer_khr
12971322

12981323
cl_command_queue getQueue() const
12991324
{
1300-
if( Queues.size() > 0 )
1301-
{
1302-
return Queues[0];
1303-
}
1304-
return nullptr;
1325+
return Queues.empty() ? nullptr : Queues[0];
13051326
}
13061327

13071328
cl_command_queue getTestQueue() const
13081329
{
1309-
if( TestQueues.size() > 0 )
1310-
{
1311-
return TestQueues[0];
1312-
}
1313-
return nullptr;
1330+
return TestQueues.empty() ? nullptr : TestQueues[0];
1331+
}
1332+
1333+
cl_kernel getProfilingKernel() const
1334+
{
1335+
return ProfilingKernels.empty() ? nullptr : ProfilingKernels[0];
13141336
}
13151337

13161338
cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts() const
@@ -1671,6 +1693,7 @@ typedef struct _cl_command_buffer_khr
16711693
std::vector<bool> IsInOrder;
16721694
std::vector<cl_command_queue> TestQueues;
16731695
std::vector<cl_event> BlockingEvents;
1696+
std::vector<cl_kernel> ProfilingKernels;
16741697

16751698
std::vector<std::unique_ptr<Command>> Commands;
16761699
std::atomic<uint32_t> NextSyncPoint;
@@ -1747,6 +1770,52 @@ typedef struct _cl_command_buffer_khr
17471770
}
17481771
}
17491772

1773+
void setupProfilingKernel(cl_command_queue queue)
1774+
{
1775+
if( g_KernelForProfiling )
1776+
{
1777+
cl_context context = nullptr;
1778+
g_pNextDispatch->clGetCommandQueueInfo(
1779+
queue,
1780+
CL_QUEUE_CONTEXT,
1781+
sizeof(context),
1782+
&context,
1783+
nullptr );
1784+
1785+
cl_device_id device = nullptr;
1786+
g_pNextDispatch->clGetCommandQueueInfo(
1787+
queue,
1788+
CL_QUEUE_DEVICE,
1789+
sizeof(device),
1790+
&device,
1791+
nullptr );
1792+
1793+
const char* kernelString = "kernel void Empty() {}";
1794+
cl_program program = g_pNextDispatch->clCreateProgramWithSource(
1795+
context,
1796+
1,
1797+
&kernelString,
1798+
nullptr,
1799+
nullptr );
1800+
g_pNextDispatch->clBuildProgram(
1801+
program,
1802+
1,
1803+
&device,
1804+
nullptr,
1805+
nullptr,
1806+
nullptr );
1807+
1808+
cl_kernel kernel = g_pNextDispatch->clCreateKernel(
1809+
program,
1810+
"Empty",
1811+
nullptr );
1812+
g_pNextDispatch->clReleaseProgram(
1813+
program );
1814+
1815+
ProfilingKernels.push_back(kernel);
1816+
}
1817+
}
1818+
17501819
_cl_command_buffer_khr(
17511820
cl_command_buffer_flags_khr flags,
17521821
cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1993,7 +2062,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
19932062
queue,
19942063
num_events_in_wait_list,
19952064
event_wait_list,
1996-
event ? &startEvent : nullptr);
2065+
event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2066+
if( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2067+
{
2068+
errorCode = enqueueProfilingKernel(
2069+
queue,
2070+
cmdbuf->getProfilingKernel(),
2071+
0,
2072+
nullptr,
2073+
&startEvent );
2074+
}
19972075
}
19982076

19992077
if( errorCode == CL_SUCCESS )
@@ -2007,7 +2085,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
20072085
queue,
20082086
0,
20092087
nullptr,
2010-
event );
2088+
g_KernelForProfiling ? nullptr : event );
2089+
if( errorCode == CL_SUCCESS && g_KernelForProfiling )
2090+
{
2091+
errorCode = enqueueProfilingKernel(
2092+
queue,
2093+
cmdbuf->getProfilingKernel(),
2094+
0,
2095+
nullptr,
2096+
event );
2097+
}
20112098
}
20122099

20132100
if( event )

layers/10_cmdbufemu/emulate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <map>
1111

1212
extern bool g_EnhancedErrorChecking;
13+
extern bool g_KernelForProfiling;
1314

1415
extern const struct _cl_icd_dispatch* g_pNextDispatch;
1516

layers/10_cmdbufemu/main.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434

3535
bool g_EnhancedErrorChecking = false;
3636

37+
// Using kernels for profiling can fix issues with some implementations
38+
// that do not properly support event profiling on barrkers.
39+
40+
bool g_KernelForProfiling = false;
41+
3742
const struct _cl_icd_dispatch* g_pNextDispatch = NULL;
3843

3944
static cl_int CL_API_CALL
@@ -283,6 +288,7 @@ CL_API_ENTRY cl_int CL_API_CALL clInitLayer(
283288
_init_dispatch();
284289

285290
getControl("CMDBUFEMU_EnhancedErrorChecking", g_EnhancedErrorChecking);
291+
getControl("CMDBUFEMU_KernelForProfiling", g_KernelForProfiling);
286292

287293
g_pNextDispatch = target_dispatch;
288294

0 commit comments

Comments
 (0)