@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
39
39
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
40
40
CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
41
41
42
+ static cl_int enqueueProfilingKernel (
43
+ cl_command_queue queue,
44
+ cl_kernel kernel,
45
+ cl_uint num_events_in_wait_list,
46
+ const cl_event* event_wait_list,
47
+ cl_event* event )
48
+ {
49
+ const size_t one = 1 ;
50
+ return g_pNextDispatch->clEnqueueNDRangeKernel (
51
+ queue,
52
+ kernel,
53
+ 1 ,
54
+ nullptr ,
55
+ &one,
56
+ nullptr ,
57
+ num_events_in_wait_list,
58
+ event_wait_list,
59
+ event );
60
+ }
61
+
42
62
typedef struct _cl_mutable_command_khr
43
63
{
44
64
static bool isValid ( cl_mutable_command_khr command )
@@ -1229,6 +1249,7 @@ typedef struct _cl_command_buffer_khr
1229
1249
(props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
1230
1250
1231
1251
cmdbuf->setupTestQueue (queue);
1252
+ cmdbuf->setupProfilingKernel (queue);
1232
1253
}
1233
1254
}
1234
1255
@@ -1254,6 +1275,11 @@ typedef struct _cl_command_buffer_khr
1254
1275
{
1255
1276
g_pNextDispatch->clReleaseCommandQueue (queue);
1256
1277
}
1278
+
1279
+ for ( auto kernel : ProfilingKernels )
1280
+ {
1281
+ g_pNextDispatch->clReleaseKernel (kernel);
1282
+ }
1257
1283
}
1258
1284
1259
1285
static bool isValid ( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1323,17 @@ typedef struct _cl_command_buffer_khr
1297
1323
1298
1324
// Returns the first entry of Queues, or nullptr when Queues is empty.
cl_command_queue getQueue() const
{
    if( Queues.empty() )
    {
        return nullptr;
    }
    return Queues[0];
}
1306
1328
1307
1329
// Returns the first entry of TestQueues, or nullptr when TestQueues is empty.
cl_command_queue getTestQueue() const
{
    if( TestQueues.empty() )
    {
        return nullptr;
    }
    return TestQueues.front();
}
1333
+
1334
+ cl_kernel getProfilingKernel () const
1335
+ {
1336
+ return ProfilingKernels.empty () ? nullptr : ProfilingKernels[0 ];
1314
1337
}
1315
1338
1316
1339
cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts () const
@@ -1671,6 +1694,7 @@ typedef struct _cl_command_buffer_khr
1671
1694
std::vector<bool > IsInOrder;
1672
1695
std::vector<cl_command_queue> TestQueues;
1673
1696
std::vector<cl_event> BlockingEvents;
1697
+ std::vector<cl_kernel> ProfilingKernels;
1674
1698
1675
1699
std::vector<std::unique_ptr<Command>> Commands;
1676
1700
std::atomic<uint32_t > NextSyncPoint;
@@ -1747,6 +1771,52 @@ typedef struct _cl_command_buffer_khr
1747
1771
}
1748
1772
}
1749
1773
1774
+ void setupProfilingKernel (cl_command_queue queue)
1775
+ {
1776
+ if ( g_KernelForProfiling )
1777
+ {
1778
+ cl_context context = nullptr ;
1779
+ g_pNextDispatch->clGetCommandQueueInfo (
1780
+ queue,
1781
+ CL_QUEUE_CONTEXT,
1782
+ sizeof (context),
1783
+ &context,
1784
+ nullptr );
1785
+
1786
+ cl_device_id device = nullptr ;
1787
+ g_pNextDispatch->clGetCommandQueueInfo (
1788
+ queue,
1789
+ CL_QUEUE_DEVICE,
1790
+ sizeof (device),
1791
+ &device,
1792
+ nullptr );
1793
+
1794
+ const char * kernelString = " kernel void Empty() {}" ;
1795
+ cl_program program = g_pNextDispatch->clCreateProgramWithSource (
1796
+ context,
1797
+ 1 ,
1798
+ &kernelString,
1799
+ nullptr ,
1800
+ nullptr );
1801
+ g_pNextDispatch->clBuildProgram (
1802
+ program,
1803
+ 1 ,
1804
+ &device,
1805
+ nullptr ,
1806
+ nullptr ,
1807
+ nullptr );
1808
+
1809
+ cl_kernel kernel = g_pNextDispatch->clCreateKernel (
1810
+ program,
1811
+ " Empty" ,
1812
+ nullptr );
1813
+ g_pNextDispatch->clReleaseProgram (
1814
+ program );
1815
+
1816
+ ProfilingKernels.push_back (kernel);
1817
+ }
1818
+ }
1819
+
1750
1820
_cl_command_buffer_khr (
1751
1821
cl_command_buffer_flags_khr flags,
1752
1822
cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1993,7 +2063,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
1993
2063
queue,
1994
2064
num_events_in_wait_list,
1995
2065
event_wait_list,
1996
- event ? &startEvent : nullptr );
2066
+ event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2067
+ if ( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2068
+ {
2069
+ errorCode = enqueueProfilingKernel (
2070
+ queue,
2071
+ cmdbuf->getProfilingKernel (),
2072
+ 0 ,
2073
+ nullptr ,
2074
+ &startEvent );
2075
+ }
1997
2076
}
1998
2077
1999
2078
if ( errorCode == CL_SUCCESS )
@@ -2007,7 +2086,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
2007
2086
queue,
2008
2087
0 ,
2009
2088
nullptr ,
2010
- event );
2089
+ g_KernelForProfiling ? nullptr : event );
2090
+ if ( errorCode == CL_SUCCESS && g_KernelForProfiling )
2091
+ {
2092
+ errorCode = enqueueProfilingKernel (
2093
+ queue,
2094
+ cmdbuf->getProfilingKernel (),
2095
+ 0 ,
2096
+ nullptr ,
2097
+ event );
2098
+ }
2011
2099
}
2012
2100
2013
2101
if ( event )
0 commit comments