@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
39
39
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
40
40
CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
41
41
42
+ static cl_int enqueueProfilingKernel (
43
+ cl_command_queue queue,
44
+ cl_kernel kernel,
45
+ cl_uint num_events_in_wait_list,
46
+ const cl_event* event_wait_list,
47
+ cl_event* event )
48
+ {
49
+ const size_t one = 1 ;
50
+ return g_pNextDispatch->clEnqueueNDRangeKernel (
51
+ queue,
52
+ kernel,
53
+ 1 ,
54
+ nullptr ,
55
+ &one,
56
+ nullptr ,
57
+ num_events_in_wait_list,
58
+ event_wait_list,
59
+ event );
60
+ }
61
+
42
62
typedef struct _cl_mutable_command_khr
43
63
{
44
64
static bool isValid ( cl_mutable_command_khr command )
@@ -1229,6 +1249,7 @@ typedef struct _cl_command_buffer_khr
1229
1249
(props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
1230
1250
1231
1251
cmdbuf->setupTestQueue (queue);
1252
+ cmdbuf->setupProfilingKernel (queue);
1232
1253
}
1233
1254
}
1234
1255
@@ -1254,6 +1275,11 @@ typedef struct _cl_command_buffer_khr
1254
1275
{
1255
1276
g_pNextDispatch->clReleaseCommandQueue (queue);
1256
1277
}
1278
+
1279
+ for ( auto kernel : ProfilingKernels )
1280
+ {
1281
+ g_pNextDispatch->clReleaseKernel (kernel);
1282
+ }
1257
1283
}
1258
1284
1259
1285
static bool isValid ( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1323,17 @@ typedef struct _cl_command_buffer_khr
1297
1323
1298
1324
// Returns the first entry of Queues, or nullptr when Queues is empty.
cl_command_queue getQueue() const
{
    if( Queues.empty() )
    {
        return nullptr;
    }
    return Queues[0];
}
1306
1328
1307
1329
// Returns the first entry of TestQueues, or nullptr when TestQueues is empty.
cl_command_queue getTestQueue() const
{
    if( TestQueues.empty() )
    {
        return nullptr;
    }
    return TestQueues.front();
}
1333
+
1334
+ cl_kernel getProfilingKernel () const
1335
+ {
1336
+ return ProfilingKernels.empty () ? nullptr : ProfilingKernels[0 ];
1314
1337
}
1315
1338
1316
1339
cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts () const
@@ -1674,6 +1697,7 @@ typedef struct _cl_command_buffer_khr
1674
1697
std::vector<bool > IsInOrder;
1675
1698
std::vector<cl_command_queue> TestQueues;
1676
1699
std::vector<cl_event> BlockingEvents;
1700
+ std::vector<cl_kernel> ProfilingKernels;
1677
1701
1678
1702
std::vector<std::unique_ptr<Command>> Commands;
1679
1703
std::atomic<uint32_t > NextSyncPoint;
@@ -1750,6 +1774,52 @@ typedef struct _cl_command_buffer_khr
1750
1774
}
1751
1775
}
1752
1776
1777
+ void setupProfilingKernel (cl_command_queue queue)
1778
+ {
1779
+ if ( g_KernelForProfiling )
1780
+ {
1781
+ cl_context context = nullptr ;
1782
+ g_pNextDispatch->clGetCommandQueueInfo (
1783
+ queue,
1784
+ CL_QUEUE_CONTEXT,
1785
+ sizeof (context),
1786
+ &context,
1787
+ nullptr );
1788
+
1789
+ cl_device_id device = nullptr ;
1790
+ g_pNextDispatch->clGetCommandQueueInfo (
1791
+ queue,
1792
+ CL_QUEUE_DEVICE,
1793
+ sizeof (device),
1794
+ &device,
1795
+ nullptr );
1796
+
1797
+ const char * kernelString = " kernel void Empty() {}" ;
1798
+ cl_program program = g_pNextDispatch->clCreateProgramWithSource (
1799
+ context,
1800
+ 1 ,
1801
+ &kernelString,
1802
+ nullptr ,
1803
+ nullptr );
1804
+ g_pNextDispatch->clBuildProgram (
1805
+ program,
1806
+ 1 ,
1807
+ &device,
1808
+ nullptr ,
1809
+ nullptr ,
1810
+ nullptr );
1811
+
1812
+ cl_kernel kernel = g_pNextDispatch->clCreateKernel (
1813
+ program,
1814
+ " Empty" ,
1815
+ nullptr );
1816
+ g_pNextDispatch->clReleaseProgram (
1817
+ program );
1818
+
1819
+ ProfilingKernels.push_back (kernel);
1820
+ }
1821
+ }
1822
+
1753
1823
_cl_command_buffer_khr (
1754
1824
cl_command_buffer_flags_khr flags,
1755
1825
cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1996,7 +2066,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
1996
2066
queue,
1997
2067
num_events_in_wait_list,
1998
2068
event_wait_list,
1999
- event ? &startEvent : nullptr );
2069
+ event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2070
+ if ( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2071
+ {
2072
+ errorCode = enqueueProfilingKernel (
2073
+ queue,
2074
+ cmdbuf->getProfilingKernel (),
2075
+ 0 ,
2076
+ nullptr ,
2077
+ &startEvent );
2078
+ }
2000
2079
}
2001
2080
2002
2081
if ( errorCode == CL_SUCCESS )
@@ -2010,7 +2089,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
2010
2089
queue,
2011
2090
0 ,
2012
2091
nullptr ,
2013
- event );
2092
+ g_KernelForProfiling ? nullptr : event );
2093
+ if ( errorCode == CL_SUCCESS && g_KernelForProfiling )
2094
+ {
2095
+ errorCode = enqueueProfilingKernel (
2096
+ queue,
2097
+ cmdbuf->getProfilingKernel (),
2098
+ 0 ,
2099
+ nullptr ,
2100
+ event );
2101
+ }
2014
2102
}
2015
2103
2016
2104
if ( event )
0 commit comments