@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
39
39
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
40
40
CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
41
41
42
+ static cl_int enqueueProfilingKernel (
43
+ cl_command_queue queue,
44
+ cl_kernel kernel,
45
+ cl_uint num_events_in_wait_list,
46
+ const cl_event* event_wait_list,
47
+ cl_event* event )
48
+ {
49
+ const size_t one = 1 ;
50
+ return g_pNextDispatch->clEnqueueNDRangeKernel (
51
+ queue,
52
+ kernel,
53
+ 1 ,
54
+ nullptr ,
55
+ &one,
56
+ nullptr ,
57
+ num_events_in_wait_list,
58
+ event_wait_list,
59
+ event );
60
+ }
61
+
42
62
typedef struct _cl_mutable_command_khr
43
63
{
44
64
static bool isValid ( cl_mutable_command_khr command )
@@ -1254,6 +1274,11 @@ typedef struct _cl_command_buffer_khr
1254
1274
{
1255
1275
g_pNextDispatch->clReleaseCommandQueue (queue);
1256
1276
}
1277
+
1278
+ for ( auto kernel : ProfilingKernels )
1279
+ {
1280
+ g_pNextDispatch->clReleaseKernel (kernel);
1281
+ }
1257
1282
}
1258
1283
1259
1284
static bool isValid ( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1322,17 @@ typedef struct _cl_command_buffer_khr
1297
1322
1298
1323
cl_command_queue getQueue () const
1299
1324
{
1300
- if ( Queues.size () > 0 )
1301
- {
1302
- return Queues[0 ];
1303
- }
1304
- return nullptr ;
1325
+ return Queues.empty () ? nullptr : Queues[0 ];
1305
1326
}
1306
1327
1307
1328
cl_command_queue getTestQueue () const
1308
1329
{
1309
- if ( TestQueues.size () > 0 )
1310
- {
1311
- return TestQueues[0 ];
1312
- }
1313
- return nullptr ;
1330
+ return TestQueues.empty () ? nullptr : TestQueues[0 ];
1331
+ }
1332
+
1333
+ cl_kernel getProfilingKernel () const
1334
+ {
1335
+ return ProfilingKernels.empty () ? nullptr : ProfilingKernels[0 ];
1314
1336
}
1315
1337
1316
1338
cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts () const
@@ -1671,6 +1693,7 @@ typedef struct _cl_command_buffer_khr
1671
1693
std::vector<bool > IsInOrder;
1672
1694
std::vector<cl_command_queue> TestQueues;
1673
1695
std::vector<cl_event> BlockingEvents;
1696
+ std::vector<cl_kernel> ProfilingKernels;
1674
1697
1675
1698
std::vector<std::unique_ptr<Command>> Commands;
1676
1699
std::atomic<uint32_t > NextSyncPoint;
@@ -1747,6 +1770,52 @@ typedef struct _cl_command_buffer_khr
1747
1770
}
1748
1771
}
1749
1772
1773
+ void setupProfilingKernel (cl_command_queue queue)
1774
+ {
1775
+ if ( g_KernelForProfiling )
1776
+ {
1777
+ cl_context context = nullptr ;
1778
+ g_pNextDispatch->clGetCommandQueueInfo (
1779
+ queue,
1780
+ CL_QUEUE_CONTEXT,
1781
+ sizeof (context),
1782
+ &context,
1783
+ nullptr );
1784
+
1785
+ cl_device_id device = nullptr ;
1786
+ g_pNextDispatch->clGetCommandQueueInfo (
1787
+ queue,
1788
+ CL_QUEUE_DEVICE,
1789
+ sizeof (device),
1790
+ &device,
1791
+ nullptr );
1792
+
1793
+ const char * kernelString = " kernel void Empty() {}" ;
1794
+ cl_program program = g_pNextDispatch->clCreateProgramWithSource (
1795
+ context,
1796
+ 1 ,
1797
+ &kernelString,
1798
+ nullptr ,
1799
+ nullptr );
1800
+ g_pNextDispatch->clBuildProgram (
1801
+ program,
1802
+ 1 ,
1803
+ &device,
1804
+ nullptr ,
1805
+ nullptr ,
1806
+ nullptr );
1807
+
1808
+ cl_kernel kernel = g_pNextDispatch->clCreateKernel (
1809
+ program,
1810
+ " Empty" ,
1811
+ nullptr );
1812
+ g_pNextDispatch->clReleaseProgram (
1813
+ program );
1814
+
1815
+ ProfilingKernels.push_back (kernel);
1816
+ }
1817
+ }
1818
+
1750
1819
_cl_command_buffer_khr (
1751
1820
cl_command_buffer_flags_khr flags,
1752
1821
cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1993,7 +2062,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
1993
2062
queue,
1994
2063
num_events_in_wait_list,
1995
2064
event_wait_list,
1996
- event ? &startEvent : nullptr );
2065
+ event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2066
+ if ( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2067
+ {
2068
+ errorCode = enqueueProfilingKernel (
2069
+ queue,
2070
+ cmdbuf->getProfilingKernel (),
2071
+ 0 ,
2072
+ nullptr ,
2073
+ &startEvent );
2074
+ }
1997
2075
}
1998
2076
1999
2077
if ( errorCode == CL_SUCCESS )
@@ -2007,7 +2085,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
2007
2085
queue,
2008
2086
0 ,
2009
2087
nullptr ,
2010
- event );
2088
+ g_KernelForProfiling ? nullptr : event );
2089
+ if ( errorCode == CL_SUCCESS && g_KernelForProfiling )
2090
+ {
2091
+ errorCode = enqueueProfilingKernel (
2092
+ queue,
2093
+ cmdbuf->getProfilingKernel (),
2094
+ 0 ,
2095
+ nullptr ,
2096
+ event );
2097
+ }
2011
2098
}
2012
2099
2013
2100
if ( event )
0 commit comments