@@ -22,6 +22,10 @@ struct ur_queue_handle_t_ {
22
22
23
23
std::vector<native_type> ComputeStreams;
24
24
std::vector<native_type> TransferStreams;
25
+ // Stream used solely when profiling is enabled
26
+ native_type ProfStream;
27
+ static std::once_flag ProfStreamFlag;
28
+ bool ProfStreamCreated;
25
29
// DelayCompute keeps track of which streams have been recently reused and
26
30
// their next use should be delayed. If a stream has been recently reused it
27
31
// will be skipped the next time it would be selected round-robin style. When
@@ -59,8 +63,8 @@ struct ur_queue_handle_t_ {
59
63
ur_context_handle_t Context, ur_device_handle_t Device,
60
64
unsigned int Flags, ur_queue_flags_t URFlags, int Priority,
61
65
bool BackendOwns = true )
62
- : ComputeStreams{std::move (ComputeStreams)}, TransferStreams{ std::move (
63
- TransferStreams)},
66
+ : ComputeStreams{std::move (ComputeStreams)},
67
+ TransferStreams{ std::move ( TransferStreams)},
64
68
DelayCompute (this ->ComputeStreams.size(), false ),
65
69
ComputeAppliedBarrier (this ->ComputeStreams.size()),
66
70
TransferAppliedBarrier (this ->TransferStreams.size()), Context{Context},
@@ -95,6 +99,17 @@ struct ur_queue_handle_t_ {
95
99
native_type getNextTransferStream ();
96
100
// Default stream accessor: forwards to getNextComputeStream() and returns
// whatever stream it selects.
native_type get() {
  return getNextComputeStream();
}
97
101
102
+ // Function which creates the profiling stream. Called only from makeNative
103
+ // in event handle, if the profiling is enabled.
104
+ void createProfilingStream () {
105
+ std::call_once (ProfStreamFlag, []() {
106
+ UR_CHECK_ERROR (
107
+ hipEventCreateWithFlags (&ProfStream, hipStreamNonBlocking));
108
+ ProfStreamCreated = true ;
109
+ });
110
+ }
111
+ native_type getProfilingStream () { return ProfStream; }
112
+
98
113
bool hasBeenSynchronized (uint32_t StreamToken) {
99
114
// stream token not associated with one of the compute streams
100
115
if (StreamToken == std::numeric_limits<uint32_t >::max ()) {
0 commit comments