Skip to content

Commit a1e9b22

Browse files
author
Konrad Kusiak
committed
Added a profiling stream to the Queue
1 parent dd8b470 commit a1e9b22

File tree

4 files changed

+26
-3
lines changed

4 files changed

+26
-3
lines changed

source/adapters/hip/event.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ ur_result_t ur_event_handle_t_::start() {
5151
try {
5252
if (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE || isTimestampEvent()) {
5353
// NOTE: This relies on the default stream to be unused.
54-
UR_CHECK_ERROR(hipEventRecord(EvQueued, 0));
54+
UR_CHECK_ERROR(hipEventRecord(EvQueued, Queue->getProfilingStream()));
5555
UR_CHECK_ERROR(hipEventRecord(EvStart, Stream));
5656
}
5757
} catch (ur_result_t Error) {

source/adapters/hip/event.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ struct ur_event_handle_t_ {
8383
const bool RequiresTimings =
8484
Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE ||
8585
Type == UR_COMMAND_TIMESTAMP_RECORDING_EXP;
86+
if (RequiresTimings) {
87+
Queue->createProfilingStream();
88+
}
8689
native_type EvEnd{nullptr}, EvQueued{nullptr}, EvStart{nullptr};
8790
UR_CHECK_ERROR(hipEventCreateWithFlags(
8891
&EvEnd, RequiresTimings ? hipEventDefault : hipEventDisableTiming));

source/adapters/hip/queue.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) {
222222
UR_CHECK_ERROR(hipStreamDestroy(S));
223223
});
224224

225+
// Tear down the profiling stream, but only when one was actually created
// for this queue. ProfStream is a member of the queue, so it has to be
// reached through hQueue; there is no free-standing ProfStream in this
// function.
if (hQueue->ProfStreamCreated) {
  // Drain the stream before destroying it, mirroring the
  // synchronize-then-destroy order used for the other streams above.
  UR_CHECK_ERROR(hipStreamSynchronize(hQueue->ProfStream));
  UR_CHECK_ERROR(hipStreamDestroy(hQueue->ProfStream));
}
229+
225230
return UR_RESULT_SUCCESS;
226231
} catch (ur_result_t Err) {
227232
return Err;

source/adapters/hip/queue.hpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ struct ur_queue_handle_t_ {
2222

2323
std::vector<native_type> ComputeStreams;
2424
std::vector<native_type> TransferStreams;
25+
// Stream used solely when profiling is enabled
26+
native_type ProfStream;
27+
static std::once_flag ProfStreamFlag;
28+
bool ProfStreamCreated;
2529
// DelayCompute keeps track of which streams have been recently reused and
2630
// their next use should be delayed. If a stream has been recently reused it
2731
// will be skipped the next time it would be selected round-robin style. When
@@ -59,8 +63,8 @@ struct ur_queue_handle_t_ {
5963
ur_context_handle_t Context, ur_device_handle_t Device,
6064
unsigned int Flags, ur_queue_flags_t URFlags, int Priority,
6165
bool BackendOwns = true)
62-
: ComputeStreams{std::move(ComputeStreams)}, TransferStreams{std::move(
63-
TransferStreams)},
66+
: ComputeStreams{std::move(ComputeStreams)},
67+
TransferStreams{std::move(TransferStreams)},
6468
DelayCompute(this->ComputeStreams.size(), false),
6569
ComputeAppliedBarrier(this->ComputeStreams.size()),
6670
TransferAppliedBarrier(this->TransferStreams.size()), Context{Context},
@@ -95,6 +99,17 @@ struct ur_queue_handle_t_ {
9599
native_type getNextTransferStream();
96100
// Convenience accessor: forwards to getNextComputeStream() and returns the
// stream it selects.
native_type get() {
  return getNextComputeStream();
}
97101

102+
// Function which creates the profiling stream. Called only from makeNative
103+
// in event handle, if the profiling is enabled.
104+
void createProfilingStream() {
105+
std::call_once(ProfStreamFlag, []() {
106+
UR_CHECK_ERROR(
107+
hipEventCreateWithFlags(&ProfStream, hipStreamNonBlocking));
108+
ProfStreamCreated = true;
109+
});
110+
}
111+
// Returns the current value of the ProfStream member. The handle is only
// meaningful after createProfilingStream() has run.
native_type getProfilingStream() {
  return this->ProfStream;
}
112+
98113
bool hasBeenSynchronized(uint32_t StreamToken) {
99114
// stream token not associated with one of the compute streams
100115
if (StreamToken == std::numeric_limits<uint32_t>::max()) {

0 commit comments

Comments
 (0)