Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit 8f9071e

Browse files
committed
SWDEV-294669 - Keep one more slot for HW processing
The original logic left only one slot in the queue for HW processing. For some reason, there is a race condition in which the CPU overwrites the slot before the currently active one. The workaround is to avoid reusing the slot immediately preceding the currently active slot, because HW processing of that slot may still be unfinished. Change-Id: I565495a8feeaedffc9fc8a505edbee5ff5816975
1 parent f88ef66 commit 8f9071e

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

device/rocm/rocvirtual.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(
779779
AqlPacket* packet, uint16_t header, uint16_t rest, bool blocking, size_t size) {
780780
const uint32_t queueSize = gpu_queue_->size;
781781
const uint32_t queueMask = queueSize - 1;
782+
// @note: Reserve an extra slot for HW processing. There is an unknown race condition in some apps.
783+
const uint32_t sw_queue_size = queueMask - 1;
782784

783785
// Check for queue full and wait if needed.
784786
uint64_t index = hsa_queue_add_write_index_screlease(gpu_queue_, size);
@@ -792,12 +794,12 @@ bool VirtualGPU::dispatchGenericAqlPacket(
792794
}
793795

794796
// Make sure the slot is free for usage
795-
while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= queueMask) {
797+
while ((index - hsa_queue_load_read_index_scacquire(gpu_queue_)) >= sw_queue_size) {
796798
amd::Os::yield();
797799
}
798800

799801
// Add blocking command if the original value of read index was behind of the queue size
800-
if (blocking || (index - read) >= queueMask) {
802+
if (blocking || (index - read) >= sw_queue_size) {
801803
if (packet->completion_signal.handle == 0) {
802804
packet->completion_signal = Barriers().ActiveSignal();
803805
}

0 commit comments

Comments
 (0)