From 9b9786f29dd1fa022c286962d7a208b6a492e822 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Tue, 22 Jul 2025 15:50:18 +0100 Subject: [PATCH 1/4] [Offload] Add (a new version of) olWaitQueue Not to be confused with olSyncQueue, which used to be called olWaitQueue until #150023. This function causes a queue to wait until all the provided events have completed before running any future scheduled work. --- offload/liboffload/API/Queue.td | 17 ++ offload/liboffload/src/OffloadImpl.cpp | 22 +++ offload/unittests/OffloadAPI/CMakeLists.txt | 3 +- .../OffloadAPI/device_code/CMakeLists.txt | 2 + .../OffloadAPI/device_code/sequence.c | 11 ++ .../OffloadAPI/queue/olWaitQueue.cpp | 148 ++++++++++++++++++ 6 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 offload/unittests/OffloadAPI/device_code/sequence.c create mode 100644 offload/unittests/OffloadAPI/queue/olWaitQueue.cpp diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td index 19327cdab4254..43c723de54510 100644 --- a/offload/liboffload/API/Queue.td +++ b/offload/liboffload/API/Queue.td @@ -41,6 +41,23 @@ def : Function { let returns = []; } +def : Function { + let name = "olWaitQueue"; + let desc = "Make any future work submitted to this queue wait until the provided events are complete."; + let details = [ + "All events in `Events` must complete beforet he queue is unblocked.", + "The input events can be from any queue on any device provided by the same platform as `Queue`.", + ]; + let params = [ + Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>, + Param<"ol_event_handle_t *", "Events", "list of `NumEvents` events to wait for", PARAM_IN>, + Param<"size_t", "NumEvents", "size of `Events`", PARAM_IN>, + ]; + let returns = [ + Return<"OL_ERRC_INVALID_NULL_HANDLE", ["Any event handle in the list is NULL"]>, + ]; +} + def : Enum { let name = "ol_queue_info_t"; let desc = "Supported queue info."; diff --git a/offload/liboffload/src/OffloadImpl.cpp 
b/offload/liboffload/src/OffloadImpl.cpp index d93e4f1db58a7..c155a6b85387c 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -500,6 +500,28 @@ Error olSyncQueue_impl(ol_queue_handle_t Queue) { return Error::success(); } +Error olWaitQueue_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events, + size_t NumEvents) { + auto *Device = Queue->Device->Device; + + for (size_t I = 0; I < NumEvents; I++) { + auto *Event = Events[I]; + + if (!Event) + return Plugin::error(ErrorCode::INVALID_NULL_HANDLE, + "olWaitQueue asked to wait on a NULL event"); + + // Do nothing if the event is for this queue + if (Event->Queue == Queue) + continue; + + if (auto Err = Device->waitEvent(Event->EventInfo, Queue->AsyncInfo)) + return Err; + } + + return Error::success(); +} + Error olGetQueueInfoImplDetail(ol_queue_handle_t Queue, ol_queue_info_t PropName, size_t PropSize, void *PropValue, size_t *PropSizeRet) { diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt index f09cfc6bb0876..2621eaeb64e82 100644 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -39,7 +39,8 @@ add_offload_unittest("queue" queue/olSyncQueue.cpp queue/olDestroyQueue.cpp queue/olGetQueueInfo.cpp - queue/olGetQueueInfoSize.cpp) + queue/olGetQueueInfoSize.cpp + queue/olWaitQueue.cpp) add_offload_unittest("symbol" symbol/olGetSymbol.cpp diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt index 11c8ccbd6c7c5..0e4695ee9969f 100644 --- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt @@ -8,6 +8,7 @@ add_offload_test_device_code(localmem_static.c localmem_static) add_offload_test_device_code(global.c global) add_offload_test_device_code(global_ctor.c global_ctor) add_offload_test_device_code(global_dtor.c global_dtor) 
+add_offload_test_device_code(sequence.c sequence) add_custom_target(offload_device_binaries DEPENDS foo.bin @@ -19,5 +20,6 @@ add_custom_target(offload_device_binaries DEPENDS global.bin global_ctor.bin global_dtor.bin + sequence.bin ) set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) diff --git a/offload/unittests/OffloadAPI/device_code/sequence.c b/offload/unittests/OffloadAPI/device_code/sequence.c new file mode 100644 index 0000000000000..22504086ffa38 --- /dev/null +++ b/offload/unittests/OffloadAPI/device_code/sequence.c @@ -0,0 +1,11 @@ +#include <gpuintrin.h> +#include <stdint.h> + +__gpu_kernel void sequence(uint32_t idx, uint32_t *inout) { + if (idx == 0) + inout[idx] = 0; + else if (idx == 1) + inout[idx] = 1; + else + inout[idx] = inout[idx-1] + inout[idx-2]; +} diff --git a/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp b/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp new file mode 100644 index 0000000000000..fdf272dafa911 --- /dev/null +++ b/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp @@ -0,0 +1,148 @@ +//===------- Offload API tests - olWaitQueue ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include <OffloadAPI.h> +#include <gtest/gtest.h> + +struct olWaitQueueTest : OffloadProgramTest { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUpWith("sequence")); + ASSERT_SUCCESS( + olGetSymbol(Program, "sequence", OL_SYMBOL_KIND_KERNEL, &Kernel)); + LaunchArgs.Dimensions = 1; + LaunchArgs.GroupSize = {1, 1, 1}; + LaunchArgs.NumGroups = {1, 1, 1}; + LaunchArgs.DynSharedMemory = 0; + } + + void TearDown() override { + RETURN_ON_FATAL_FAILURE(OffloadProgramTest::TearDown()); + } + + ol_symbol_handle_t Kernel = nullptr; + ol_kernel_launch_size_args_t LaunchArgs{}; +}; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitQueueTest); + +TEST_P(olWaitQueueTest, Success) { + constexpr size_t NUM_KERNELS = 16; + ol_queue_handle_t Queues[NUM_KERNELS]; + ol_event_handle_t Events[NUM_KERNELS]; + + void *Mem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + NUM_KERNELS * sizeof(uint32_t), &Mem)); + struct { + uint32_t Idx; + void *Mem; + } Args{0, Mem}; + + for (size_t I = 0; I < NUM_KERNELS; I++) { + Args.Idx = I; + + ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I])); + + if (I > 0) + ASSERT_SUCCESS(olWaitQueue(Queues[I], &Events[I - 1], 1)); + + ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args, + sizeof(Args), &LaunchArgs, &Events[I])); + } + + ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1])); + + uint32_t *Data = (uint32_t *)Mem; + for (uint32_t i = 2; i < NUM_KERNELS; i++) { + ASSERT_EQ(Data[i], Data[i - 1] + Data[i - 2]); + } +} + +TEST_P(olWaitQueueTest, SuccessSingleQueue) { + constexpr size_t NUM_KERNELS = 16; + ol_queue_handle_t Queue; + ol_event_handle_t Events[NUM_KERNELS]; + + ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); + + void *Mem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + NUM_KERNELS * sizeof(uint32_t), &Mem)); + struct { + uint32_t 
Idx; + void *Mem; + } Args{0, Mem}; + + for (size_t I = 0; I < NUM_KERNELS; I++) { + Args.Idx = I; + + if (I > 0) + ASSERT_SUCCESS(olWaitQueue(Queue, &Events[I - 1], 1)); + + ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), + &LaunchArgs, &Events[I])); + } + + ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1])); + + uint32_t *Data = (uint32_t *)Mem; + for (uint32_t i = 2; i < NUM_KERNELS; i++) { + ASSERT_EQ(Data[i], Data[i - 1] + Data[i - 2]); + } +} + +TEST_P(olWaitQueueTest, SuccessMultipleEvents) { + constexpr size_t NUM_KERNELS = 16; + ol_queue_handle_t Queues[NUM_KERNELS]; + ol_event_handle_t Events[NUM_KERNELS]; + + void *Mem; + ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, + NUM_KERNELS * sizeof(uint32_t), &Mem)); + struct { + uint32_t Idx; + void *Mem; + } Args{0, Mem}; + + for (size_t I = 0; I < NUM_KERNELS; I++) { + Args.Idx = I; + + ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I])); + + if (I > 0) + ASSERT_SUCCESS(olWaitQueue(Queues[I], Events, I)); + + ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args, + sizeof(Args), &LaunchArgs, &Events[I])); + } + + ASSERT_SUCCESS(olSyncEvent(Events[NUM_KERNELS - 1])); + + uint32_t *Data = (uint32_t *)Mem; + for (uint32_t i = 2; i < NUM_KERNELS; i++) { + ASSERT_EQ(Data[i], Data[i - 1] + Data[i - 2]); + } +} + +TEST_P(olWaitQueueTest, InvalidNullQueue) { + ol_event_handle_t Event; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitQueue(nullptr, &Event, 1)); +} + +TEST_P(olWaitQueueTest, InvalidNullEvent) { + ol_queue_handle_t Queue; + ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olWaitQueue(Queue, nullptr, 1)); +} + +TEST_P(olWaitQueueTest, InvalidNullInnerEvent) { + ol_queue_handle_t Queue; + ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); + ol_event_handle_t Event = nullptr; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitQueue(Queue, &Event, 1)); +} From d2a57904dd29a0bf988ae3d0c0344519a402d6ac Mon Sep 17 00:00:00 2001 
From: Ross Brunton Date: Tue, 22 Jul 2025 15:59:15 +0100 Subject: [PATCH 2/4] Clang-format --- offload/unittests/OffloadAPI/device_code/sequence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/unittests/OffloadAPI/device_code/sequence.c b/offload/unittests/OffloadAPI/device_code/sequence.c index 22504086ffa38..7662f2d817496 100644 --- a/offload/unittests/OffloadAPI/device_code/sequence.c +++ b/offload/unittests/OffloadAPI/device_code/sequence.c @@ -7,5 +7,5 @@ __gpu_kernel void sequence(uint32_t idx, uint32_t *inout) { else if (idx == 1) inout[idx] = 1; else - inout[idx] = inout[idx-1] + inout[idx-2]; + inout[idx] = inout[idx - 1] + inout[idx - 2]; } From 9539bfe34c63ec98ca04cdfce371b5046647d3f9 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Tue, 22 Jul 2025 16:01:05 +0100 Subject: [PATCH 3/4] Typo --- offload/liboffload/API/Queue.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td index 43c723de54510..1193f40fc96ab 100644 --- a/offload/liboffload/API/Queue.td +++ b/offload/liboffload/API/Queue.td @@ -45,7 +45,7 @@ def : Function { let name = "olWaitQueue"; let desc = "Make any future work submitted to this queue wait until the provided events are complete."; let details = [ - "All events in `Events` must complete beforet he queue is unblocked.", + "All events in `Events` must complete before the queue is unblocked.", "The input events can be from any queue on any device provided by the same platform as `Queue`.", ]; let params = [ From d52f16f3eecb1b0c2ee2693af0e572485e8c89c4 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Wed, 23 Jul 2025 10:59:11 +0100 Subject: [PATCH 4/4] Rename to olWaitEvents --- offload/liboffload/API/Queue.td | 2 +- offload/liboffload/src/OffloadImpl.cpp | 6 ++-- offload/unittests/OffloadAPI/CMakeLists.txt | 2 +- .../{olWaitQueue.cpp => olWaitEvents.cpp} | 30 +++++++++---------- 4 files changed, 20 insertions(+), 20 
deletions(-) rename offload/unittests/OffloadAPI/queue/{olWaitQueue.cpp => olWaitEvents.cpp} (80%) diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td index 1193f40fc96ab..6e86b2445c73d 100644 --- a/offload/liboffload/API/Queue.td +++ b/offload/liboffload/API/Queue.td @@ -42,7 +42,7 @@ def : Function { } def : Function { - let name = "olWaitQueue"; + let name = "olWaitEvents"; let desc = "Make any future work submitted to this queue wait until the provided events are complete."; let details = [ "All events in `Events` must complete before the queue is unblocked.", diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index c155a6b85387c..87f924168272d 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -500,8 +500,8 @@ Error olSyncQueue_impl(ol_queue_handle_t Queue) { return Error::success(); } -Error olWaitQueue_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events, - size_t NumEvents) { +Error olWaitEvents_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events, + size_t NumEvents) { auto *Device = Queue->Device->Device; for (size_t I = 0; I < NumEvents; I++) { @@ -509,7 +509,7 @@ Error olWaitQueue_impl(ol_queue_handle_t Queue, ol_event_handle_t *Events, if (!Event) return Plugin::error(ErrorCode::INVALID_NULL_HANDLE, - "olWaitQueue asked to wait on a NULL event"); + "olWaitEvents asked to wait on a NULL event"); // Do nothing if the event is for this queue if (Event->Queue == Queue) diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt index 2621eaeb64e82..6051532d404d3 100644 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -40,7 +40,7 @@ add_offload_unittest("queue" queue/olDestroyQueue.cpp queue/olGetQueueInfo.cpp queue/olGetQueueInfoSize.cpp - queue/olWaitQueue.cpp) + queue/olWaitEvents.cpp) add_offload_unittest("symbol" symbol/olGetSymbol.cpp 
diff --git a/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp b/offload/unittests/OffloadAPI/queue/olWaitEvents.cpp similarity index 80% rename from offload/unittests/OffloadAPI/queue/olWaitQueue.cpp rename to offload/unittests/OffloadAPI/queue/olWaitEvents.cpp index fdf272dafa911..c1e9acd1797d8 100644 --- a/offload/unittests/OffloadAPI/queue/olWaitQueue.cpp +++ b/offload/unittests/OffloadAPI/queue/olWaitEvents.cpp @@ -1,4 +1,4 @@ -//===------- Offload API tests - olWaitQueue ------------------------------===// +//===------- Offload API tests - olWaitEvents -----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,7 +10,7 @@ #include <OffloadAPI.h> #include <gtest/gtest.h> -struct olWaitQueueTest : OffloadProgramTest { +struct olWaitEventsTest : OffloadProgramTest { void SetUp() override { RETURN_ON_FATAL_FAILURE(OffloadProgramTest::SetUpWith("sequence")); ASSERT_SUCCESS( @@ -28,9 +28,9 @@ struct olWaitQueueTest : OffloadProgramTest { ol_symbol_handle_t Kernel = nullptr; ol_kernel_launch_size_args_t LaunchArgs{}; }; -OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitQueueTest); +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olWaitEventsTest); -TEST_P(olWaitQueueTest, Success) { +TEST_P(olWaitEventsTest, Success) { constexpr size_t NUM_KERNELS = 16; ol_queue_handle_t Queues[NUM_KERNELS]; ol_event_handle_t Events[NUM_KERNELS]; @@ -49,7 +49,7 @@ TEST_P(olWaitQueueTest, Success) { ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I])); if (I > 0) - ASSERT_SUCCESS(olWaitQueue(Queues[I], &Events[I - 1], 1)); + ASSERT_SUCCESS(olWaitEvents(Queues[I], &Events[I - 1], 1)); ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args, sizeof(Args), &LaunchArgs, &Events[I])); @@ -63,7 +63,7 @@ TEST_P(olWaitQueueTest, Success) { } } -TEST_P(olWaitQueueTest, SuccessSingleQueue) { +TEST_P(olWaitEventsTest, SuccessSingleQueue) { constexpr size_t NUM_KERNELS = 16; ol_queue_handle_t Queue; 
ol_event_handle_t Events[NUM_KERNELS]; @@ -82,7 +82,7 @@ TEST_P(olWaitQueueTest, SuccessSingleQueue) { Args.Idx = I; if (I > 0) - ASSERT_SUCCESS(olWaitQueue(Queue, &Events[I - 1], 1)); + ASSERT_SUCCESS(olWaitEvents(Queue, &Events[I - 1], 1)); ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs, &Events[I])); @@ -96,7 +96,7 @@ TEST_P(olWaitQueueTest, SuccessSingleQueue) { } } -TEST_P(olWaitQueueTest, SuccessMultipleEvents) { +TEST_P(olWaitEventsTest, SuccessMultipleEvents) { constexpr size_t NUM_KERNELS = 16; ol_queue_handle_t Queues[NUM_KERNELS]; ol_event_handle_t Events[NUM_KERNELS]; @@ -115,7 +115,7 @@ TEST_P(olWaitQueueTest, SuccessMultipleEvents) { ASSERT_SUCCESS(olCreateQueue(Device, &Queues[I])); if (I > 0) - ASSERT_SUCCESS(olWaitQueue(Queues[I], Events, I)); + ASSERT_SUCCESS(olWaitEvents(Queues[I], Events, I)); ASSERT_SUCCESS(olLaunchKernel(Queues[I], Device, Kernel, &Args, sizeof(Args), &LaunchArgs, &Events[I])); @@ -129,20 +129,20 @@ TEST_P(olWaitQueueTest, SuccessMultipleEvents) { } } -TEST_P(olWaitQueueTest, InvalidNullQueue) { +TEST_P(olWaitEventsTest, InvalidNullQueue) { ol_event_handle_t Event; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitQueue(nullptr, &Event, 1)); + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitEvents(nullptr, &Event, 1)); } -TEST_P(olWaitQueueTest, InvalidNullEvent) { +TEST_P(olWaitEventsTest, InvalidNullEvent) { ol_queue_handle_t Queue; ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); - ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olWaitQueue(Queue, nullptr, 1)); + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olWaitEvents(Queue, nullptr, 1)); } -TEST_P(olWaitQueueTest, InvalidNullInnerEvent) { +TEST_P(olWaitEventsTest, InvalidNullInnerEvent) { ol_queue_handle_t Queue; ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); ol_event_handle_t Event = nullptr; - ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitQueue(Queue, &Event, 1)); + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olWaitEvents(Queue, &Event, 
1)); }