From d180dfb9ec23b7573d8526e81c2c37c2de7a3b95 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Thu, 29 May 2025 15:56:03 +0100 Subject: [PATCH 1/2] [Offload] Allow setting null arguments in olLaunchKernel --- offload/liboffload/API/Kernel.td | 6 +++-- .../OffloadAPI/device_code/CMakeLists.txt | 1 + .../unittests/OffloadAPI/device_code/noargs.c | 3 +++ .../OffloadAPI/kernel/olLaunchKernel.cpp | 27 ++++++++++++++++--- 4 files changed, 31 insertions(+), 6 deletions(-) create mode 100644 offload/unittests/OffloadAPI/device_code/noargs.c diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td index 247f9c1bf5b6a..45e3d8112791c 100644 --- a/offload/liboffload/API/Kernel.td +++ b/offload/liboffload/API/Kernel.td @@ -43,19 +43,21 @@ def : Function { let name = "olLaunchKernel"; let desc = "Enqueue a kernel launch with the specified size and parameters."; let details = [ - "If a queue is not specified, kernel execution happens synchronously" + "If a queue is not specified, kernel execution happens synchronously", + "ArgumentsData may be set to NULL (to indicate no parameters)" ]; let params = [ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>, Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>, Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>, - Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>, + Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN_OPTIONAL>, Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>, Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>, Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL> ]; let returns = [ Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>, + Return<"OL_ERRC_INVALID_ARGUMENT", ["`ArgumentsSize > 0 && ArgumentsData == NULL`"]>, Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>, ]; } diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt index 5814943e4aaa9..91a9ca24786a3 100644 --- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt @@ -61,6 +61,7 @@ endif() add_offload_test_device_code(foo.c foo) add_offload_test_device_code(bar.c bar) +add_offload_test_device_code(noargs.c noargs) add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS}) diff --git a/offload/unittests/OffloadAPI/device_code/noargs.c b/offload/unittests/OffloadAPI/device_code/noargs.c new file mode 100644 index 0000000000000..36e609aa26a09 --- /dev/null +++ b/offload/unittests/OffloadAPI/device_code/noargs.c @@ -0,0 +1,3 @@ +#include + +__gpu_kernel void noargs() { (void)0; } diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp index e17d094cfa612..d466799c1acaa 100644 --- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp +++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp @@ -10,14 +10,14 @@ #include #include -struct olLaunchKernelTest : OffloadQueueTest { - void SetUp() override { +struct LaunchKernelTestBase : OffloadQueueTest { + void SetUpKernel(const char *kernel) { RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp()); - ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin)); + ASSERT_TRUE(TestEnvironment::loadDeviceBinary(kernel, Device, DeviceBin)); ASSERT_GE(DeviceBin->getBufferSize(), 0lu); ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(), DeviceBin->getBufferSize(), &Program)); - ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel)); + ASSERT_SUCCESS(olGetKernel(Program, kernel, &Kernel)); LaunchArgs.Dimensions = 1; LaunchArgs.GroupSizeX = 64; LaunchArgs.GroupSizeY = 1; @@ -43,8 +43,20 @@ struct olLaunchKernelTest : OffloadQueueTest { ol_kernel_launch_size_args_t LaunchArgs{}; }; +struct olLaunchKernelTest : LaunchKernelTestBase { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("foo")); + } +}; OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelTest); +struct olLaunchKernelNoArgsTest : LaunchKernelTestBase { + void SetUp() override { + RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("noargs")); + } +}; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelNoArgsTest); + TEST_P(olLaunchKernelTest, Success) { void *Mem; ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, @@ -66,6 +78,13 @@ TEST_P(olLaunchKernelTest, Success) { ASSERT_SUCCESS(olMemFree(Mem)); } +TEST_P(olLaunchKernelNoArgsTest, Success) { + ASSERT_SUCCESS( + olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr)); + + ASSERT_SUCCESS(olWaitQueue(Queue)); +} + TEST_P(olLaunchKernelTest, SuccessSynchronous) { void *Mem; ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, From 70ba0e6f49c2503f22eac0979a2bc83df36f01f7 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Fri, 30 May 2025 10:14:16 +0100 Subject: [PATCH 2/2] Set opts for noargs.c --- offload/unittests/OffloadAPI/device_code/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt index 91a9ca24786a3..c2e4d0cb24e6e 100644 --- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt @@ -7,6 +7,7 @@ macro(add_offload_test_device_code test_filename test_name) add_custom_command(OUTPUT ${BIN_PATH} COMMAND ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda + ${ARGN} -march=${LIBOMPTARGET_DEP_CUDA_ARCH} --cuda-path=${CUDA_ROOT} ${SRC_PATH} -o ${BIN_PATH} @@ -21,6 +22,7 @@ macro(add_offload_test_device_code test_filename test_name) add_custom_command(OUTPUT ${BIN_PATH} COMMAND ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib + ${ARGN} -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH} ${SRC_PATH} -o ${BIN_PATH} DEPENDS ${SRC_PATH} @@ -61,7 +63,9 @@ endif() add_offload_test_device_code(foo.c foo) add_offload_test_device_code(bar.c bar) -add_offload_test_device_code(noargs.c noargs) +# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion +# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload +add_offload_test_device_code(noargs.c noargs -O3) add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})