diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 2b18a4fb6e28f..4ed73e700d8ce 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -572,12 +572,15 @@ EventImplPtr queue_impl::submit_kernel_direct_impl( KernelData KData; KData.setDeviceKernelInfoPtr(DeviceKernelInfo); - KData.setKernelFunc(HostKernel.getPtr()); KData.setNDRDesc(NDRDesc); auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData, bool SchedulerBypass) -> EventImplPtr { if (SchedulerBypass) { + // No need to copy/move the kernel function, so we set + // the function pointer to the original function + KData.setKernelFunc(HostKernel.getPtr()); + return submit_kernel_scheduler_bypass(KData, CGData.MEvents, CallerNeedsEvent, nullptr, nullptr, CodeLoc, IsTopCodeLoc); @@ -589,6 +592,10 @@ EventImplPtr queue_impl::submit_kernel_direct_impl( std::shared_ptr HostKernelPtr = HostKernel.takeOrCopyOwnership(); + // When the kernel function is stored for future use, + // set the function pointer to the stored function + KData.setKernelFunc(HostKernelPtr->getPtr()); + KData.extractArgsAndReqsFromLambda(); CommandGroup.reset(new detail::CGExecKernel( diff --git a/sycl/unittests/kernel-and-program/CMakeLists.txt b/sycl/unittests/kernel-and-program/CMakeLists.txt index 2ac6af2296f83..a96e1e80335c8 100644 --- a/sycl/unittests/kernel-and-program/CMakeLists.txt +++ b/sycl/unittests/kernel-and-program/CMakeLists.txt @@ -8,6 +8,7 @@ add_sycl_unittest(KernelAndProgramTests OBJECT KernelBuildOptions.cpp OutOfResources.cpp InMemCacheEviction.cpp + KernelArgs.cpp ) target_compile_definitions(KernelAndProgramTests_non_preview PRIVATE __SYCL_INTERNAL_API) target_compile_definitions(KernelAndProgramTests_preview PRIVATE __SYCL_INTERNAL_API __INTEL_PREVIEW_BREAKING_CHANGES) diff --git a/sycl/unittests/kernel-and-program/KernelArgs.cpp b/sycl/unittests/kernel-and-program/KernelArgs.cpp new file mode 100644 index 0000000000000..b4cd8bf8af89c --- /dev/null +++ b/sycl/unittests/kernel-and-program/KernelArgs.cpp @@ -0,0 +1,113 @@ +//==------------ KernelArgs.cpp ------ Kernel arguments unit tests ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +#include + +#include + +#include + +using namespace sycl; + +class TestKernelWithIntPtr; + +namespace sycl { +inline namespace _V1 { +namespace detail { + +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr unsigned getNumParams() { return 1; } + static constexpr const char *getName() { return "TestKernelWithIntPtr"; } + static constexpr int64_t getKernelSize() { return sizeof(int); } + + static constexpr const detail::kernel_param_desc_t &getParamDesc(int Index) { + return Index == 0 ? IntParamDesc : Dummy; + } + +private: + static constexpr detail::kernel_param_desc_t IntParamDesc = { + detail::kernel_param_kind_t::kind_std_layout, 0, 0}; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl + +static sycl::unittest::MockDeviceImage Img = + sycl::unittest::generateDefaultImage({"TestKernelWithIntPtr"}); +static sycl::unittest::MockDeviceImageArray<1> ImgArray{&Img}; + +static int ArgInt = 123; + +ur_result_t redefined_urKernelSetArgValue(void *pParams) { + auto params = *static_cast(pParams); + + int ArgValue = *static_cast(*params.ppArgValue); + EXPECT_EQ(ArgValue, ArgInt); + + return UR_RESULT_SUCCESS; +} + +void runKernelWithArgs(queue &Queue, int ArgI) { +// Pack to 1-byte boundaries, so the kernel size is not padded +#pragma pack(push, 1) + auto KernelLambda = [=]([[maybe_unused]] nd_item<1> i) { + [[maybe_unused]] volatile int ArgILocal = ArgI; + }; +#pragma pack(pop) + + Queue.parallel_for(nd_range<1>{32, 32}, KernelLambda); + // Erase the memory to make sure the lambda is not accessible + std::memset(&KernelLambda, 0, sizeof(KernelLambda)); +} + +// This test checks, if the kernel lambda is copied properly, +// so the arguments extraction can happen after the local copy +// of the kernel lambda is deallocated. +TEST(KernelArgsTest, KernelCopy) { + sycl::unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urKernelSetArgValue", + &redefined_urKernelSetArgValue); + + platform Plt = sycl::platform(); + + context Ctx{Plt}; + queue Queue{Ctx, default_selector_v, property::queue::in_order()}; + + std::mutex CvMutex; + std::condition_variable Cv; + bool ready = false; + + // The kernel submission is queued behind a host task, + // to force the scheduler-based submission. + Queue.submit([&](sycl::handler &CGH) { + CGH.host_task([&] { + std::unique_lock lk(CvMutex); + Cv.wait(lk, [&ready] { return ready; }); + }); + }); + + // The kernel lambda is defined in a separate function, + // so it will be deallocated before the argument extraction + // and kernel submission happens. + runKernelWithArgs(Queue, ArgInt); + + { + std::unique_lock lk(CvMutex); + ready = true; + } + Cv.notify_one(); + + Queue.wait(); +}