diff --git a/sycl/test-e2e/KernelAndProgram/persistent-cache-multi-device.cpp b/sycl/test-e2e/KernelAndProgram/persistent-cache-multi-device.cpp deleted file mode 100644 index 9d7f28849458..000000000000 --- a/sycl/test-e2e/KernelAndProgram/persistent-cache-multi-device.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// REQUIRES: (level_zero || opencl) && linux && gpu - -// RUN: %{build} -o %t.out -// RUN: rm -rf %t/cache_dir -// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=3 SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=1 SYCL_CACHE_DIR=%t/cache_dir env -u XDG_CACHE_HOME env -u HOME %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-BUILD -// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=3 SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=1 SYCL_CACHE_DIR=%t/cache_dir env -u XDG_CACHE_HOME env -u HOME %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-CACHE - -// Depends on SPIR-V Backend & run-time drivers version. -// XFAIL: spirv-backend && run-mode -// XFAIL-TRACKER: CMPLRLLVM-64705 - -// XFAIL: linux && arch-intel_gpu_bmg_g21 && !igc-dev && run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17453 - -// Test checks that persistent cache works correctly with multiple devices. - -#include -#include - -using namespace sycl; - -class SimpleKernel; - -int main(void) { - platform plt; - auto devs = plt.get_devices(); - context ctx(devs); - assert(devs.size() >= 3); - - constexpr size_t sz = 1024; - sycl::buffer bufA(sz); - auto bundle = sycl::get_kernel_bundle(ctx); - // CHECK-BUILD: [Persistent Cache]: device binary has been cached - // CHECK-CACHE: [Persistent Cache]: using cached device binary - auto bundle_exe = sycl::build(bundle, {devs[0], devs[2]}); - auto kernel = bundle_exe.get_kernel(sycl::get_kernel_id()); - sycl::queue q(devs[2]); - q.submit([&](sycl::handler &cgh) { - sycl::accessor accA(bufA, cgh, sycl::write_only); - cgh.parallel_for(sycl::range<1>(sz), [=](sycl::item<1> item) { - accA[item] = item.get_linear_id(); - }); - }); - q.wait(); - return 0; -} diff --git a/sycl/test-e2e/ProgramManager/multi_device_bundle/build_twice.cpp b/sycl/test-e2e/ProgramManager/multi_device_bundle/build_twice.cpp deleted file mode 100644 index 221d136dacf3..000000000000 --- a/sycl/test-e2e/ProgramManager/multi_device_bundle/build_twice.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// REQUIRES: gpu && linux && (opencl || level_zero) - -// Test to check that we can create input kernel bundle and call build twice for -// overlapping set of devices and execute the kernel on each device. - -// RUN: %{build} -o %t.out -// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=3 SYCL_UR_TRACE=2 %{run} %t.out | FileCheck %s - -// Depends on SPIR-V Backend & run-time drivers version. -// XFAIL: spirv-backend && run-mode -// XFAIL-TRACKER: CMPLRLLVM-64705 - -// XFAIL: linux && arch-intel_gpu_bmg_g21 && !igc-dev && run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17453 - -#include -#include - -class Kernel; - -int main() { - sycl::platform platform; - auto devices = platform.get_devices(); - if (!(devices.size() >= 3)) - return 0; - - auto dev1 = devices[0], dev2 = devices[1], dev3 = devices[2]; - - auto ctx = sycl::context({dev1, dev2, dev3}); - sycl::queue queues[3] = {sycl::queue(ctx, dev1), sycl::queue(ctx, dev2), - sycl::queue(ctx, dev3)}; - sycl::kernel_id kid = sycl::get_kernel_id(); - sycl::kernel_bundle kernelBundleInput = - sycl::get_kernel_bundle(ctx, {kid}); - // CHECK: urProgramCreateWithIL( - // CHECK: urProgramBuildExp( - auto KernelBundleExe1 = build(kernelBundleInput, {dev1, dev2}); - // CHECK: urProgramCreateWithIL( - // CHECK: urProgramBuildExp( - auto KernelBundleExe2 = build(kernelBundleInput, {dev2, dev3}); - // No other program creation calls are expected. - // CHECK-NOT: urProgramCreateWithIL( - auto KernelObj1 = KernelBundleExe1.get_kernel(kid); - auto KernelObj2 = KernelBundleExe2.get_kernel(kid); - queues[0].submit([=](sycl::handler &cgh) { - cgh.use_kernel_bundle(KernelBundleExe1); - cgh.single_task([=]() {}); - }); - queues[1].submit([=](sycl::handler &cgh) { - cgh.use_kernel_bundle(KernelBundleExe1); - cgh.single_task(KernelObj1); - }); - queues[1].submit([=](sycl::handler &cgh) { - cgh.use_kernel_bundle(KernelBundleExe2); - cgh.single_task(KernelObj2); - }); - queues[2].submit([=](sycl::handler &cgh) { - cgh.use_kernel_bundle(KernelBundleExe2); - cgh.single_task(KernelObj2); - }); - return 0; -} diff --git a/sycl/test-e2e/ProgramManager/multi_device_bundle/device_libs_and_caching.cpp b/sycl/test-e2e/ProgramManager/multi_device_bundle/device_libs_and_caching.cpp deleted file mode 100644 index 2012b8a55a77..000000000000 --- a/sycl/test-e2e/ProgramManager/multi_device_bundle/device_libs_and_caching.cpp +++ /dev/null @@ -1,164 +0,0 @@ -// REQUIRES: ocloc && gpu && linux && target-spir - -// Test to check several use cases for multi-device kernel bundles. -// Test covers AOT and JIT cases. Kernel is using some math functions to enforce -// using device libraries to excersise additional logic in the program manager. -// Checks are used to test that program and device libraries caching works as -// expected. - -// Test JIT first. -// Intentionally use jit linking of device libraries to check that program -// manager can handle this as well. With this option program manager will -// compile the main program, load and compile device libraries and then link -// everything together. -// RUN: %{build} -fsycl-device-lib-jit-link -o %t_jit.out - -// Check the default case when in-memory caching is enabled. -// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=4 SYCL_UR_TRACE=2 %{run} %t_jit.out | FileCheck %s --check-prefixes=CHECK-SPIRV-JIT-LINK-TRACE - -// Check the case when in-memory caching of the programs is disabled. -// RUN: env SYCL_CACHE_IN_MEM=0 NEOReadDebugKeys=1 CreateMultipleRootDevices=4 %{run} %t_jit.out - -// Test AOT next. -// RUN: %{build} -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen "-device *" -o %t_aot.out - -// Check the default case when in-memory caching is enabled. -// RUN: env NEOReadDebugKeys=1 CreateMultipleRootDevices=4 SYCL_UR_TRACE=2 %{run} %t_aot.out | FileCheck %s --check-prefixes=CHECK-AOT-TRACE - -// Check the case when in-memory caching of the programs is disabled. -// RUN: env SYCL_CACHE_IN_MEM=0 NEOReadDebugKeys=1 CreateMultipleRootDevices=4 %{run} %t_aot.out - -// Depends on SPIR-V Backend & run-time drivers version. -// XFAIL: spirv-backend && run-mode -// XFAIL-TRACKER: CMPLRLLVM-64705 - -// XFAIL: linux && arch-intel_gpu_bmg_g21 && !igc-dev && run-mode -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17453 - -#include -#include -#include -#include -#include -#include - -class Kernel; -class Kernel2; -class Kernel3; - -int main() { - sycl::platform platform; - auto devices = platform.get_devices(); - if (!(devices.size() >= 4)) - return 0; - auto dev1 = devices[0], dev2 = devices[1], dev3 = devices[2], - dev4 = devices[3]; - auto ctx = sycl::context({dev1, dev2, dev3, dev4}); - sycl::queue queues[4] = {sycl::queue(ctx, dev1), sycl::queue(ctx, dev2), - sycl::queue(ctx, dev3), sycl::queue(ctx, dev4)}; - - auto res = sycl::malloc_host(3, ctx); - auto KernelLambda = [=]() { - res[0] = sycl::ext::intel::math::float2int_rd(4.0f) + (int)sqrtf(4.0f) + - std::exp(std::complex(0.f, 0.f)).real(); - }; - // Test case 1 - // Get bundle in executable state for multiple devices in a context, enqueue a - // kernel to each device. - { - sycl::kernel_id kid = sycl::get_kernel_id(); - // Create the main program containing the kernel. - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCreateWithIL( - - // Create and compile the program for required device libraries (2 of them - // in this case). - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCreateWithIL( - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCompileExp( - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCreateWithIL( - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCompileExp( - - // Compile the main program - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCompileExp( - - // Link main program and device libraries. - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramLinkExp( - - // CHECK-AOT-TRACE: urProgramCreateWithBinary( - // CHECK-AOT-TRACE: urProgramBuildExp( - sycl::kernel_bundle kernelBundleExecutable = - sycl::get_kernel_bundle( - ctx, {dev1, dev2, dev3}, {kid}); - - for (int i = 0; i < 3; i++) { - queues[i].submit([=](sycl::handler &cgh) { - cgh.use_kernel_bundle(kernelBundleExecutable); - cgh.single_task(KernelLambda); - }); - queues[i].wait(); - } - std::cout << "Test #1 passed." << std::endl; - } - - // Test case 2 - // Get two bundles in executable state: for the first two devices in the - // context and for the new set of devices which includes the dev4. This checks - // caching of the programs and device libraries. - { - sycl::kernel_id kid = sycl::get_kernel_id(); - // Program associated with {dev1, dev2, dev3} is supposed to be cached from - // the first test case, we don't expect any additional program creation and - // compilation calls for the following bundles because they are all created - // for subsets of {dev1, dev2, dev3} which means that the program handle - // from cache will be used. - sycl::kernel_bundle kernelBundleExecutableSubset1 = - sycl::get_kernel_bundle( - ctx, {dev1, dev2}, {kid}); - sycl::kernel_bundle kernelBundleExecutableSubset2 = - sycl::get_kernel_bundle( - ctx, {dev2, dev3}, {kid}); - sycl::kernel_bundle kernelBundleExecutableSubset3 = - sycl::get_kernel_bundle( - ctx, {dev1, dev3}, {kid}); - sycl::kernel_bundle kernelBundleExecutableSubset4 = - sycl::get_kernel_bundle(ctx, {dev3}, - {kid}); - - // Here we create a bundle with a different set of devices which includes - // dev4, so we expect new UR program creation. - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCreateWithIL( - - // Device libraries will be additionally compiled for dev4, but no program - // creation is expected for device libraries as program handle already - // exists in the per-context cache. - // CHECK-SPIRV-JIT-LINK-TRACE-NOT: urProgramCreateWithIL( - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCompileExp( - - // Main program will be compiled for new set of devices. - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramCompileExp( - - // Main program will be linked with device libraries. - // CHECK-SPIRV-JIT-LINK-TRACE: urProgramLinkExp( - - // CHECK-AOT-TRACE: urProgramCreateWithBinary( - // CHECK-AOT-TRACE: urProgramBuildExp( - sycl::kernel_bundle kernelBundleExecutableNewSet = - sycl::get_kernel_bundle( - ctx, {dev2, dev3, dev4}, {kid}); - - for (int i = 0; i < 3; i++) { - queues[0].submit([=](sycl::handler &cgh) { - cgh.use_kernel_bundle(kernelBundleExecutableSubset1); - cgh.single_task(KernelLambda); - }); - queues[0].wait(); - - queues[2].submit([=](sycl::handler &cgh) { - cgh.use_kernel_bundle(kernelBundleExecutableNewSet); - cgh.single_task(KernelLambda); - }); - queues[2].wait(); - } - std::cout << "Test #2 passed." << std::endl; - } - return 0; -} diff --git a/sycl/unittests/helpers/MockDeviceImage.hpp b/sycl/unittests/helpers/MockDeviceImage.hpp index 9c8fdef64289..9ecf730cc6c1 100644 --- a/sycl/unittests/helpers/MockDeviceImage.hpp +++ b/sycl/unittests/helpers/MockDeviceImage.hpp @@ -161,7 +161,7 @@ template LifetimeExtender(std::vector) -> LifetimeExtender; /// Convenience wrapper for sycl_device_binary_property_set. class MockPropertySet { public: - MockPropertySet() { + MockPropertySet(const std::vector &DeviceLibExts = {}) { // Most of unit-tests are statically linked with SYCL RT. On Linux and Mac // systems that causes incorrect RT installation directory detection, which // prevents proper loading of fallback libraries. See intel/llvm#6945 @@ -170,11 +170,23 @@ class MockPropertySet { // unless there is a special property attached to it or special env variable // is set which forces RT to skip fallback libraries. // - // Setting this property here so unit-tests can be launched under any - // environment. + // By default, property is set to empty mask here so that unit-tests can be + // launched under any environment. Some unit tests might create dummy + // fallback libaries and require fallback libraries to be loaded, in such + // case input vector will be non-empty. - std::vector Data(/* eight elements */ 8, + std::vector Data(/* four elements */ 4, /* each element is zero */ 0); + if (!DeviceLibExts.empty()) { + uint32_t DeviceLibReqMask = 0; + for (auto Ext : DeviceLibExts) { + DeviceLibReqMask |= 0x1 + << (static_cast(Ext) - + static_cast( + DeviceLibExt::cl_intel_devicelib_assert)); + } + std::memcpy(Data.data(), &DeviceLibReqMask, sizeof(DeviceLibReqMask)); + } // Name doesn't matter here, it is not used by RT // Value must be an all-zero 32-bit mask, which would mean that no fallback // libraries are needed to be loaded. diff --git a/sycl/unittests/program_manager/CMakeLists.txt b/sycl/unittests/program_manager/CMakeLists.txt index 2b9577b67d9a..4c65804b9493 100644 --- a/sycl/unittests/program_manager/CMakeLists.txt +++ b/sycl/unittests/program_manager/CMakeLists.txt @@ -6,6 +6,7 @@ add_sycl_unittest(ProgramManagerTests OBJECT SubDevices.cpp passing_link_and_compile_options.cpp Cleanup.cpp + MultipleDevsKernelBundle.cpp ) add_subdirectory(arg_mask) diff --git a/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp b/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp new file mode 100644 index 000000000000..df4b7da331c8 --- /dev/null +++ b/sycl/unittests/program_manager/MultipleDevsKernelBundle.cpp @@ -0,0 +1,599 @@ +//==----------------------- MultipleDevsKernelBundle.cpp -------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Kernel bundle for multiple devices unit test + +#include "detail/context_impl.hpp" +#include "detail/kernel_bundle_impl.hpp" +#include "detail/persistent_device_code_cache.hpp" +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +using namespace sycl; + +class MultipleDevsKernelBundleTestKernel; +class DevLibTestKernel; + +MOCK_INTEGRATION_HEADER(MultipleDevsKernelBundleTestKernel) +MOCK_INTEGRATION_HEADER(DevLibTestKernel) + +using namespace sycl::unittest; + +inline void createDummyDeviceLib(sycl::detail::DeviceLibExt Ext) { + // Create a dummy fallback library correpsonding to the extension (if it + // doesn't exist). + std::string ExtName; + switch (Ext) { + case sycl::detail::DeviceLibExt::cl_intel_devicelib_math: + ExtName = "libsycl-fallback-cmath"; + break; + case sycl::detail::DeviceLibExt::cl_intel_devicelib_assert: + ExtName = "libsycl-fallback-cassert"; + break; + default: + FAIL() << "Unknown device library extension"; + } + + auto DSOPath = sycl::detail::OSUtil::getCurrentDSODir(); + std::string LibPath = DSOPath + detail::OSUtil::DirSep + ExtName + ".spv"; + std::ifstream LibFile(LibPath); + if (LibFile.good()) { + LibFile.close(); + } else { + std::ofstream LibFile(LibPath); + LibFile << "0"; + LibFile.close(); + } +} + +// Function to geneate mock device image which uses device libraries. +inline sycl::unittest::MockDeviceImage generateImage( + std::initializer_list KernelNames, + sycl::detail::ur::DeviceBinaryType BinType, const char *DeviceTargetSpec, + const std::vector &DeviceLibExts = {}) { + // Create dummy device libraries if they don't exist. + for (auto Ext : DeviceLibExts) { + createDummyDeviceLib(Ext); + } + + MockPropertySet PropSet(DeviceLibExts); + + std::string Combined; + for (auto it = KernelNames.begin(); it != KernelNames.end(); ++it) { + if (it != KernelNames.begin()) + Combined += ", "; + Combined += *it; + } + std::vector Bin(Combined.begin(), Combined.end()); + Bin.push_back(0); + + std::vector Entries = makeEmptyKernels(KernelNames); + + sycl::unittest::MockDeviceImage Img{BinType, // Format + DeviceTargetSpec, // DeviceTargetSpec + "", // Compile options + "", // Link options + std::move(Bin), + std::move(Entries), + std::move(PropSet)}; + return Img; +} + +// Set of mock device images which will be used in the tests. +static sycl::unittest::MockDeviceImage Imgs[3] = { + sycl::unittest::generateDefaultImage( + {"MultipleDevsKernelBundleTestKernel"}), + generateImage({"DevLibTestKernel"}, SYCL_DEVICE_BINARY_TYPE_SPIRV, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64, + {sycl::detail::DeviceLibExt::cl_intel_devicelib_math, + sycl::detail::DeviceLibExt::cl_intel_devicelib_assert}), + generateImage({"DevLibTestKernel"}, SYCL_DEVICE_BINARY_TYPE_NATIVE, + __SYCL_DEVICE_BINARY_TARGET_SPIRV64_X86_64, + {sycl::detail::DeviceLibExt::cl_intel_devicelib_math, + sycl::detail::DeviceLibExt::cl_intel_devicelib_assert})}; + +static sycl::unittest::MockDeviceImageArray<3> ImgArray{Imgs}; + +struct MockDeviceData { + int Index; + ur_device_type_t DeviceType; + ur_device_handle_t getHandle() { + return reinterpret_cast(this); + } + static MockDeviceData *fromHandle(ur_device_handle_t handle) { + return reinterpret_cast(handle); + } +}; + +// List of devices. +MockDeviceData MockGPUDevices[] = {{0, UR_DEVICE_TYPE_GPU}, + {1, UR_DEVICE_TYPE_GPU}, + {2, UR_DEVICE_TYPE_GPU}, + {3, UR_DEVICE_TYPE_GPU}}; +MockDeviceData MockCPUDevices[] = {{0, UR_DEVICE_TYPE_CPU}, + {1, UR_DEVICE_TYPE_CPU}, + {2, UR_DEVICE_TYPE_CPU}, + {3, UR_DEVICE_TYPE_CPU}}; + +static ur_result_t redefinedDeviceGet(void *pParams) { + auto params = *static_cast(pParams); + size_t Size = (*params.pDeviceType == UR_DEVICE_TYPE_GPU) + ? std::size(MockGPUDevices) + : std::size(MockCPUDevices); + MockDeviceData *MockDevices = (*params.pDeviceType == UR_DEVICE_TYPE_GPU) + ? MockGPUDevices + : MockCPUDevices; + + if (*params.ppNumDevices) { + **params.ppNumDevices = static_cast(Size); + return UR_RESULT_SUCCESS; + } + + if (*params.pphDevices) { + assert(*params.pNumEntries <= Size); + for (uint32_t i = 0; i < *params.pNumEntries; ++i) { + (*params.pphDevices)[i] = MockDevices[i].getHandle(); + } + } + + return UR_RESULT_SUCCESS; +} + +// Choose SPIRV image for gpu device and Native image for cpu device. +static ur_result_t redefinedDeviceSelectBinary(void *pParams) { + auto params = *static_cast(pParams); + auto target = MockDeviceData::fromHandle(*params.phDevice)->DeviceType == + UR_DEVICE_TYPE_CPU + ? UR_DEVICE_BINARY_TARGET_SPIRV64_X86_64 + : UR_DEVICE_BINARY_TARGET_SPIRV64; + // If compatible binary is found, select it, otherwise return -1 as an index - + // this is what program manager expects. + **params.ppSelectedBinary = int32_t(-1); + for (uint32_t i = 0; i < *params.pNumBinaries; ++i) { + if (strcmp((*params.ppBinaries)[i].pDeviceTargetSpec, target) == 0) { + **params.ppSelectedBinary = i; + return UR_RESULT_SUCCESS; + } + } + return UR_RESULT_SUCCESS; +} + +inline ur_result_t redefinedurKernelGetInfo(void *pParams) { + auto params = *static_cast(pParams); + constexpr char MockKernel[] = "MultipleDevsKernelBundleTestKernel"; + if (*params.ppropName == UR_KERNEL_INFO_FUNCTION_NAME) { + if (*params.ppPropValue) { + assert(*params.ppropSize == sizeof(MockKernel)); + std::memcpy(*params.ppPropValue, MockKernel, sizeof(MockKernel)); + } + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(MockKernel); + } + return UR_RESULT_SUCCESS; +} + +static ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_DEVICE_INFO_MULTI_DEVICE_COMPILE_SUPPORT_EXP) { + auto *Result = reinterpret_cast(*params.ppPropValue); + *Result = true; + } + return UR_RESULT_SUCCESS; +} + +static int ProgramBuildExpCounter = 0; +static ur_result_t redefinedurProgramBuildExp(void *) { + ++ProgramBuildExpCounter; + return UR_RESULT_SUCCESS; +} + +static int ProgramCreateWithILCounter = 0; +static ur_result_t redefinedurProgramCreateWithIL(void *) { + ++ProgramCreateWithILCounter; + return UR_RESULT_SUCCESS; +} + +static int ProgramLinkExpCounter = 0; +static ur_result_t redefinedurProgramLinkExp(void *) { + ++ProgramLinkExpCounter; + return UR_RESULT_SUCCESS; +} + +static int ProgramCompileExpCounter = 0; +static ur_result_t redefinedurProgramCompileExp(void *) { + ++ProgramCompileExpCounter; + return UR_RESULT_SUCCESS; +} + +static int ProgramCreateWithBinaryCounter = 0; +static ur_result_t redefinedurProgramCreateWithBinary(void *) { + ++ProgramCreateWithBinaryCounter; + return UR_RESULT_SUCCESS; +} + +class MultipleDevsKernelBundleTest + : public testing::TestWithParam { +public: + MultipleDevsKernelBundleTest() : Mock{}, Plt{sycl::platform()} {} + +protected: + void SetUp() override { + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + mock::getCallbacks().set_after_callback("urKernelGetInfo", + &redefinedurKernelGetInfo); + mock::getCallbacks().set_after_callback("urProgramBuildExp", + &redefinedurProgramBuildExp); + mock::getCallbacks().set_after_callback("urProgramCreateWithIL", + &redefinedurProgramCreateWithIL); + mock::getCallbacks().set_after_callback("urProgramLinkExp", + &redefinedurProgramLinkExp); + mock::getCallbacks().set_after_callback("urProgramCompileExp", + &redefinedurProgramCompileExp); + mock::getCallbacks().set_after_callback("urDeviceSelectBinary", + &redefinedDeviceSelectBinary); + mock::getCallbacks().set_after_callback( + "urProgramCreateWithBinary", &redefinedurProgramCreateWithBinary); + } + +protected: + unittest::UrMock<> Mock; + platform Plt; +}; + +// Test to check that we can create input kernel bundle and call build twice for +// overlapping set of devices and execute the kernel on each device. +TEST_P(MultipleDevsKernelBundleTest, BuildTwiceWithOverlappingDevices) { + // Reset counters + ProgramCreateWithILCounter = 0; + ProgramBuildExpCounter = 0; + + // Get devices and create a context with at least 3 devices + std::vector Devices = + Plt.get_devices(sycl::info::device_type::gpu); + ASSERT_GE(Devices.size(), 3lu) << "Test requires at least 3 devices"; + + auto Dev1 = Devices[0], Dev2 = Devices[1], Dev3 = Devices[2]; + + // Create a context with the selected devices + sycl::context Context({Dev1, Dev2, Dev3}); + + // Create queues for each device + sycl::queue Queue1(Context, Dev1); + sycl::queue Queue2(Context, Dev2); + sycl::queue Queue3(Context, Dev3); + + // Get kernel ID + auto KernelID = sycl::get_kernel_id(); + + // Create an input kernel bundle + auto KernelBundleInput = + sycl::get_kernel_bundle(Context, {KernelID}); + + // Build kernel bundles for overlapping sets of devices + auto KernelBundleExe1 = sycl::build(KernelBundleInput, {Dev1, Dev2}); + auto KernelBundleExe2 = sycl::build(KernelBundleInput, {Dev2, Dev3}); + + // Get kernel objects from the built bundles + auto KernelObj1 = KernelBundleExe1.get_kernel(KernelID); + auto KernelObj2 = KernelBundleExe2.get_kernel(KernelID); + + // Submit tasks to the queues using the kernel bundles + Queue1.submit([&](sycl::handler &cgh) { + cgh.use_kernel_bundle(KernelBundleExe1); + cgh.single_task([]() {}); + }); + + Queue2.submit([&](sycl::handler &cgh) { + cgh.use_kernel_bundle(KernelBundleExe1); + cgh.single_task(KernelObj1); + }); + + Queue2.submit([&](sycl::handler &cgh) { + cgh.use_kernel_bundle(KernelBundleExe2); + cgh.single_task(KernelObj2); + }); + + Queue3.submit([&](sycl::handler &cgh) { + cgh.use_kernel_bundle(KernelBundleExe2); + cgh.single_task(KernelObj2); + }); + + // Verify the number of urProgramCreateWithIL calls + EXPECT_EQ(ProgramCreateWithILCounter, 2) + << "Expect 2 urProgramCreateWithIL calls"; + + // Verify the number of urProgramBuildExp calls + EXPECT_EQ(ProgramBuildExpCounter, 2) << "Expect 2 urProgramBuildExp calls"; +} + +// Test to check several use cases for multi-device kernel bundles. +// Test covers AOT and JIT cases. We mock usage of fallback device libaries to +// excersise additional logic in the program manager. Checks are used to test +// that program and device libraries caching works as expected. +TEST_P(MultipleDevsKernelBundleTest, DeviceLibs) { + // Unset the SYCL_DEVICELIB_NO_FALLBACK so that fallback libraries are used. + ScopedEnvVar var("SYCL_DEVICELIB_NO_FALLBACK", nullptr, + SYCLConfig::reset); + std::vector Devices = + Plt.get_devices(GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE + ? sycl::info::device_type::cpu + : sycl::info::device_type::gpu); + ASSERT_GE(Devices.size(), 4lu) << "Test requires at least 4 devices"; + + auto Dev1 = Devices[0], Dev2 = Devices[1], Dev3 = Devices[2], + Dev4 = Devices[3]; + + // Create a context with the selected devices + sycl::context Context({Dev1, Dev2, Dev3, Dev4}); + sycl::queue Queues[4] = { + sycl::queue(Context, Dev1), sycl::queue(Context, Dev2), + sycl::queue(Context, Dev3), sycl::queue(Context, Dev4)}; + { + // Test case 1 + // Get bundle in executable state for multiple devices in a context, enqueue + // a kernel to each device. + + // Reset counters + ProgramCreateWithILCounter = 0; + ProgramBuildExpCounter = 0; + ProgramLinkExpCounter = 0; + ProgramCompileExpCounter = 0; + ProgramCreateWithBinaryCounter = 0; + + // Get bundle in executable state for multiple devices in a context, enqueue + // a kernel to each device. + sycl::kernel_id KernelID = sycl::get_kernel_id(); + sycl::kernel_bundle KernelBundleExecutable = + sycl::get_kernel_bundle( + Context, {Dev1, Dev2, Dev3}, {KernelID}); + for (int i = 0; i < 2; i++) { + Queues[i].submit([=](sycl::handler &cgh) { + cgh.use_kernel_bundle(KernelBundleExecutable); + cgh.single_task([=]() {}); + }); + Queues[i].wait(); + } + + if (GetParam() == SYCL_DEVICE_BINARY_TYPE_SPIRV) { + // Verify the number of urProgramCreateWithIL calls: we expect 2 calls for + // fallback libraries (assert + math) and 1 call for the main program. + EXPECT_EQ(ProgramCreateWithILCounter, 3) + << "Expect 3 urProgramCreateWithIL calls"; + + // Verify the number of urProgramBuildExp calls: none expected as we + // compile and link in this case. + EXPECT_EQ(ProgramBuildExpCounter, 0) + << "Expect 0 urProgramBuildExp calls"; + + // Verify the number of urProgramCompileExp calls: we expect 2 calls to + // compile fallback libraries and 1 call to compile the main program. + EXPECT_EQ(ProgramCompileExpCounter, 3) + << "Expect 3 urProgramCompileExp calls"; + + // Verify the number of urProgramLinkExp calls: we expect 1 call which + // links the main program and fallback libraries. + EXPECT_EQ(ProgramLinkExpCounter, 1) << "Expect 1 urProgramLinkExp calls"; + } + if (GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE) { + // In case of AOT compilation, we expect 1 call to + // urProgramCreateWithBinary. + EXPECT_EQ(ProgramCreateWithBinaryCounter, 1) + << "Expect 3 urProgramCreateWithIL calls"; + + // And a single call to urProgramBuildExp. In this case libraries are + // linked beforehand, so we don't compile/link them online. + EXPECT_EQ(ProgramBuildExpCounter, 1) + << "Expect 0 urProgramBuildExp calls"; + } + } + + { + + // Test case 2 + // Get bundles in executable state: for pairs of devices excluding dev4 and + // for the new set of devices which includes the dev4. This checks caching + // of the programs and device libraries. + + // Reset counters + ProgramCreateWithILCounter = 0; + ProgramBuildExpCounter = 0; + ProgramLinkExpCounter = 0; + ProgramCompileExpCounter = 0; + ProgramCreateWithBinaryCounter = 0; + sycl::kernel_id KernelID = sycl::get_kernel_id(); + // Program associated with {dev1, dev2, dev3} is supposed to be cached from + // the first test case, we don't expect any additional program creation and + // compilation calls for the following bundles because they are all created + // for subsets of {dev1, dev2, dev3} which means that the program handle + // from cache will be used. + sycl::kernel_bundle KernelBundleExecutableSubset1 = + sycl::get_kernel_bundle( + Context, {Dev1, Dev2}, {KernelID}); + sycl::kernel_bundle KernelBundleExecutableSubset2 = + sycl::get_kernel_bundle( + Context, {Dev2, Dev3}, {KernelID}); + sycl::kernel_bundle KernelBundleExecutableSubset3 = + sycl::get_kernel_bundle( + Context, {Dev1, Dev3}, {KernelID}); + sycl::kernel_bundle KernelBundleExecutableSubset4 = + sycl::get_kernel_bundle(Context, {Dev3}, + {KernelID}); + EXPECT_EQ(ProgramCreateWithILCounter, 0); + EXPECT_EQ(ProgramCompileExpCounter, 0); + EXPECT_EQ(ProgramLinkExpCounter, 0); + + // Next we create a bundle with a different set of devices which includes + // dev4, so we expect new UR program creation. Also main program will be + // compiled for new set of devices. Each of device libraries (assert and + // math) will be additionally compiled for dev4, but no program creation is + // expected for device libraries as program handle already exists in the + // per-context cache. + sycl::kernel_bundle KernelBundleExecutableNewSet = + sycl::get_kernel_bundle( + Context, {Dev2, Dev3, Dev4}, {KernelID}); + if (GetParam() == SYCL_DEVICE_BINARY_TYPE_SPIRV) { + EXPECT_EQ(ProgramCreateWithILCounter, 1) + << "Expect 1 urProgramCreateWithIL calls"; + EXPECT_EQ(ProgramCompileExpCounter, 3) + << "Expect 3 urProgramCompileExp calls"; + EXPECT_EQ(ProgramLinkExpCounter, 1) << "Expect 1 urProgramLinkExp calls"; + } + + if (GetParam() == SYCL_DEVICE_BINARY_TYPE_NATIVE) { + EXPECT_EQ(ProgramCreateWithBinaryCounter, 1) + << "Expect 1 urProgramCreateWithBinary calls"; + EXPECT_EQ(ProgramBuildExpCounter, 1) + << "Expect 1 urProgramBuildExp calls"; + } + + for (int i = 0; i < 3; i++) { + Queues[0].submit([=](sycl::handler &cgh) { + cgh.use_kernel_bundle(KernelBundleExecutableSubset1); + cgh.single_task([=]() {}); + }); + Queues[0].wait(); + + Queues[2].submit([=](sycl::handler &cgh) { + cgh.use_kernel_bundle(KernelBundleExecutableNewSet); + cgh.single_task([=]() {}); + }); + Queues[2].wait(); + } + } + + // Reset the SYCL_DEVICELIB_NO_FALLBACK to its original value. + sycl::detail::SYCLConfig::reset(); +} + +// The following helpers and test verify persistent cache usage when we have +// kernel bundle with multiple devices. +#define ASSERT_NO_ERROR(x) \ + if (std::error_code EC = x) { \ + FAIL() << #x ": did not return errc::success.\n" \ + << "error number: " << EC.value() << "\n" \ + << "error message: " << EC.message() << "\n"; \ + } + +std::vector Prog = {125, 1024, 256, 32}; + +static ur_result_t redefinedurProgramGetInfo(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppropName == UR_PROGRAM_INFO_NUM_DEVICES) { + auto value = reinterpret_cast(*params.ppPropValue); + *value = Prog.size(); + } + + if (*params.ppropName == UR_PROGRAM_INFO_DEVICES) { + if (*params.ppPropValue) { + for (size_t i = 0; i < Prog.size(); i++) { + auto devs = static_cast(*params.ppPropValue); + devs[i] = MockGPUDevices[i].getHandle(); + } + } + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_device_handle_t) * Prog.size(); + return UR_RESULT_SUCCESS; + } + + if (*params.ppropName == UR_PROGRAM_INFO_BINARY_SIZES) { + auto value = reinterpret_cast(*params.ppPropValue); + for (size_t i = 0; i < Prog.size(); ++i) + value[i] = Prog[i]; + } + + if (*params.ppropName == UR_PROGRAM_INFO_BINARIES) { + auto value = reinterpret_cast(*params.ppPropValue); + for (size_t i = 0; i < Prog.size(); ++i) { + for (int j = 0; j < Prog[i]; ++j) { + value[i][j] = i; + } + } + } + + return UR_RESULT_SUCCESS; +} + +TEST_P(MultipleDevsKernelBundleTest, PersistentCache) { + // Create temporary directory for the persistent cache in the directory of the + // test binary. + std::string PersistentCachePath = sycl::detail::OSUtil::getCurrentDSODir() + + detail::OSUtil::DirSep + "persistent_cache"; + // Set environment variables to enable persistent cache and set the cache + // path. + ScopedEnvVar var1("SYCL_CACHE_PERSISTENT", "1", + SYCLConfig::reset); + ScopedEnvVar var2("SYCL_CACHE_DIR", PersistentCachePath.c_str(), + SYCLConfig::reset); + + // Disable in-memory cache in this test case, as we are interested in + // persistent cache usage. + ScopedEnvVar var3("SYCL_CACHE_IN_MEM", "0", + SYCLConfig::reset); + + mock::getCallbacks().set_after_callback("urProgramGetInfo", + &redefinedurProgramGetInfo); + std::string CacheRoot = detail::PersistentDeviceCodeCache::getRootDir(); + ASSERT_NO_ERROR(llvm::sys::fs::remove_directories(CacheRoot)); + ASSERT_NO_ERROR(llvm::sys::fs::create_directories(CacheRoot)); + + // Get devices and create a context with at least 3 devices + std::vector Devices = + Plt.get_devices(sycl::info::device_type::gpu); + ASSERT_GE(Devices.size(), 3lu) << "Test requires at least 3 devices"; + + // Create a context with the selected devices + sycl::context Context({Devices[0], Devices[1], Devices[2], Devices[3]}); + auto KernelID = sycl::get_kernel_id(); + auto Bundle = + sycl::get_kernel_bundle(Context, {KernelID}); + + auto BundleExe = sycl::build(Bundle, {Devices[0], Devices[2]}); + + // Verify that binaries that we get from build stage for each device are put + // into the persistent cache. + sycl_device_binary_struct BinStruct = Imgs[0].convertToNativeType(); + sycl_device_binary Bin = &BinStruct; + detail::RTDeviceBinaryImage RTBinImg{Bin}; + auto Res = detail::PersistentDeviceCodeCache::getItemFromDisc( + {Devices[0], Devices[2]}, {&RTBinImg}, {}, {}); + EXPECT_EQ(Res.size(), static_cast(2)) + << "Expected cache items to be loaded"; + + // Now check that binaries from persistent cache are used to create a program + // when we submit a kernel. + ProgramCreateWithBinaryCounter = 0; + + sycl::queue q(Devices[2]); + q.submit([&](sycl::handler &cgh) { + cgh.single_task([=]() {}); + }); + q.wait(); + + // Verify the number of urProgramCreateWithBinary calls + EXPECT_EQ(ProgramCreateWithBinaryCounter, 1) + << "Expect 1 urProgramCreateWithBinary calls"; + ASSERT_NO_ERROR(llvm::sys::fs::remove_directories(CacheRoot)); +} + +INSTANTIATE_TEST_SUITE_P(MultipleDevsKernelBundleTestInstance, + MultipleDevsKernelBundleTest, + testing::Values(SYCL_DEVICE_BINARY_TYPE_SPIRV, + SYCL_DEVICE_BINARY_TYPE_NATIVE)); diff --git a/sycl/unittests/program_manager/itt_annotations.cpp b/sycl/unittests/program_manager/itt_annotations.cpp index cb366f9170c9..c865882162f7 100644 --- a/sycl/unittests/program_manager/itt_annotations.cpp +++ b/sycl/unittests/program_manager/itt_annotations.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include #include +#include #include #include @@ -16,24 +17,7 @@ #include -// Same as defined in config.def -static constexpr auto ITTProfileEnvVarName = "INTEL_ENABLE_OFFLOAD_ANNOTATIONS"; - -static void set_env(const char *name, const char *value) { -#ifdef _WIN32 - (void)_putenv_s(name, value); -#else - (void)setenv(name, value, /*overwrite*/ 1); -#endif -} - -static void unset_env(const char *name) { -#ifdef _WIN32 - (void)_putenv_s(name, ""); -#else - unsetenv(name); -#endif -} +using namespace sycl::unittest; bool HasITTEnabled = false; @@ -50,16 +34,10 @@ static ur_result_t redefinedProgramSetSpecializationConstants(void *pParams) { return UR_RESULT_SUCCESS; } -static void reset() { - using namespace sycl::detail; - HasITTEnabled = false; - SYCLConfig::reset(); -} - TEST(ITTNotify, UseKernelBundle) { - set_env(ITTProfileEnvVarName, "1"); - - reset(); + ScopedEnvVar Var("INTEL_ENABLE_OFFLOAD_ANNOTATIONS", "1", + SYCLConfig::reset); + HasITTEnabled = false; sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); @@ -85,10 +63,9 @@ TEST(ITTNotify, UseKernelBundle) { } TEST(ITTNotify, VarNotSet) { - unset_env(ITTProfileEnvVarName); - - reset(); - + ScopedEnvVar Var("INTEL_ENABLE_OFFLOAD_ANNOTATIONS", nullptr, + SYCLConfig::reset); + HasITTEnabled = false; sycl::unittest::UrMock<> Mock; sycl::platform Plt = sycl::platform(); mock::getCallbacks().set_before_callback(