diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 839db6cfc3717..286546e0dbc9b 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -98,6 +98,7 @@ def Aspectext_oneapi_clock_sub_group : Aspect<"ext_oneapi_clock_sub_group">; def Aspectext_oneapi_clock_work_group : Aspect<"ext_oneapi_clock_work_group">; def Aspectext_oneapi_clock_device : Aspect<"ext_oneapi_clock_device">; def Aspectext_oneapi_is_integrated_gpu : Aspect<"ext_oneapi_is_integrated_gpu">; +def Aspectext_oneapi_ipc_memory : Aspect<"ext_oneapi_ipc_memory">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; @@ -176,7 +177,8 @@ def : TargetInfo<"__TestAspectList", Aspectext_oneapi_clock_sub_group, Aspectext_oneapi_clock_work_group, Aspectext_oneapi_clock_device, - Aspectext_oneapi_is_integrated_gpu], + Aspectext_oneapi_is_integrated_gpu, + Aspectext_oneapi_ipc_memory], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc new file mode 100644 index 0000000000000..38638594eb64d --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc @@ -0,0 +1,359 @@ += sycl_ext_oneapi_inter_process_communication + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2025 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +:khr-default-context: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:khr-default-context + +This extension is written against the SYCL 2020 revision 10 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + +[_Note:_ The APIs in this extension uses the concept of a per-platform +default context as specified in section 4.6.2 "Platform class" of the core SYCL +specification. +As a convenience, this extension specification describes the behavior of its +APIs by using the `khr_get_default_context` function from {khr-default-context}[ +sycl_khr_default_context], however there is no true dependency on that +extension. +An implementation could still implement +sycl_ext_oneapi_inter_process_communication even without implementing +sycl_khr_default_context because the core SYCL specification still requires +there to be a per-platform default context even if the core SYCL specification +does not provide a convenient way to get it. +_{endnote}_] + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* + + +== Overview + +This extension adds the ability for SYCL programs to share device USM memory +allocations between processes. This is done by the allocating process creating +a new IPC memory handle through the new free functions and transferring the +returned handle data to the other processes. The other processes can use the +handle data to retrieve the corresponding device USM memory. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_INTER_PROCESS_COMMUNICATION` to one of the values defined +in the table below. Applications can test for the existence of this macro to +determine if the implementation supports this feature, or applications can test +the macro's value to determine which of the extension's features the +implementation supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + +=== Extension to `enum class aspect` + +[source] +---- +namespace sycl { +enum class aspect { + ... + ext_oneapi_ipc_memory +} +} +---- + +If a SYCL device has this aspect, that device supports the `get` and `open` +functions specified in the following section. + + +=== Inter-process communicable memory + + +This extension adds new free functions under the `ipc_memory` experimental +namespace. + +``` +namespace sycl::ext::oneapi::experimental::ipc_memory { + +using handle_data_t = std::vector; + +handle_data_t get(void *ptr, const sycl::context &ctx); + +handle_data_t get(void *ptr); + +void put(const handle_data_t &handle_data, const sycl::context &ctx); + +void put(const handle_data_t &handle_data); + +void *open(const handle_data_t &handle_data, const sycl::context &ctx, + const sycl::device &dev); + +void *open(const handle_data_t &handle_data, const sycl::device &dev); + +void *open(const handle_data_t &handle_data); + +void close(void *ptr, const sycl::context &ctx); + +void close(void *ptr); + +} +``` + +|==== +a| +[frame=all,grid=none] +!==== +a! +[source] +---- +handle_data_t get(void *ptr, const sycl::context &ctx) +---- +!==== + +_Preconditions:_ `ptr` is a pointer to USM device memory on some device _D_, and +`ctx` is the same context as `ptr` was allocated against, using the USM device +memory allocation routines. + +_Returns:_ An IPC "handle" to this USM memory allocation. The bytes of this +handle can be transferred to another process on the same system, and the other +process can use the handle to get a pointer to the same USM allocation through a +call to the `open` function. + +_Throws:_ An exception with the `errc::feature_not_supported` error code if +device _D_ does not have `aspect::ext_oneapi_ipc_memory`. + +!==== +a! +[source] +---- +handle_data_t get(void *ptr) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +return ipc_memory::get(ptr, ctxt); +---- + +!==== +a! +[source] +---- +void put(const handle_data_t &handle_data, const sycl::context &ctx) +---- +!==== + +_Preconditions:_ `handle_data` is the IPC "handle" to USM device memory that was +returned from a call to `get` either in this process or in some other process on +the same system. The USM device memory has not yet been freed in this process. + +_Effects:_ Deallocates resources associated with the handle. These resources are +automatically deallocated when the USM device memory is freed, so it is not +strictly necessary to call the `put` function. After the resources associated +with the handle have been deallocated, i.e. through a call to the `put` function +or through freeing the USM device memory, the handle data is invalid and using +it in the `put` and `open` functions will result in undefined behavior. + +[_Note:_ Any pointers retrieved through a call to the `open` function in any +process on the system will still be valid after a call to the `put` function and +must still be freed through calls to the `close` function. +_{endnote}_] + +!==== +a! +[source] +---- +void put(const handle_data_t &handle_data) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +ipc_memory::put(handle_data, ctxt); +---- + +!==== +a! +[source] +---- +void *open(const handle_data_t &handle_data, const sycl::context &ctx, + const sycl::device &dev) +---- +!==== + +_Preconditions:_ `handle_data` is the IPC "handle" to USM device memory that was +returned from a call to the `get` function either in this process or in some +other process on the same system. That USM device memory is accessible on device +`dev`. + +_Returns:_ A pointer to the same USM device memory represented by `handle_data`. +The returned pointer is associated with context `ctx`. It can be used wherever a +USM device pointer for device `dev` and context `ctx` is expected, except it +cannot be passed to `sycl::free`. Instead, use the `close` function to free this +memory pointer. + +[_Note:_ The `open` function can be called multiple times on the same handle +within the same process. The number of calls to the `close` function must be +equal to the number of calls to the `open` function to free the memory pointer. +_{endnote}_] + +[_Note:_ The pointer returned from a call to the `open` function is no longer +valid if the associated USM device memory is freed through a call to the +`sycl::free` function. +_{endnote}_] + +_Throws:_ + + * An exception with the `errc::feature_not_supported` error code if device + `dev` does not have `aspect::ext_oneapi_ipc_memory`. + * An exception with the `errc::invalid` error code if the handle data + `handle_data` has an unexpected number of bytes. + +!==== +a! +[source] +---- +void *open(const handle_data_t &handle_data, const sycl::device &dev) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::context ctxt = dev.get_platform().khr_get_default_context(); +return ipc_memory::put(handle_data, ctxt, dev); +---- + +!==== +a! +[source] +---- +void *open(const handle_data_t &handle_data, const sycl::context &ctx, + const sycl::device &dev) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +return ipc_memory::open(handle_data, ctxt, d); +---- + +!==== +a! +[source] +---- +void close(void *ptr, const sycl::context &ctx) +---- +!==== + +_Precondition:_ `ptr` was previously returned from a call to the `open` function +in this same process, where `ctx` was passed as the context. This `ptr` value +has not yet been closed by calling the `close` function. + +_Effects:_ Closes a device USM pointer previously returned by a call to +the `open` function. + +!==== +a! +[source] +---- +void close(void *ptr) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +ipc_memory::close(ptr, ctxt); +---- + +|==== + + +== Issues + +=== Level Zero file descriptor duplication dependency + +The IPC memory APIs in Level Zero on Linux currently requires the ability to +duplicate file descriptors between processes. For security this is not allowed +by default on Linux-based systems, so in order for the IPC memory APIs to work +with Level Zero on Linux the user must either call `prctl(PR_SET_PTRACER, ...)` +in the IPC handle owner process or enable the functionality globally using + +```bash +sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" +``` + +See also https://github.com/oneapi-src/unified-memory-framework/tree/main?tab=readme-ov-file#level-zero-memory-provider. + + +=== Level Zero IPC memory Windows support + +The new IPC memory APIs are not currently supported on the Level Zero backend on +Windows systems. + diff --git a/sycl/include/sycl/ext/oneapi/experimental/ipc_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/ipc_memory.hpp new file mode 100644 index 0000000000000..b213e02fd4434 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/ipc_memory.hpp @@ -0,0 +1,72 @@ +//==------- ipc_memory.hpp --- SYCL inter-process communicable memory ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + +#include +#include +#include +#include +#include + +#include +#include + +namespace sycl { +inline namespace _V1 { + +namespace ext::oneapi::experimental::ipc_memory { + +using handle_data_t = std::vector; + +__SYCL_EXPORT handle_data_t get(void *Ptr, const sycl::context &Ctx); + +inline handle_data_t get(void *Ptr) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + return ipc_memory::get(Ptr, Ctx); +} + +__SYCL_EXPORT void put(const handle_data_t &HandleData, + const sycl::context &Ctx); + +inline void put(const handle_data_t &HandleData) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + ipc_memory::put(HandleData, Ctx); +} + +__SYCL_EXPORT void *open(const handle_data_t &HandleData, + const sycl::context &Ctx, const sycl::device &Dev); + +inline void *open(const handle_data_t &HandleData, const sycl::device &Dev) { + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + return ipc_memory::open(HandleData, Ctx, Dev); +} + +inline void *open(const handle_data_t &HandleData) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + return ipc_memory::open(HandleData, Ctx, Dev); +} + +__SYCL_EXPORT void close(void *Ptr, const sycl::context &Ctx); + +inline void close(void *Ptr) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + ipc_memory::close(Ptr, Ctx); +} + +} // namespace ext::oneapi::experimental::ipc_memory +} // namespace _V1 +} // namespace sycl + +#endif diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 4c5cb0a3ec8b2..b4c6839dd8b86 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -84,3 +84,4 @@ __SYCL_ASPECT(ext_oneapi_clock_sub_group, 91) __SYCL_ASPECT(ext_oneapi_clock_work_group, 92) __SYCL_ASPECT(ext_oneapi_clock_device, 93) __SYCL_ASPECT(ext_oneapi_is_integrated_gpu, 94) +__SYCL_ASPECT(ext_oneapi_ipc_memory, 95) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index a09870dd77c30..e84dc848c42cc 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -128,6 +128,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 4159303f87d19..7815aa511a866 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -322,6 +322,7 @@ set(SYCL_COMMON_SOURCES "handler.cpp" "image.cpp" "interop_handle.cpp" + "ipc_memory.cpp" "kernel.cpp" "kernel_bundle.cpp" "physical_mem.cpp" diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 13c5f5c16ce3c..9eaa57a2e0174 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1597,6 +1597,10 @@ class device_impl : public std::enable_shared_from_this { get_info_impl_nocheck().value_or( 0); } + CASE(ext_oneapi_ipc_memory) { + return get_info_impl_nocheck() + .value_or(0); + } else { return false; // This device aspect has not been implemented yet. } diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc index 9c7a12379efdc..4d0b86f56ed99 100644 --- a/sycl/source/detail/ur_device_info_ret_types.inc +++ b/sycl/source/detail/ur_device_info_ret_types.inc @@ -162,6 +162,7 @@ MAP(UR_DEVICE_INFO_NODE_MASK, uint32_t) // These aren't present in the specification, extracted from ur_api.h // instead. MAP(UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP, ur_exp_device_2d_block_array_capability_flags_t) +MAP(UR_DEVICE_INFO_IPC_MEMORY_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP, ur_bool_t) diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index f89754b9cb465..362c31ee241bf 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -127,6 +127,7 @@ inline namespace _V1 { #define SYCL_KHR_DEFAULT_CONTEXT 1 #define SYCL_EXT_INTEL_EVENT_MODE 1 #define SYCL_EXT_ONEAPI_TANGLE 1 +#define SYCL_EXT_ONEAPI_INTER_PROCESS_COMMUNICATION 1 // Unfinished KHR extensions. These extensions are only available if the // __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS macro is defined. diff --git a/sycl/source/ipc_memory.cpp b/sycl/source/ipc_memory.cpp new file mode 100644 index 0000000000000..b79ac27cf1ba8 --- /dev/null +++ b/sycl/source/ipc_memory.cpp @@ -0,0 +1,106 @@ +//==------- ipc_memory.cpp --- SYCL inter-process communicable memory ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental::ipc_memory { + +__SYCL_EXPORT handle_data_t get(void *Ptr, const sycl::context &Ctx) { + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + sycl::detail::adapter_impl &Adapter = CtxImpl->getAdapter(); + + // If the API fails, check that the device actually supported it. We only do + // this if UR fails to avoid the device-lookup overhead. + auto CheckDeviceSupport = [Ptr, &Ctx]() { + sycl::device Dev = get_pointer_device(Ptr, Ctx); + if (!Dev.has(aspect::ext_oneapi_ipc_memory)) + throw sycl::exception( + sycl::make_error_code(errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_ipc_memory."); + }; + + size_t HandleSize = 0; + auto UrRes = + Adapter.call_nocheck( + CtxImpl->getHandleRef(), Ptr, nullptr, &HandleSize); + if (UrRes != UR_RESULT_SUCCESS) { + CheckDeviceSupport(); + Adapter.checkUrResult(UrRes); + } + + handle_data_t Res(HandleSize); + UrRes = Adapter.call_nocheck( + CtxImpl->getHandleRef(), Ptr, Res.data(), nullptr); + if (UrRes != UR_RESULT_SUCCESS) { + CheckDeviceSupport(); + Adapter.checkUrResult(UrRes); + } + return Res; +} + +__SYCL_EXPORT void put(const handle_data_t &HandleData, + const sycl::context &Ctx) { + // TODO: UMF and UR currently requires the handle data to be non-const, so we + // need to make a copy of the data. Once this has been changed, the copy + // can be removed. + // CMPLRLLVM-71181 + // https://github.com/oneapi-src/unified-memory-framework/issues/1536 + handle_data_t HandleDataCopy = HandleData; + + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + sycl::detail::adapter_impl &Adapter = CtxImpl->getAdapter(); + Adapter.call( + CtxImpl->getHandleRef(), HandleDataCopy.data()); +} + +__SYCL_EXPORT void *open(const handle_data_t &HandleData, + const sycl::context &Ctx, const sycl::device &Dev) { + if (!Dev.has(aspect::ext_oneapi_ipc_memory)) + throw sycl::exception( + sycl::make_error_code(errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_ipc_memory."); + + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + sycl::detail::adapter_impl &Adapter = CtxImpl->getAdapter(); + + // TODO: UMF and UR currently requires the handle data to be non-const, so we + // need to make a copy of the data. Once this has been changed, the copy + // can be removed. + // CMPLRLLVM-71181 + // https://github.com/oneapi-src/unified-memory-framework/issues/1536 + handle_data_t HandleDataCopy = HandleData; + + void *Ptr = nullptr; + ur_result_t UrRes = + Adapter.call_nocheck( + CtxImpl->getHandleRef(), getSyclObjImpl(Dev)->getHandleRef(), + HandleDataCopy.data(), HandleDataCopy.size(), &Ptr); + if (UrRes == UR_RESULT_ERROR_INVALID_VALUE) + throw sycl::exception(sycl::make_error_code(errc::invalid), + "HandleData data size does not correspond " + "to the target platform's IPC memory handle size."); + Adapter.checkUrResult(UrRes); + return Ptr; +} + +__SYCL_EXPORT void close(void *Ptr, const sycl::context &Ctx) { + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + sycl::detail::adapter_impl &Adapter = CtxImpl->getAdapter(); + Adapter.call( + CtxImpl->getHandleRef(), Ptr); +} + +} // namespace ext::oneapi::experimental::ipc_memory +} // namespace _V1 +} // namespace sycl diff --git a/sycl/test-e2e/Experimental/ipc_memory.cpp b/sycl/test-e2e/Experimental/ipc_memory.cpp new file mode 100644 index 0000000000000..19d8cb56c772c --- /dev/null +++ b/sycl/test-e2e/Experimental/ipc_memory.cpp @@ -0,0 +1,120 @@ +// REQUIRES: aspect-usm_device_allocations && aspect-ext_oneapi_ipc_memory + +// UNSUPPORTED: level_zero && windows +// UNSUPPORTED-TRACKER: UMFW-348 + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +#include +#include +#include + +#if defined(__linux__) +#include +#include +#include +#endif // defined(__linux__) + +namespace syclexp = sycl::ext::oneapi::experimental; + +constexpr size_t N = 32; +constexpr const char *CommsFile = "ipc_comms.txt"; + +int spawner(int argc, char *argv[]) { + assert(argc == 1); + sycl::queue Q; + +#if defined(__linux__) + // UMF currently requires ptrace permissions to be set for the spawner. As + // such we need to set it until this limitation has been addressed. + // https://github.com/oneapi-src/unified-memory-framework/tree/main?tab=readme-ov-file#level-zero-memory-provider + if (Q.get_backend() == sycl::backend::ext_oneapi_level_zero && + prctl(PR_SET_PTRACER, getppid()) == -1) { + std::cout << "Failed to set ptracer permissions!" << std::endl; + return 1; + } +#endif // defined(__linux__) + + int *DataPtr = sycl::malloc_device(N, Q); + Q.parallel_for(N, [=](sycl::item<1> I) { + DataPtr[I] = static_cast(I.get_linear_id()); + }).wait(); + + { + // Write handle data to file. + { + syclexp::ipc_memory::handle_data_t HandleData = + syclexp::ipc_memory::get(DataPtr, Q.get_context()); + size_t HandleDataSize = HandleData.size(); + std::fstream FS(CommsFile, std::ios_base::out | std::ios_base::binary); + FS.write(reinterpret_cast(&HandleDataSize), sizeof(size_t)); + FS.write(reinterpret_cast(HandleData.data()), + HandleDataSize); + } + + // Spawn other process with an argument. + std::string Cmd = std::string{argv[0]} + " 1"; + std::cout << "Spawning: " << Cmd << std::endl; + std::system(Cmd.c_str()); + } + + int Failures = 0; + int Read[N] = {0}; + Q.copy(DataPtr, Read, N).wait(); + for (size_t I = 0; I < N; ++I) { + if (Read[I] != (N - I)) { + ++Failures; + std::cout << "Failed from spawner: Result at " << I + << " unexpected: " << Read[I] << " != " << (N - I) << std::endl; + } + } + sycl::free(DataPtr, Q); + return Failures; +} + +int consumer() { + sycl::queue Q; + + // Read the handle data. + std::fstream FS(CommsFile, std::ios_base::in | std::ios_base::binary); + size_t HandleSize = 0; + FS.read(reinterpret_cast(&HandleSize), sizeof(size_t)); + std::unique_ptr HandleData{new std::byte[HandleSize]}; + FS.read(reinterpret_cast(HandleData.get()), HandleSize); + + // Open IPC handle. + syclexp::ipc_memory::handle_data_t Handle{HandleData.get(), + HandleData.get() + HandleSize}; + int *DataPtr = reinterpret_cast( + syclexp::ipc_memory::open(Handle, Q.get_context(), Q.get_device())); + + // Test the data already in the USM pointer. + int Failures = 0; + int Read[N] = {0}; + Q.copy(DataPtr, Read, N).wait(); + for (size_t I = 0; I < N; ++I) { + if (Read[I] != I) { + ++Failures; + std::cout << "Failed from consumer: Result at " << I + << " unexpected: " << Read[I] << " != " << I << std::endl; + } + } + + Q.parallel_for(N, [=](sycl::item<1> I) { + DataPtr[I] = static_cast(N - I.get_linear_id()); + }).wait(); + + // Close the IPC pointer. + syclexp::ipc_memory::close(DataPtr, Q.get_context()); + + return Failures; +} + +int main(int argc, char *argv[]) { + return argc == 1 ? spawner(argc, argv) : consumer(); +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index f9a2adb393f4b..e1947798622b0 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2997,6 +2997,10 @@ _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv _ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm _ZN4sycl3_V13ext6oneapi12experimental10async_freeERKNS0_5queueEPvRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi12experimental10async_freeERNS0_7handlerEPv +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory3getEPvRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory3putERSt6vectorISt4byteSaIS6_EERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory4openERSt6vectorISt4byteSaIS6_EERKNS0_7contextERKNS0_6deviceE +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory5closeEPvRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental10mem_adviseENS0_5queueEPvmiRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi12experimental11memory_pool21increase_threshold_toEm _ZN4sycl3_V13ext6oneapi12experimental11memory_poolC1ERKNS0_7contextERKNS0_6deviceENS0_3usm5allocENS4_15pool_propertiesE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 7506f4fec3f22..e05f726cbc504 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3837,6 +3837,7 @@ ?category@exception@_V1@sycl@@QEBAAEBVerror_category@std@@XZ ?checkNodePropertiesAndThrow@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@KAXAEBVproperty_list@67@@Z ?clearArgs@handler@_V1@sycl@@AEAAXXZ +?close@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YAXPEAXAEBVcontext@56@@Z ?code@exception@_V1@sycl@@QEBAAEBVerror_code@std@@XZ ?compile_from_source@detail@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$00@56@AEAV?$kernel_bundle@$02@56@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@Vstring_view@detail@_V1@sycl@@V?$allocator@Vstring_view@detail@_V1@sycl@@@std@@@std@@PEAVstring@156@2@Z ?compile_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z @@ -4050,6 +4051,7 @@ ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z ?get@context@_V1@sycl@@QEBAPEAU_cl_context@@XZ ?get@device@_V1@sycl@@QEBAPEAU_cl_device_id@@XZ +?get@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YA?AV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@PEAXAEBVcontext@56@@Z ?get@kernel@_V1@sycl@@QEBAPEAU_cl_kernel@@XZ ?get@platform@_V1@sycl@@QEBAPEAU_cl_platform_id@@XZ ?get@queue@_V1@sycl@@QEBAPEAU_cl_command_queue@@XZ @@ -4357,6 +4359,7 @@ ?modf_impl@detail@_V1@sycl@@YANNPEAN@Z ?name@SYCLCategory@detail@_V1@sycl@@UEBAPEBDXZ ?native_specialization_constant@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ +?open@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YAPEAXAEAV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@AEBVcontext@56@AEBVdevice@56@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$00@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$01@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$02@23@Vkernel@23@@Z @@ -4376,6 +4379,7 @@ ?print_graph@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEBAXVstring_view@267@_N@Z ?print_graph@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBAXV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_N@Z ?processArg@handler@_V1@sycl@@AEAAXPEAXAEBW4kernel_param_kind_t@detail@23@H_KAEA_K_N4@Z +?put@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YAXAEAV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@AEBVcontext@56@@Z ?query@tls_code_loc_t@detail@_V1@sycl@@QEAAAEBUcode_location@234@XZ ?reduComputeWGSize@detail@_V1@sycl@@YA_K_K0AEA_K@Z ?reduGetMaxNumConcurrentWorkGroups@detail@_V1@sycl@@YAIAEAVhandler@23@@Z diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index db0bc0120e8db..acdac92fd666a 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -26,6 +26,7 @@ add_sycl_unittest(ExtensionsTests OBJECT DeviceInfo.cpp RootGroup.cpp USMPrefetch.cpp + IPC.cpp ) add_subdirectory(BindlessImages) diff --git a/sycl/unittests/Extensions/IPC.cpp b/sycl/unittests/Extensions/IPC.cpp new file mode 100644 index 0000000000000..196bb4c795bf9 --- /dev/null +++ b/sycl/unittests/Extensions/IPC.cpp @@ -0,0 +1,176 @@ +//==------------------------------ IPC.cpp ---------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +namespace { + +int DummyInt = 42; +void *DummyPtr = &DummyInt; + +constexpr size_t DummyHandleDataSize = 10; +std::byte DummyHandleData[DummyHandleDataSize] = { + std::byte{9}, std::byte{8}, std::byte{7}, std::byte{6}, std::byte{5}, + std::byte{4}, std::byte{3}, std::byte{2}, std::byte{1}, std::byte{0}}; + +thread_local int urIPCGetMemHandleExp_counter = 0; +thread_local int urIPCPutMemHandleExp_counter = 0; +thread_local int urIPCOpenMemHandleExp_counter = 0; +thread_local int urIPCCloseMemHandleExp_counter = 0; + +ur_result_t replace_urIPCGetMemHandleExp(void *pParams) { + ++urIPCGetMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppMem, DummyPtr); + if (*params.ppIPCMemHandleDataSizeRet) + **params.ppIPCMemHandleDataSizeRet = DummyHandleDataSize; + if (*params.ppIPCMemHandleData) + std::memcpy(*params.ppIPCMemHandleData, DummyHandleData, + DummyHandleDataSize); + return UR_RESULT_SUCCESS; +} + +ur_result_t replace_urIPCPutMemHandleExp(void *pParams) { + ++urIPCPutMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ( + memcmp(*params.ppIPCMemHandleData, DummyHandleData, DummyHandleDataSize), + 0); + return UR_RESULT_SUCCESS; +} + +ur_result_t replace_urIPCOpenMemHandleExp(void *pParams) { + ++urIPCOpenMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ( + memcmp(*params.ppIPCMemHandleData, DummyHandleData, DummyHandleDataSize), + 0); + EXPECT_EQ(*params.pipcMemHandleDataSize, DummyHandleDataSize); + **params.pppMem = DummyPtr; + return UR_RESULT_SUCCESS; +} + +ur_result_t replace_urIPCCloseMemHandleExp(void *pParams) { + ++urIPCCloseMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppMem, DummyPtr); + return UR_RESULT_SUCCESS; +} + +ur_result_t after_urDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_DEVICE_INFO_IPC_MEMORY_SUPPORT_EXP: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_bool_t); + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = ur_bool_t{true}; + return UR_RESULT_SUCCESS; + default: + return UR_RESULT_SUCCESS; + } +} + +class IPCTests : public ::testing::Test { +public: + IPCTests() : Mock{}, Ctxt(sycl::platform()) {} + +protected: + void SetUp() override { + urIPCGetMemHandleExp_counter = 0; + urIPCPutMemHandleExp_counter = 0; + urIPCOpenMemHandleExp_counter = 0; + urIPCCloseMemHandleExp_counter = 0; + + mock::getCallbacks().set_replace_callback("urIPCGetMemHandleExp", + replace_urIPCGetMemHandleExp); + mock::getCallbacks().set_replace_callback("urIPCPutMemHandleExp", + replace_urIPCPutMemHandleExp); + mock::getCallbacks().set_replace_callback("urIPCOpenMemHandleExp", + replace_urIPCOpenMemHandleExp); + mock::getCallbacks().set_replace_callback("urIPCCloseMemHandleExp", + replace_urIPCCloseMemHandleExp); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + after_urDeviceGetInfo); + } + + sycl::unittest::UrMock<> Mock; + sycl::context Ctxt; +}; + +TEST_F(IPCTests, IPCGetPutImplicit) { + syclexp::ipc_memory::handle_data_t IPCMemHandleData = + syclexp::ipc_memory::get(DummyPtr, Ctxt); + EXPECT_EQ(IPCMemHandleData.size(), DummyHandleDataSize); + EXPECT_EQ( + memcmp(IPCMemHandleData.data(), DummyHandleData, IPCMemHandleData.size()), + 0); + + // Creating the IPC memory from a pointer should only call "get". It should be + // called twice: Once to get the size of the data and again to get the data. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 2); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 0); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); +} + +TEST_F(IPCTests, IPCGetPutExplicit) { + syclexp::ipc_memory::handle_data_t IPCMemHandleData = + syclexp::ipc_memory::get(DummyPtr, Ctxt); + EXPECT_EQ(IPCMemHandleData.size(), DummyHandleDataSize); + EXPECT_EQ( + memcmp(IPCMemHandleData.data(), DummyHandleData, IPCMemHandleData.size()), + 0); + + // Creating the IPC memory from a pointer should only call "get". It should be + // called twice: Once to get the size of the data and again to get the data. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 2); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 0); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); + + syclexp::ipc_memory::put(IPCMemHandleData, Ctxt); + + // Calling "put" explicitly should call the UR function. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 2); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 1); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 0); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); +} + +TEST_F(IPCTests, IPCOpenClose) { + syclexp::ipc_memory::handle_data_t HandleData{ + DummyHandleData, DummyHandleData + DummyHandleDataSize}; + void *Ptr = + syclexp::ipc_memory::open(HandleData, Ctxt, Ctxt.get_devices()[0]); + EXPECT_EQ(Ptr, DummyPtr); + + // Opening an IPC handle should call open. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 0); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 1); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); + + syclexp::ipc_memory::close(Ptr, Ctxt); + + // When we close an IPC memory pointer, it should call close. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 0); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 1); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 1); +} + +} // namespace