diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 796182075ff3d..eab0fda9ff0d0 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2537,6 +2537,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { getAgent(), (uint64_t)Size); } + /// Insert a data fence between previous data operations and the following + /// operations. This is a no-op for AMDGPU devices as operations inserted into + /// a queue are in-order. + Error dataFence(__tgt_async_info *Async) override { + return Plugin::success(); + } + /// Initialize the async info for interoperability purposes. Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override { // TODO: Implement this function. diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index c9ab34b024b77..1b110f6b74553 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -944,6 +944,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { virtual Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; + /// Instert a data fence between previous data operations and the following + /// operations if necessary for the device + virtual Error dataFence(__tgt_async_info *AsyncInfo) = 0; + /// Exchange data between devices (device to device transfer). Calling this /// function is only valid if GenericPlugin::isDataExchangable() passing the /// two devices returns true. @@ -1448,6 +1452,10 @@ struct GenericPluginTy { int DstDeviceId, void *DstPtr, int64_t Size, __tgt_async_info *AsyncInfo); + /// Places a fence between previous data movements and following data + /// movements if necessary on the device + int32_t data_fence(int32_t DeviceId, __tgt_async_info *AsyncInfo); + /// Begin executing a kernel on the given device. int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs, diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 083d41659a469..5a7d8e936313f 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -2324,3 +2324,15 @@ int32_t GenericPluginTy::async_barrier(omp_interop_val_t *Interop) { } return OFFLOAD_SUCCESS; } + +int32_t GenericPluginTy::data_fence(int32_t DeviceId, + __tgt_async_info *AsyncInfo) { + auto Err = getDevice(DeviceId).dataFence(AsyncInfo); + if (Err) { + REPORT("failure to place data fence on device %d: %s\n", DeviceId, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } + + return OFFLOAD_SUCCESS; +} diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 82c9f9b706cbd..f3b2229daf372 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -856,6 +856,13 @@ struct CUDADeviceTy : public GenericDeviceTy { return Plugin::success(); } + /// Insert a data fence between previous data operations and the following + /// operations. This is a no-op for CUDA devices as operations inserted into + /// a queue are in-order. + Error dataFence(__tgt_async_info *Async) override { + return Plugin::success(); + } + /// Initialize the device info for interoperability purposes. Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) override { assert(Context && "Context is null"); diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index ed5213531999d..b88435aa9e02b 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -295,6 +295,13 @@ struct GenELF64DeviceTy : public GenericDeviceTy { "dataExchangeImpl not supported"); } + /// Insert a data fence between previous data operations and the following + /// operations. This is a no-op for Host devices as operations inserted into + /// a queue are in-order. + Error dataFence(__tgt_async_info *Async) override { + return Plugin::success(); + } + /// All functions are already synchronous. No need to do anything on this /// synchronization function. Error synchronizeImpl(__tgt_async_info &AsyncInfo,