From 3d0de74a69c58b731e5b6a66a234444d4fcc2c1b Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Thu, 18 Sep 2025 14:01:16 +0100 Subject: [PATCH 1/4] [Offload] Re-allocate overlapping memory If olMemAlloc happens to allocate memory that was already allocated elsewhere (possibly by another device on another platform), it is now thrown away and a new allocation generated. A new `AllocBases` vector is now available, which is an ordered list of allocation start addresses. --- offload/liboffload/API/Memory.td | 3 + offload/liboffload/src/OffloadImpl.cpp | 69 ++++++++++++++++--- .../OffloadAPI/memory/olMemAlloc.cpp | 20 ++++++ 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td index cc98b672a26a9..debda165d2b23 100644 --- a/offload/liboffload/API/Memory.td +++ b/offload/liboffload/API/Memory.td @@ -21,6 +21,9 @@ def ol_alloc_type_t : Enum { def olMemAlloc : Function { let desc = "Creates a memory allocation on the specified device."; + let details = [ + "All allocations through olMemAlloc regardless of source share a single virtual address range. There is no risk of multiple devices returning equal pointers to different memory." + ]; let params = [ Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>, Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>, diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index b5b9b0e83b975..ed969f5f01095 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -182,6 +182,9 @@ namespace offload { struct AllocInfo { ol_device_handle_t Device; ol_alloc_type_t Type; + void *Start; + // One byte past the end + void *End; }; // Global shared state for liboffload @@ -200,6 +203,9 @@ struct OffloadContext { bool ValidationEnabled = true; DenseMap AllocInfoMap{}; std::mutex AllocInfoMapMutex{}; + // Partitioned list of memory base addresses. Each element in this list is a + // key in AllocInfoMap + llvm::SmallVector AllocBases{}; SmallVector Platforms{}; size_t RefCount; @@ -615,18 +621,58 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) { Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, size_t Size, void **AllocationOut) { - auto Alloc = - Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type)); - if (!Alloc) - return Alloc.takeError(); + void *OldAlloc = nullptr; + + // Repeat the allocation up to a certain amount of times. If it happens to + // already be allocated (e.g. by a device from another vendor) throw it away + // and try again. + for (size_t Count = 0; Count < 10; Count++) { + auto NewAlloc = Device->Device->dataAlloc(Size, nullptr, + convertOlToPluginAllocTy(Type)); + if (!NewAlloc) + return NewAlloc.takeError(); + + if (OldAlloc) + if (auto Err = Device->Device->dataDelete(OldAlloc, + convertOlToPluginAllocTy(Type))) + return Err; - *AllocationOut = *Alloc; - { - std::lock_guard Lock(OffloadContext::get().AllocInfoMapMutex); - OffloadContext::get().AllocInfoMap.insert_or_assign( - *Alloc, AllocInfo{Device, Type}); + void *NewEnd = &reinterpret_cast(*NewAlloc)[Size]; + auto &AllocBases = OffloadContext::get().AllocBases; + auto &AllocInfoMap = OffloadContext::get().AllocInfoMap; + { + std::lock_guard Lock(OffloadContext::get().AllocInfoMapMutex); + + // Check that this memory region doesn't overlap another one + // That is, the start of this allocation needs to be after another + // allocation's end point, and the end of this allocation needs to be + // before the next one's start. + // `Gap` is the first alloc who ends after the new alloc's start point. + auto Gap = + std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc, + [&](const void *Iter, const void *Val) { + return AllocInfoMap.at(Iter).End <= Val; + }); + if (Gap == AllocBases.end() || NewEnd <= AllocInfoMap.at(*Gap).Start) { + // Success, no conflict + AllocInfoMap.insert_or_assign( + *NewAlloc, AllocInfo{Device, Type, *NewAlloc, NewEnd}); + AllocBases.insert( + std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc), + *NewAlloc); + *AllocationOut = *NewAlloc; + return Error::success(); + } + + // To avoid the next attempt allocating the same memory we just freed, we + // hold onto it until we complete the next allocation + OldAlloc = *NewAlloc; + } } - return Error::success(); + + // We've tried multiple times, and can't allocate a non-overlapping region. + return createOffloadError(ErrorCode::BACKEND_FAILURE, + "failed to allocate non-overlapping memory"); } Error olMemFree_impl(void *Address) { @@ -642,6 +688,9 @@ Error olMemFree_impl(void *Address) { Device = AllocInfo.Device; Type = AllocInfo.Type; OffloadContext::get().AllocInfoMap.erase(Address); + + auto &Bases = OffloadContext::get().AllocBases; + Bases.erase(std::lower_bound(Bases.begin(), Bases.end(), Address)); } if (auto Res = diff --git a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp index 00e428ec2abc7..445262aa0c583 100644 --- a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp +++ b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp @@ -34,6 +34,26 @@ TEST_P(olMemAllocTest, SuccessAllocDevice) { olMemFree(Alloc); } +TEST_P(olMemAllocTest, SuccessAllocMany) { + std::vector Allocs; + Allocs.reserve(1000); + + constexpr ol_alloc_type_t TYPES[3] = { + OL_ALLOC_TYPE_DEVICE, OL_ALLOC_TYPE_MANAGED, OL_ALLOC_TYPE_HOST}; + + for (size_t I = 1; I < 1000; I++) { + void *Alloc = nullptr; + ASSERT_SUCCESS(olMemAlloc(Device, TYPES[I % 3], 1024 * I, &Alloc)); + ASSERT_NE(Alloc, nullptr); + + Allocs.push_back(Alloc); + } + + for (auto *A : Allocs) { + olMemFree(A); + } +} + TEST_P(olMemAllocTest, InvalidNullDevice) { void *Alloc = nullptr; ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, From 22ff6295765729baacd745ff3dcf3e8593dc94ba Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Thu, 18 Sep 2025 14:21:21 +0100 Subject: [PATCH 2/4] Use static_cast --- offload/liboffload/src/OffloadImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index ed969f5f01095..ecf687d561c5b 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -637,7 +637,7 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, convertOlToPluginAllocTy(Type))) return Err; - void *NewEnd = &reinterpret_cast(*NewAlloc)[Size]; + void *NewEnd = &static_cast(*NewAlloc)[Size]; auto &AllocBases = OffloadContext::get().AllocBases; auto &AllocInfoMap = OffloadContext::get().AllocInfoMap; { From 15dfd03eb62b74990583cdf4bb425a099f1c4321 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Thu, 18 Sep 2025 15:21:22 +0100 Subject: [PATCH 3/4] Keep all rejects --- offload/liboffload/src/OffloadImpl.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index ecf687d561c5b..4a253c61a657b 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -621,7 +621,7 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) { Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, size_t Size, void **AllocationOut) { - void *OldAlloc = nullptr; + SmallVector Rejects; // Repeat the allocation up to a certain amount of times. If it happens to // already be allocated (e.g. by a device from another vendor) throw it away @@ -632,11 +632,6 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, if (!NewAlloc) return NewAlloc.takeError(); - if (OldAlloc) - if (auto Err = Device->Device->dataDelete(OldAlloc, - convertOlToPluginAllocTy(Type))) - return Err; - void *NewEnd = &static_cast(*NewAlloc)[Size]; auto &AllocBases = OffloadContext::get().AllocBases; auto &AllocInfoMap = OffloadContext::get().AllocInfoMap; @@ -661,12 +656,17 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc), *NewAlloc); *AllocationOut = *NewAlloc; + + for (void *R : Rejects) + if (auto Err = + Device->Device->dataDelete(R, convertOlToPluginAllocTy(Type))) + return Err; return Error::success(); } // To avoid the next attempt allocating the same memory we just freed, we - // hold onto it until we complete the next allocation - OldAlloc = *NewAlloc; + // hold onto it until we complete the allocation + Rejects.push_back(*NewAlloc); } } From 99dd81048c4dbc00fddc5fe01b5402ecf88415fc Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Mon, 22 Sep 2025 13:06:17 +0100 Subject: [PATCH 4/4] Increase repeat count to 50 --- offload/liboffload/src/OffloadImpl.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 4a253c61a657b..58a1c09d7e96e 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -619,6 +619,7 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) { } } +constexpr size_t MAX_ALLOC_TRIES = 50; Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, size_t Size, void **AllocationOut) { SmallVector Rejects; @@ -626,7 +627,7 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, // Repeat the allocation up to a certain amount of times. If it happens to // already be allocated (e.g. by a device from another vendor) throw it away // and try again. - for (size_t Count = 0; Count < 10; Count++) { + for (size_t Count = 0; Count < MAX_ALLOC_TRIES; Count++) { auto NewAlloc = Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type)); if (!NewAlloc)