Skip to content

Commit ce14b3c

Browse files
kevinsala and jhuber6
authored and committed
[Offload] Use Error for allocating/deallocating in plugins (llvm#160811)
Co-authored-by: Joseph Huber <[email protected]>
1 parent b023cc5 commit ce14b3c

File tree

6 files changed

+158
-126
lines changed

6 files changed

+158
-126
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 34 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -423,7 +423,11 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
423423
assert(MemoryManager && "Invalid memory manager");
424424
assert(PtrStorage && "Invalid pointer storage");
425425

426-
*PtrStorage = MemoryManager->allocate(Size, nullptr);
426+
auto PtrStorageOrErr = MemoryManager->allocate(Size, nullptr);
427+
if (!PtrStorageOrErr)
428+
return PtrStorageOrErr.takeError();
429+
430+
*PtrStorage = *PtrStorageOrErr;
427431
if (Size && *PtrStorage == nullptr)
428432
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
429433
"failure to allocate from AMDGPU memory manager");
@@ -443,15 +447,12 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy {
443447
private:
444448
/// Allocation callback that will be called once the memory manager does not
445449
/// have more previously allocated buffers.
446-
void *allocate(size_t Size, void *HstPtr, TargetAllocTy Kind) override;
450+
Expected<void *> allocate(size_t Size, void *HstPtr,
451+
TargetAllocTy Kind) override;
447452

448453
/// Deallocation callback that will be called by the memory manager.
449-
int free(void *TgtPtr, TargetAllocTy Kind) override {
450-
if (auto Err = MemoryPool->deallocate(TgtPtr)) {
451-
consumeError(std::move(Err));
452-
return OFFLOAD_FAIL;
453-
}
454-
return OFFLOAD_SUCCESS;
454+
Error free(void *TgtPtr, TargetAllocTy Kind) override {
455+
return MemoryPool->deallocate(TgtPtr);
455456
}
456457

457458
/// The underlying plugin that owns this memory manager.
@@ -2339,12 +2340,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
23392340
}
23402341

23412342
/// Allocate memory on the device or related to the device.
2342-
void *allocate(size_t Size, void *, TargetAllocTy Kind) override;
2343+
Expected<void *> allocate(size_t Size, void *, TargetAllocTy Kind) override;
23432344

23442345
/// Deallocate memory on the device or related to the device.
2345-
int free(void *TgtPtr, TargetAllocTy Kind) override {
2346+
Error free(void *TgtPtr, TargetAllocTy Kind) override {
23462347
if (TgtPtr == nullptr)
2347-
return OFFLOAD_SUCCESS;
2348+
return Plugin::success();
23482349

23492350
AMDGPUMemoryPoolTy *MemoryPool = nullptr;
23502351
switch (Kind) {
@@ -2360,17 +2361,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
23602361
break;
23612362
}
23622363

2363-
if (!MemoryPool) {
2364-
REPORT("No memory pool for the specified allocation kind\n");
2365-
return OFFLOAD_FAIL;
2366-
}
2364+
if (!MemoryPool)
2365+
return Plugin::error(ErrorCode::OUT_OF_RESOURCES,
2366+
"no memory pool for the specified allocation kind");
23672367

2368-
if (Error Err = MemoryPool->deallocate(TgtPtr)) {
2369-
REPORT("%s\n", toString(std::move(Err)).data());
2370-
return OFFLOAD_FAIL;
2371-
}
2368+
if (auto Err = MemoryPool->deallocate(TgtPtr))
2369+
return Err;
23722370

2373-
return OFFLOAD_SUCCESS;
2371+
return Plugin::success();
23742372
}
23752373

23762374
/// Synchronize current thread with the pending operations on the async info.
@@ -3813,14 +3811,13 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
38133811
return Plugin::error(OffloadErrCode, ErrFmt, Args..., Desc);
38143812
}
38153813

3816-
void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
3817-
TargetAllocTy Kind) {
3814+
Expected<void *> AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
3815+
TargetAllocTy Kind) {
38183816
// Allocate memory from the pool.
38193817
void *Ptr = nullptr;
3820-
if (auto Err = MemoryPool->allocate(Size, &Ptr)) {
3821-
consumeError(std::move(Err));
3822-
return nullptr;
3823-
}
3818+
if (auto Err = MemoryPool->allocate(Size, &Ptr))
3819+
return std::move(Err);
3820+
38243821
assert(Ptr && "Invalid pointer");
38253822

38263823
// Get a list of agents that can access this memory pool.
@@ -3830,14 +3827,13 @@ void *AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr,
38303827
[&](hsa_agent_t Agent) { return MemoryPool->canAccess(Agent); });
38313828

38323829
// Allow all valid kernel agents to access the allocation.
3833-
if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents)) {
3834-
REPORT("%s\n", toString(std::move(Err)).data());
3835-
return nullptr;
3836-
}
3830+
if (auto Err = MemoryPool->enableAccess(Ptr, Size, Agents))
3831+
return std::move(Err);
38373832
return Ptr;
38383833
}
38393834

3840-
void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
3835+
Expected<void *> AMDGPUDeviceTy::allocate(size_t Size, void *,
3836+
TargetAllocTy Kind) {
38413837
if (Size == 0)
38423838
return nullptr;
38433839

@@ -3856,17 +3852,14 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
38563852
break;
38573853
}
38583854

3859-
if (!MemoryPool) {
3860-
REPORT("No memory pool for the specified allocation kind\n");
3861-
return nullptr;
3862-
}
3855+
if (!MemoryPool)
3856+
return Plugin::error(ErrorCode::UNSUPPORTED,
3857+
"no memory pool for the specified allocation kind");
38633858

38643859
// Allocate from the corresponding memory pool.
38653860
void *Alloc = nullptr;
3866-
if (Error Err = MemoryPool->allocate(Size, &Alloc)) {
3867-
REPORT("%s\n", toString(std::move(Err)).data());
3868-
return nullptr;
3869-
}
3861+
if (auto Err = MemoryPool->allocate(Size, &Alloc))
3862+
return std::move(Err);
38703863

38713864
if (Alloc) {
38723865
// Get a list of agents that can access this memory pool. Inherently
@@ -3879,10 +3872,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
38793872
});
38803873

38813874
// Enable all valid kernel agents to access the buffer.
3882-
if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents)) {
3883-
REPORT("%s\n", toString(std::move(Err)).data());
3884-
return nullptr;
3885-
}
3875+
if (auto Err = MemoryPool->enableAccess(Alloc, Size, Agents))
3876+
return std::move(Err);
38863877
}
38873878

38883879
return Alloc;

offload/plugins-nextgen/common/include/MemoryManager.h

Lines changed: 45 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -25,18 +25,24 @@
2525
#include "Shared/Utils.h"
2626
#include "omptarget.h"
2727

28+
#include "llvm/Support/Error.h"
29+
30+
namespace llvm {
31+
2832
/// Base class of per-device allocator.
2933
class DeviceAllocatorTy {
3034
public:
3135
virtual ~DeviceAllocatorTy() = default;
3236

3337
/// Allocate a memory of size \p Size . \p HstPtr is used to assist the
3438
/// allocation.
35-
virtual void *allocate(size_t Size, void *HstPtr,
36-
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
39+
virtual Expected<void *>
40+
allocate(size_t Size, void *HstPtr,
41+
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
3742

3843
/// Delete the pointer \p TgtPtr on the device
39-
virtual int free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
44+
virtual Error free(void *TgtPtr,
45+
TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
4046
};
4147

4248
/// Class of memory manager. The memory manager is per-device by using
@@ -134,17 +140,17 @@ class MemoryManagerTy {
134140
size_t SizeThreshold = 1U << 13;
135141

136142
/// Request memory from target device
137-
void *allocateOnDevice(size_t Size, void *HstPtr) const {
143+
Expected<void *> allocateOnDevice(size_t Size, void *HstPtr) const {
138144
return DeviceAllocator.allocate(Size, HstPtr, TARGET_ALLOC_DEVICE);
139145
}
140146

141147
/// Deallocate data on device
142-
int deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
148+
Error deleteOnDevice(void *Ptr) const { return DeviceAllocator.free(Ptr); }
143149

144150
/// This function is called when it tries to allocate memory on device but the
145151
/// device returns out of memory. It will first free all memory in the
146152
/// FreeList and try to allocate again.
147-
void *freeAndAllocate(size_t Size, void *HstPtr) {
153+
Expected<void *> freeAndAllocate(size_t Size, void *HstPtr) {
148154
std::vector<void *> RemoveList;
149155

150156
// Deallocate all memory in FreeList
@@ -154,7 +160,8 @@ class MemoryManagerTy {
154160
if (List.empty())
155161
continue;
156162
for (const NodeTy &N : List) {
157-
deleteOnDevice(N.Ptr);
163+
if (auto Err = deleteOnDevice(N.Ptr))
164+
return Err;
158165
RemoveList.push_back(N.Ptr);
159166
}
160167
FreeLists[I].clear();
@@ -175,14 +182,22 @@ class MemoryManagerTy {
175182
/// allocate directly on the device. If a \p nullptr is returned, it might
176183
/// be because the device is OOM. In that case, it will free all unused
177184
/// memory and then try again.
178-
void *allocateOrFreeAndAllocateOnDevice(size_t Size, void *HstPtr) {
179-
void *TgtPtr = allocateOnDevice(Size, HstPtr);
185+
Expected<void *> allocateOrFreeAndAllocateOnDevice(size_t Size,
186+
void *HstPtr) {
187+
auto TgtPtrOrErr = allocateOnDevice(Size, HstPtr);
188+
if (!TgtPtrOrErr)
189+
return TgtPtrOrErr.takeError();
190+
191+
void *TgtPtr = *TgtPtrOrErr;
180192
// We cannot get memory from the device. It might be due to OOM. Let's
181193
// free all memory in FreeLists and try again.
182194
if (TgtPtr == nullptr) {
183195
DP("Failed to get memory on device. Free all memory in FreeLists and "
184196
"try again.\n");
185-
TgtPtr = freeAndAllocate(Size, HstPtr);
197+
TgtPtrOrErr = freeAndAllocate(Size, HstPtr);
198+
if (!TgtPtrOrErr)
199+
return TgtPtrOrErr.takeError();
200+
TgtPtr = *TgtPtrOrErr;
186201
}
187202

188203
if (TgtPtr == nullptr)
@@ -204,16 +219,17 @@ class MemoryManagerTy {
204219

205220
/// Destructor
206221
~MemoryManagerTy() {
207-
for (auto Itr = PtrToNodeTable.begin(); Itr != PtrToNodeTable.end();
208-
++Itr) {
209-
assert(Itr->second.Ptr && "nullptr in map table");
210-
deleteOnDevice(Itr->second.Ptr);
222+
for (auto &PtrToNode : PtrToNodeTable) {
223+
assert(PtrToNode.second.Ptr && "nullptr in map table");
224+
if (auto Err = deleteOnDevice(PtrToNode.second.Ptr))
225+
REPORT("Failure to delete memory: %s\n",
226+
toString(std::move(Err)).data());
211227
}
212228
}
213229

214230
/// Allocate memory of size \p Size from target device. \p HstPtr is used to
215231
/// assist the allocation.
216-
void *allocate(size_t Size, void *HstPtr) {
232+
Expected<void *> allocate(size_t Size, void *HstPtr) {
217233
// If the size is zero, we will not bother the target device. Just return
218234
// nullptr directly.
219235
if (Size == 0)
@@ -228,11 +244,14 @@ class MemoryManagerTy {
228244
DP("%zu is greater than the threshold %zu. Allocate it directly from "
229245
"device\n",
230246
Size, SizeThreshold);
231-
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
247+
auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
248+
if (!TgtPtrOrErr)
249+
return TgtPtrOrErr.takeError();
232250

233-
DP("Got target pointer " DPxMOD ". Return directly.\n", DPxPTR(TgtPtr));
251+
DP("Got target pointer " DPxMOD ". Return directly.\n",
252+
DPxPTR(*TgtPtrOrErr));
234253

235-
return TgtPtr;
254+
return *TgtPtrOrErr;
236255
}
237256

238257
NodeTy *NodePtr = nullptr;
@@ -260,8 +279,11 @@ class MemoryManagerTy {
260279
if (NodePtr == nullptr) {
261280
DP("Cannot find a node in the FreeLists. Allocate on device.\n");
262281
// Allocate one on device
263-
void *TgtPtr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
282+
auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr);
283+
if (!TgtPtrOrErr)
284+
return TgtPtrOrErr.takeError();
264285

286+
void *TgtPtr = *TgtPtrOrErr;
265287
if (TgtPtr == nullptr)
266288
return nullptr;
267289

@@ -282,7 +304,7 @@ class MemoryManagerTy {
282304
}
283305

284306
/// Deallocate memory pointed by \p TgtPtr
285-
int free(void *TgtPtr) {
307+
Error free(void *TgtPtr) {
286308
DP("MemoryManagerTy::free: target memory " DPxMOD ".\n", DPxPTR(TgtPtr));
287309

288310
NodeTy *P = nullptr;
@@ -314,7 +336,7 @@ class MemoryManagerTy {
314336
FreeLists[B].insert(*P);
315337
}
316338

317-
return OFFLOAD_SUCCESS;
339+
return Error::success();
318340
}
319341

320342
/// Get the size threshold from the environment variable
@@ -344,4 +366,6 @@ class MemoryManagerTy {
344366
constexpr const size_t MemoryManagerTy::BucketSize[];
345367
constexpr const int MemoryManagerTy::NumBuckets;
346368

369+
} // namespace llvm
370+
347371
#endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_MEMORYMANAGER_H

0 commit comments

Comments
 (0)