Skip to content

Commit 670c453

Browse files
authored
[Offload] Remove handling for device memory pool (#163629)
Summary: This was a lot of code that was only used for upstream LLVM builds of AMDGPU offloading. We have a generic and fast `malloc` in `libc` now so just use that. Simplifies code, can be added back if we start providing alternate forms but I don't think there's a single use-case that would justify it yet.
1 parent 527b7a4 commit 670c453

File tree

15 files changed

+50
-215
lines changed

15 files changed

+50
-215
lines changed

offload/include/Shared/Environment.h

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ enum class DeviceDebugKind : uint32_t {
2121
Assertion = 1U << 0,
2222
FunctionTracing = 1U << 1,
2323
CommonIssues = 1U << 2,
24-
AllocationTracker = 1U << 3,
2524
PGODump = 1U << 4,
2625
};
2726

@@ -36,27 +35,6 @@ struct DeviceEnvironmentTy {
3635
uint64_t HardwareParallelism;
3736
};
3837

39-
struct DeviceMemoryPoolTy {
40-
void *Ptr;
41-
uint64_t Size;
42-
};
43-
44-
struct DeviceMemoryPoolTrackingTy {
45-
uint64_t NumAllocations;
46-
uint64_t AllocationTotal;
47-
uint64_t AllocationMin;
48-
uint64_t AllocationMax;
49-
50-
void combine(DeviceMemoryPoolTrackingTy &Other) {
51-
NumAllocations += Other.NumAllocations;
52-
AllocationTotal += Other.AllocationTotal;
53-
AllocationMin = AllocationMin > Other.AllocationMin ? Other.AllocationMin
54-
: AllocationMin;
55-
AllocationMax = AllocationMax < Other.AllocationMax ? Other.AllocationMax
56-
: AllocationMax;
57-
}
58-
};
59-
6038
// NOTE: Please don't change the order of those members as their indices are
6139
// used in the middle end. Always add the new data member at the end.
6240
// Different from KernelEnvironmentTy below, this structure contains members

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3109,17 +3109,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
31093109
StackSize = Value;
31103110
return Plugin::success();
31113111
}
3112-
Error getDeviceHeapSize(uint64_t &Value) override {
3113-
Value = DeviceMemoryPoolSize;
3114-
return Plugin::success();
3115-
}
3116-
Error setDeviceHeapSize(uint64_t Value) override {
3117-
for (DeviceImageTy *Image : LoadedImages)
3118-
if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value))
3119-
return Err;
3120-
DeviceMemoryPoolSize = Value;
3121-
return Plugin::success();
3122-
}
31233112
Error getDeviceMemorySize(uint64_t &Value) override {
31243113
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
31253114
if (Pool->isGlobal()) {
@@ -3321,9 +3310,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
33213310
/// Reference to the host device.
33223311
AMDHostDeviceTy &HostDevice;
33233312

3324-
/// The current size of the global device memory pool (managed by us).
3325-
uint64_t DeviceMemoryPoolSize = 1L << 29L /*512MB=*/;
3326-
33273313
/// The current size of the stack that will be used in cases where it could
33283314
/// not be statically determined.
33293315
uint64_t StackSize = 16 * 1024 /* 16 KB */;

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -819,10 +819,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
819819
Error unloadBinary(DeviceImageTy *Image);
820820
virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
821821

822-
/// Setup the global device memory pool, if the plugin requires one.
823-
Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image,
824-
uint64_t PoolSize);
825-
826822
// Setup the RPC server for this device if needed. This may not run on some
827823
// plugins like the CPU targets. By default, it will not be executed so it is
828824
// up to the target to override this using the shouldSetupRPCServer function.
@@ -1067,6 +1063,15 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
10671063

10681064
virtual Error getDeviceStackSize(uint64_t &V) = 0;
10691065

1066+
virtual Error getDeviceHeapSize(uint64_t &V) {
1067+
return Plugin::error(error::ErrorCode::UNSUPPORTED,
1068+
"%s not supported by platform", __func__);
1069+
}
1070+
virtual Error setDeviceHeapSize(uint64_t V) {
1071+
return Plugin::error(error::ErrorCode::UNSUPPORTED,
1072+
"%s not supported by platform", __func__);
1073+
}
1074+
10701075
/// Returns true if current plugin architecture is an APU
10711076
/// and unified_shared_memory was not requested by the program.
10721077
bool useAutoZeroCopy();
@@ -1159,12 +1164,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
11591164
/// plugin can implement the setters as no-op and setting the output
11601165
/// value to zero for the getters.
11611166
virtual Error setDeviceStackSize(uint64_t V) = 0;
1162-
virtual Error getDeviceHeapSize(uint64_t &V) = 0;
1163-
virtual Error setDeviceHeapSize(uint64_t V) = 0;
1164-
1165-
/// Indicate whether the device should setup the global device memory pool. If
1166-
/// false is return the value on the device will be uninitialized.
1167-
virtual bool shouldSetupDeviceMemoryPool() const { return true; }
11681167

11691168
/// Indicate whether or not the device should setup the RPC server. This is
11701169
/// only necessary for unhosted targets like the GPU.
@@ -1251,10 +1250,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
12511250
/// Internal representation for OMPT device (initialize & finalize)
12521251
std::atomic<bool> OmptInitialized;
12531252
#endif
1254-
1255-
private:
1256-
DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
1257-
DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
12581253
};
12591254

12601255
/// Class implementing common functionalities of offload plugins. Each plugin

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 0 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -795,19 +795,6 @@ Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
795795
if (auto Err = callGlobalDestructors(Plugin, *Image))
796796
return Err;
797797

798-
if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
799-
GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
800-
DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
801-
GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
802-
sizeof(DeviceMemoryPoolTrackingTy),
803-
&ImageDeviceMemoryPoolTracking);
804-
if (auto Err =
805-
GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
806-
consumeError(std::move(Err));
807-
}
808-
DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
809-
}
810-
811798
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
812799
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
813800
if (!ProfOrErr)
@@ -833,22 +820,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
833820
return Err;
834821
LoadedImages.clear();
835822

836-
if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
837-
// TODO: Write this by default into a file.
838-
printf("\n\n|-----------------------\n"
839-
"| Device memory tracker:\n"
840-
"|-----------------------\n"
841-
"| #Allocations: %lu\n"
842-
"| Byes allocated: %lu\n"
843-
"| Minimal allocation: %lu\n"
844-
"| Maximal allocation: %lu\n"
845-
"|-----------------------\n\n\n",
846-
DeviceMemoryPoolTracking.NumAllocations,
847-
DeviceMemoryPoolTracking.AllocationTotal,
848-
DeviceMemoryPoolTracking.AllocationMin,
849-
DeviceMemoryPoolTracking.AllocationMax);
850-
}
851-
852823
// Delete the memory manager before deinitializing the device. Otherwise,
853824
// we may delete device allocations after the device is deinitialized.
854825
if (MemoryManager)
@@ -901,18 +872,6 @@ Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
901872
// Add the image to list.
902873
LoadedImages.push_back(Image);
903874

904-
// Setup the global device memory pool if needed.
905-
if (!Plugin.getRecordReplay().isReplaying() &&
906-
shouldSetupDeviceMemoryPool()) {
907-
uint64_t HeapSize;
908-
auto SizeOrErr = getDeviceHeapSize(HeapSize);
909-
if (SizeOrErr) {
910-
REPORT("No global device memory pool due to error: %s\n",
911-
toString(std::move(SizeOrErr)).data());
912-
} else if (auto Err = setupDeviceMemoryPool(Plugin, *Image, HeapSize))
913-
return std::move(Err);
914-
}
915-
916875
if (auto Err = setupRPCServer(Plugin, *Image))
917876
return std::move(Err);
918877

@@ -936,51 +895,6 @@ Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
936895
return Image;
937896
}
938897

939-
Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
940-
DeviceImageTy &Image,
941-
uint64_t PoolSize) {
942-
// Free the old pool, if any.
943-
if (DeviceMemoryPool.Ptr) {
944-
if (auto Err = dataDelete(DeviceMemoryPool.Ptr,
945-
TargetAllocTy::TARGET_ALLOC_DEVICE))
946-
return Err;
947-
}
948-
949-
DeviceMemoryPool.Size = PoolSize;
950-
auto AllocOrErr = dataAlloc(PoolSize, /*HostPtr=*/nullptr,
951-
TargetAllocTy::TARGET_ALLOC_DEVICE);
952-
if (AllocOrErr) {
953-
DeviceMemoryPool.Ptr = *AllocOrErr;
954-
} else {
955-
auto Err = AllocOrErr.takeError();
956-
REPORT("Failure to allocate device memory for global memory pool: %s\n",
957-
toString(std::move(Err)).data());
958-
DeviceMemoryPool.Ptr = nullptr;
959-
DeviceMemoryPool.Size = 0;
960-
}
961-
962-
// Create the metainfo of the device environment global.
963-
GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
964-
if (!GHandler.isSymbolInImage(*this, Image,
965-
"__omp_rtl_device_memory_pool_tracker")) {
966-
DP("Skip the memory pool as there is no tracker symbol in the image.");
967-
return Error::success();
968-
}
969-
970-
GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
971-
sizeof(DeviceMemoryPoolTrackingTy),
972-
&DeviceMemoryPoolTracking);
973-
if (auto Err = GHandler.writeGlobalToDevice(*this, Image, TrackerGlobal))
974-
return Err;
975-
976-
// Create the metainfo of the device environment global.
977-
GlobalTy DevEnvGlobal("__omp_rtl_device_memory_pool",
978-
sizeof(DeviceMemoryPoolTy), &DeviceMemoryPool);
979-
980-
// Write device environment values to the device.
981-
return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
982-
}
983-
984898
Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
985899
DeviceImageTy &Image) {
986900
// The plugin either does not need an RPC server or it is unavailable.

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,11 +1235,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
12351235
return Info;
12361236
}
12371237

1238-
virtual bool shouldSetupDeviceMemoryPool() const override {
1239-
/// We use the CUDA malloc for now.
1240-
return false;
1241-
}
1242-
12431238
/// Getters and setters for stack and heap sizes.
12441239
Error getDeviceStackSize(uint64_t &Value) override {
12451240
return getCtxLimit(CU_LIMIT_STACK_SIZE, Value);

offload/plugins-nextgen/host/src/rtl.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -380,9 +380,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
380380
return Info;
381381
}
382382

383-
/// This plugin should not setup the device environment or memory pool.
384-
virtual bool shouldSetupDeviceMemoryPool() const override { return false; };
385-
386383
/// Getters and setters for stack size and heap size not relevant.
387384
Error getDeviceStackSize(uint64_t &Value) override {
388385
Value = 0;
@@ -391,11 +388,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
391388
Error setDeviceStackSize(uint64_t Value) override {
392389
return Plugin::success();
393390
}
394-
Error getDeviceHeapSize(uint64_t &Value) override {
395-
Value = 0;
396-
return Plugin::success();
397-
}
398-
Error setDeviceHeapSize(uint64_t Value) override { return Plugin::success(); }
399391

400392
private:
401393
/// Grid values for Generic ELF64 plugins.
File renamed without changes.

offload/test/mapping/lambda_mapping.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
55
// RUN: %libomptarget-compileoptxx-run-and-check-generic
66

7+
// REQUIRES: libc
8+
79
#include <iostream>
810

911
template <typename LOOP_BODY>

offload/test/offloading/malloc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ int main() {
1010
int Threads = 64;
1111
int Teams = 10;
1212

13-
// Allocate ~55MB on the device.
13+
// Allocate ~160 KiB on the device.
1414
#pragma omp target map(from : DP)
1515
DP = (long unsigned *)malloc(sizeof(long unsigned) * N * Threads * Teams);
1616

openmp/device/include/Allocator.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,12 @@
1414

1515
#include "DeviceTypes.h"
1616

17-
// Forward declaration.
18-
struct KernelEnvironmentTy;
19-
2017
namespace ompx {
2118

2219
namespace allocator {
2320

2421
static uint64_t constexpr ALIGNMENT = 16;
2522

26-
/// Initialize the allocator according to \p KernelEnvironment
27-
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
28-
2923
/// Allocate \p Size bytes.
3024
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
3125
alloc(uint64_t Size);

0 commit comments

Comments
 (0)