Skip to content

Commit d6221ba

Browse files
committed
[AMDGPU][Offload] Adjustment to reflect upstream PR151882
1 parent 5268586 commit d6221ba

File tree

5 files changed

+72
-28
lines changed

5 files changed

+72
-28
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4513,36 +4513,34 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
45134513

45144514
bool checkIfCoarseGrainMemoryNearOrAbove64GB() {
45154515
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
4516-
if (Pool->isGlobal() && Pool->isCoarseGrained()) {
4517-
uint64_t Value;
4518-
hsa_status_t Status =
4519-
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
4520-
if (Status != HSA_STATUS_SUCCESS) continue;
4521-
constexpr uint64_t Almost64Gig = 0xFF0000000;
4522-
if (Value >= Almost64Gig) return true;
4523-
}
4516+
if (!Pool->isGlobal() || !Pool->isCoarseGrained())
4517+
continue;
4518+
uint64_t Value;
4519+
hsa_status_t Status =
4520+
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
4521+
if (Status != HSA_STATUS_SUCCESS)
4522+
continue;
4523+
constexpr uint64_t Almost64Gig = 0xFF0000000;
4524+
if (Value >= Almost64Gig)
4525+
return true;
45244526
}
45254527
return false; // CoarseGrain pool w/ 64GB or more capacity not found
45264528
}
45274529

45284530
size_t getMemoryManagerSizeThreshold() override {
4529-
// TODO: check performance on lower memory capacity GPU
4530-
// for lowering the threshold from 64GB.
4531+
// Targeting high memory capacity GPUs such as
4532+
// data center GPUs.
45314533
if (checkIfCoarseGrainMemoryNearOrAbove64GB()) {
4532-
// Set GenericDeviceTy::MemoryManager's Threshold to ~2GB,
4533-
// used if not set by LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD
4534-
// ENV var. This MemoryManager is used for
4535-
// omp_target_alloc(), OpenMP (non-usm) map clause, etc.
4536-
//
4537-
// TODO 1: Fine tune to lower the threshold closer to 1GB.
4538-
// TODO 2: HSA-level memory manager on the user-side such that
4539-
// memory management is shared with HIP and OpenCL.
4534+
// Set GenericDeviceTy::MemoryManager's Threshold to 3GiB,
4535+
// if threshold is not already set by ENV var
4536+
// LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD.
4537+
// This MemoryManager is used for omp_target_alloc(), OpenMP
4538+
// (non-usm) map clause, etc.
45404539
//
4541-
// If this value needs to go above UINT_MAX, consider
4542-
// adding sizeof(size_t) check to avoid unpleasant truncation
4543-
// surprises where size_t is still 32bit.
4544-
constexpr size_t Almost3Gig = 3000000000u;
4545-
return Almost3Gig;
4540+
// Ideally, this kind of pooling is best performed at
4541+
// a common level (e.g, user side of HSA) between OpenMP and HIP
4542+
// but that feature does not exist (yet).
4543+
return 3ul * 1024 * 1024 * 1024 /* 3 GiB */;
45464544
}
45474545
return 0;
45484546
}

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1342,7 +1342,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
13421342
MemoryManagerTy *MemoryManager;
13431343

13441344
/// Per device setting of MemoryManager's Threshold
1345-
virtual size_t getMemoryManagerSizeThreshold() { return 0 /* use default */; }
1345+
virtual size_t getMemoryManagerSizeThreshold() { return 0; }
13461346

13471347
/// Environment variables defined by the OpenMP standard.
13481348
Int32Envar OMP_TeamLimit;

offload/test/lit.cfg

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ if config.libomptarget_test_pgo:
121121
# For all other targets, we currently assume it is.
122122
supports_unified_shared_memory = True
123123
supports_apu = False
124+
supports_large_allocation_memory_pool = False
124125
is_mi200 = False
125126
if config.libomptarget_current_target.startswith('nvptx'):
126127
try:
@@ -133,9 +134,11 @@ if config.libomptarget_current_target.startswith('nvptx'):
133134
elif config.libomptarget_current_target.startswith('amdgcn'):
134135
# amdgpu_test_arch contains a list of AMD GPUs in the system
135136
# only check the first one assuming that we will run the test on it.
136-
if not (config.amdgpu_test_arch.startswith("gfx90a") or
137-
config.amdgpu_test_arch.startswith("gfx942") or
138-
config.amdgpu_test_arch.startswith("gfx950")):
137+
if (config.amdgpu_test_arch.startswith("gfx90a") or
138+
config.amdgpu_test_arch.startswith("gfx942") or
139+
config.amdgpu_test_arch.startswith("gfx950")):
140+
supports_large_allocation_memory_pool = True
141+
else:
139142
supports_unified_shared_memory = False
140143
# check if AMD architecture is an APU:
141144
if ((config.amdgpu_test_arch.startswith("gfx942") and
@@ -152,7 +155,8 @@ elif config.libomptarget_current_target.startswith('amdgcn'):
152155
config.available_features.add('apu')
153156
if is_mi200:
154157
config.available_features.add('mi200')
155-
158+
if supports_large_allocation_memory_pool:
159+
config.available_features.add('large_allocation_memory_pool')
156160

157161
# Setup environment to find dynamic library at runtime
158162
if config.operating_system == 'Windows':

offload/test/sanitizer/use_after_free_2.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
// UNSUPPORTED: s390x-ibm-linux-gnu
1111
// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
1212

13+
// If offload memory pooling is enabled for a large allocation, reuse error is
14+
// not detected.
15+
// UNSUPPORTED: large_allocation_memory_pool
16+
1317
#include <omp.h>
1418

1519
int main() {
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// clang-format off
2+
// RUN: %libomptarget-compileopt-generic
3+
// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD=1024 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
4+
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK-PASS
5+
// clang-format on
6+
7+
// If offload memory pooling is enabled for a large allocation, reuse error is
8+
// not detected. Run the test w/ and w/o ENV var override on memory pooling
9+
// threshold.
10+
// REQUIRES: large_allocation_memory_pool
11+
12+
#include <omp.h>
13+
#include <stdio.h>
14+
15+
int main() {
16+
int N = (1 << 30);
17+
char *A = (char *)malloc(N);
18+
char *P;
19+
#pragma omp target map(A[ : N]) map(from : P)
20+
{
21+
P = &A[N / 2];
22+
*P = 3;
23+
}
24+
// clang-format off
25+
// CHECK: OFFLOAD ERROR: memory access fault by GPU {{.*}} (agent 0x{{.*}}) at virtual address [[PTR:0x[0-9a-z]*]]. Reasons: {{.*}}
26+
// CHECK: Device pointer [[PTR]] points into prior host-issued allocation:
27+
// CHECK: Last deallocation:
28+
// CHECK: Last allocation of size 1073741824
29+
// clang-format on
30+
#pragma omp target
31+
{
32+
*P = 5;
33+
}
34+
35+
// CHECK-PASS: PASS
36+
printf("PASS\n");
37+
return 0;
38+
}

0 commit comments

Comments
 (0)