Skip to content

Commit 6a31211

Browse files
hidekisaito authored and ronlieb committed
[Offload][AMDGPU] Rename OMPX_DISABLE_USM_MAPS env var and associated code cleanup
Change-Id: Idfc141d8f8c476b0b08f99903692df47b64b7c8f
1 parent 1264f3b commit 6a31211

File tree

6 files changed

+52
-53
lines changed

6 files changed

+52
-53
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 28 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2922,8 +2922,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29222922
// setting default to true here appears to solve random sdma problem
29232923
"LIBOMPTARGET_AMDGPU_USE_MULTIPLE_SDMA_ENGINES", false),
29242924
OMPX_ApuMaps("OMPX_APU_MAPS", false),
2925-
OMPX_DisableUsmMaps("OMPX_DISABLE_USM_MAPS", true),
2926-
OMPX_NoMapChecks("OMPX_DISABLE_MAPS", true),
2925+
OMPX_EnableGFX90ACoarseGrainUsmMaps(
2926+
"OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS", false),
29272927
OMPX_StrictSanityChecks("OMPX_STRICT_SANITY_CHECKS", false),
29282928
OMPX_SyncCopyBack("LIBOMPTARGET_SYNC_COPY_BACK", true),
29292929
OMPX_APUPrefaultMemcopy("LIBOMPTARGET_APU_PREFAULT_MEMCOPY", "true"),
@@ -3195,7 +3195,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
31953195
if (auto Err = checkIfMI300x())
31963196
return Err;
31973197

3198-
// detect special cases for MI200 and MI300A
3198+
// detect special cases for MI200
31993199
specialBehaviorHandling();
32003200

32013201
// detect ROCm-specific environment variables
@@ -3758,8 +3758,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37583758
bool set_attr = true) override final {
37593759
// If the table has not yet been created, check if the gpu arch is
37603760
// MI200 and create it, but only if USM Map is enabled.
3761-
if (!IsEquippedWithGFX90A || OMPX_DisableUsmMaps)
3762-
return Plugin::success();
3761+
if (!IsEquippedWithGFX90A || !EnableGFX90ACoarseGrainUsmMaps)
3762+
return Plugin::error("Invalid request to set coarse grain mode");
37633763
if (!CoarseGrainMemoryTable)
37643764
CoarseGrainMemoryTable = new AMDGPUMemTypeBitFieldTable(
37653765
AMDGPU_X86_64_SystemConfiguration::max_addressable_byte +
@@ -4336,21 +4336,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
43364336
}
43374337

43384338
/// Determines if
4339-
/// - Map checks should be disabled
4340-
/// - Coarse graining upon map on MI200 needs to be disabled.
4341-
/// - Prefaulting GPU page tables on MI300A needs to be enabled.
4339+
/// - Coarse graining upon USM map on MI200 needs to be enabled.
43424340
void specialBehaviorHandling() {
4343-
if (OMPX_NoMapChecks.get() == false) {
4344-
NoUSMMapChecks = false;
4345-
}
4346-
4347-
if (OMPX_DisableUsmMaps.get() == true) {
4348-
EnableFineGrainedMemory = true;
4349-
}
4341+
EnableGFX90ACoarseGrainUsmMaps = OMPX_EnableGFX90ACoarseGrainUsmMaps;
43504342
}
43514343

4352-
bool IsFineGrainedMemoryEnabledImpl() override final {
4353-
return EnableFineGrainedMemory;
4344+
bool IsGfx90aCoarseGrainUsmMapEnabledImpl() override final {
4345+
return !EnableGFX90ACoarseGrainUsmMaps;
43544346
}
43554347

43564348
bool hasAPUDeviceImpl() override final { return IsAPU; }
@@ -4457,17 +4449,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
44574449
/// automatic zero-copy behavior on non-APU GPUs.
44584450
BoolEnvar OMPX_ApuMaps;
44594451

4460-
/// Value of OMPX_DISABLE_USM_MAPS. Use on MI200
4461-
/// systems to disable both device memory
4462-
/// allocations and host-device memory copies upon
4463-
/// map, and coarse graining of mapped variables.
4464-
BoolEnvar OMPX_DisableUsmMaps;
4465-
4466-
/// Value of OMPX_DISABLE_MAPS. Turns off map table checks
4467-
/// in libomptarget in unified_shared_memory mode. Legacy:
4468-
/// never turned to false (unified_shared_memory mode is
4469-
/// currently always without map checks).
4470-
BoolEnvar OMPX_NoMapChecks;
4452+
/// Value of OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS.
4453+
/// Use on MI200 systems to enable coarse graining
4454+
/// of mapped variables (and other variables partially
4455+
/// or fully on the same memory page) under unified
4456+
/// shared memory.
4457+
///
4458+
/// It was enabled by default up to ROCm 6.3
4459+
/// and env var spelling for controlling it was
4460+
/// OMPX_DISABLE_USM_MAPS
4461+
BoolEnvar OMPX_EnableGFX90ACoarseGrainUsmMaps;
44714462

44724463
/// Makes warnings turn into fatal errors
44734464
BoolEnvar OMPX_StrictSanityChecks;
@@ -4552,14 +4543,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
45524543
/// False otherwise.
45534544
bool IsXnackEnabled = false;
45544545

4555-
// Set by OMPX_DISABLE_USM_MAPS environment variable.
4556-
// If set, fine grained memory is used for maps instead of coarse grained.
4557-
bool EnableFineGrainedMemory = false;
4558-
4559-
/// Set by OMPX_DISABLE_MAPS environment variable.
4560-
// If false, map checks are performed also in unified_shared_memory mode.
4561-
// TODO: this feature is non functional.
4562-
bool NoUSMMapChecks = true;
4546+
// Set by OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS environment variable.
4547+
// If set, under unified shared memory on MI200, fine grained memory page
4548+
// is switched to coarse grain (and stays coarse grain) if a variable
4549+
// residing on the page goes through implicit/explicit OpenMP map.
4550+
bool EnableGFX90ACoarseGrainUsmMaps = false;
45634551

45644552
/// True if in multi-device mode.
45654553
bool IsMultiDeviceEnabled = false;
@@ -5269,10 +5257,10 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
52695257
REPORT("%s\n", toString(std::move(Err)).data());
52705258
return nullptr;
52715259
}
5272-
// FIXME: Maybe this should be guarded by hasgfx90a
5273-
if (MemoryPool == CoarseGrainedMemoryPools[0]) {
5274-
// printf(" Device::allocate calling setCoarseGrainMemoryImpl(Alloc, Size,
5275-
// false)\n");
5260+
if (MemoryPool == CoarseGrainedMemoryPools[0] && IsEquippedWithGFX90A &&
5261+
EnableGFX90ACoarseGrainUsmMaps) {
5262+
// Need to register in the coarse grain usm map table
5263+
// if not already registered.
52765264
if (auto Err = setCoarseGrainMemoryImpl(Alloc, Size, /*set_attr=*/false)) {
52775265
REPORT("%s\n", toString(std::move(Err)).data());
52785266
return nullptr;

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -956,10 +956,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
956956
virtual bool supportsUnifiedMemoryImpl() { return false; }
957957

958958
// Returns true if coarse graining of mapped variables is
959-
// disabled on MI200 GPUs.
960-
// virtual bool IsFineGrainedMemoryEnabled() { return false; }
961-
bool IsFineGrainedMemoryEnabled();
962-
virtual bool IsFineGrainedMemoryEnabledImpl() { return false; }
959+
// enabled on MI200 GPUs.
960+
// virtual bool IsGfx90aCoarseGrainUsmMapEnabled() { return false; }
961+
bool IsGfx90aCoarseGrainUsmMapEnabled();
962+
virtual bool IsGfx90aCoarseGrainUsmMapEnabledImpl() { return false; }
963963

964964
/// Create an event.
965965
Error createEvent(void **EventPtrStorage);
@@ -1446,8 +1446,9 @@ struct GenericPluginTy {
14461446
/// Returns if this device supports USM.
14471447
bool supports_unified_memory(int32_t DeviceId);
14481448

1449-
/// Returns if fine grained memory is supported.
1450-
bool is_fine_grained_memory_enabled(int32_t DeviceId);
1449+
/// Returns if GFX90A coarse graining of OpenMP mapped
1450+
/// variables is enabled under unified shared memory.
1451+
bool is_gfx90a_coarse_grain_usm_map_enabled(int32_t DeviceId);
14511452

14521453
/// Returns if managed memory is supported.
14531454
bool is_system_supporting_managed_memory(int32_t DeviceId);

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,8 +1773,8 @@ bool GenericDeviceTy::supportsUnifiedMemory() {
17731773
return supportsUnifiedMemoryImpl();
17741774
}
17751775

1776-
bool GenericDeviceTy::IsFineGrainedMemoryEnabled() {
1777-
return IsFineGrainedMemoryEnabledImpl();
1776+
bool GenericDeviceTy::IsGfx90aCoarseGrainUsmMapEnabled() {
1777+
return IsGfx90aCoarseGrainUsmMapEnabledImpl();
17781778
}
17791779

17801780
Error GenericDeviceTy::prepopulatePageTable(void *ptr, int64_t size) {
@@ -2108,9 +2108,11 @@ bool GenericPluginTy::supports_unified_memory(int32_t DeviceId) {
21082108
return R;
21092109
}
21102110

2111-
bool GenericPluginTy::is_fine_grained_memory_enabled(int32_t DeviceId) {
2111+
bool GenericPluginTy::is_gfx90a_coarse_grain_usm_map_enabled(int32_t DeviceId) {
21122112
auto T = logger::log<bool>(__func__, DeviceId);
2113-
auto R = [&]() { return getDevice(DeviceId).IsFineGrainedMemoryEnabled(); }();
2113+
auto R = [&]() {
2114+
return getDevice(DeviceId).IsGfx90aCoarseGrainUsmMapEnabled();
2115+
}();
21142116
T.res(R);
21152117
return R;
21162118
}

offload/src/OpenMP/API.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,11 @@ EXTERN void omp_register_coarse_grain_mem(void *ptr, size_t size, int setattr) {
696696
FATAL_MESSAGE(omp_get_default_device(), "%s",
697697
toString(DeviceOrErr.takeError()).c_str());
698698

699+
if (!(DeviceOrErr->RTL->is_gfx90a(omp_get_default_device()) &&
700+
DeviceOrErr->RTL->is_gfx90a_coarse_grain_usm_map_enabled(
701+
omp_get_default_device())))
702+
return;
703+
699704
bool set_attr = (setattr == 1) ? true : false;
700705
DeviceOrErr->RTL->set_coarse_grain_mem(omp_get_default_device(), ptr, size,
701706
set_attr);

offload/src/OpenMP/Mapping.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
274274
// memory as coarse-grained. The usage of coarse-grained memory can be
275275
// overridden by setting the env-var OMPX_DISABLE_USM_MAPS=1.
276276
if (Device.RTL->is_gfx90a(Device.DeviceID) && HstPtrBegin &&
277-
(!Device.RTL->is_fine_grained_memory_enabled(Device.DeviceID))) {
277+
(!Device.RTL->is_gfx90a_coarse_grain_usm_map_enabled(
278+
Device.DeviceID))) {
278279
Device.RTL->set_coarse_grain_mem_region(Device.DeviceID, HstPtrBegin,
279280
Size);
280281
INFO(OMP_INFOTYPE_MAPPING_CHANGED, Device.DeviceID,

offload/test/mapping/coarse_grain.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
// RUN: | %fcheck-generic -check-prefix=CHECK_FINE
55

66
// RUN: %libomptarget-compilexx-generic
7-
// RUN: env OMPX_DISABLE_USM_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
7+
// RUN: env OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS=0 OMPX_DISABLE_USM_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=30 \
8+
// RUN: %libomptarget-run-generic 2>&1 \
89
// RUN: | %fcheck-generic -check-prefix=CHECK_FINE
910

1011
// RUN: %libomptarget-compilexx-generic
11-
// RUN: env OMPX_DISABLE_USM_MAPS=0 HSA_XNACK=1 LIBOMPTARGET_INFO=30 %libomptarget-run-generic 2>&1 \
12+
// RUN: env OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS=1 OMPX_DISABLE_USM_MAPS=0 HSA_XNACK=1 LIBOMPTARGET_INFO=30 \
13+
// RUN: %libomptarget-run-generic 2>&1 \
1214
// RUN: | %fcheck-generic -check-prefix=CHECK
1315

1416
// UNSUPPORTED: aarch64-unknown-linux-gnu

0 commit comments

Comments
 (0)