@@ -2922,8 +2922,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2922
2922
// setting default to true here appears to solve random sdma problem
2923
2923
" LIBOMPTARGET_AMDGPU_USE_MULTIPLE_SDMA_ENGINES" , false ),
2924
2924
OMPX_ApuMaps (" OMPX_APU_MAPS" , false ),
2925
- OMPX_DisableUsmMaps ( " OMPX_DISABLE_USM_MAPS " , true ),
2926
- OMPX_NoMapChecks ( " OMPX_DISABLE_MAPS " , true ),
2925
+ OMPX_EnableGFX90ACoarseGrainUsmMaps (
2926
+ " OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS " , false ),
2927
2927
OMPX_StrictSanityChecks (" OMPX_STRICT_SANITY_CHECKS" , false ),
2928
2928
OMPX_SyncCopyBack (" LIBOMPTARGET_SYNC_COPY_BACK" , true ),
2929
2929
OMPX_APUPrefaultMemcopy (" LIBOMPTARGET_APU_PREFAULT_MEMCOPY" , " true" ),
@@ -3195,7 +3195,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3195
3195
if (auto Err = checkIfMI300x ())
3196
3196
return Err;
3197
3197
3198
- // detect special cases for MI200 and MI300A
3198
+ // detect special cases for MI200
3199
3199
specialBehaviorHandling ();
3200
3200
3201
3201
// detect ROCm-specific environment variables
@@ -3758,8 +3758,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3758
3758
bool set_attr = true ) override final {
3759
3759
// If the table has not yet been created, check if the gpu arch is
3760
3760
// MI200 and create it, but only if USM Map is enabled.
3761
- if (!IsEquippedWithGFX90A || OMPX_DisableUsmMaps )
3762
- return Plugin::success ( );
3761
+ if (!IsEquippedWithGFX90A || !EnableGFX90ACoarseGrainUsmMaps )
3762
+ return Plugin::error ( " Invalid request to set coarse grain mode " );
3763
3763
if (!CoarseGrainMemoryTable)
3764
3764
CoarseGrainMemoryTable = new AMDGPUMemTypeBitFieldTable (
3765
3765
AMDGPU_X86_64_SystemConfiguration::max_addressable_byte +
@@ -4336,21 +4336,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4336
4336
}
4337
4337
4338
4338
// / Determines if
4339
- // / - Map checks should be disabled
4340
- // / - Coarse graining upon map on MI200 needs to be disabled.
4341
- // / - Prefaulting GPU page tables on MI300A needs to be enabled.
4339
+ // / - Coarse graining upon USM map on MI200 needs to be enabled.
4342
4340
void specialBehaviorHandling () {
4343
- if (OMPX_NoMapChecks.get () == false ) {
4344
- NoUSMMapChecks = false ;
4345
- }
4346
-
4347
- if (OMPX_DisableUsmMaps.get () == true ) {
4348
- EnableFineGrainedMemory = true ;
4349
- }
4341
+ EnableGFX90ACoarseGrainUsmMaps = OMPX_EnableGFX90ACoarseGrainUsmMaps;
4350
4342
}
4351
4343
4352
- bool IsFineGrainedMemoryEnabledImpl () override final {
4353
- return EnableFineGrainedMemory ;
4344
+ bool IsGfx90aCoarseGrainUsmMapEnabledImpl () override final {
4345
+ return !EnableGFX90ACoarseGrainUsmMaps ;
4354
4346
}
4355
4347
4356
4348
bool hasAPUDeviceImpl () override final { return IsAPU; }
@@ -4457,17 +4449,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4457
4449
// / automatic zero-copy behavior on non-APU GPUs.
4458
4450
BoolEnvar OMPX_ApuMaps;
4459
4451
4460
- // / Value of OMPX_DISABLE_USM_MAPS. Use on MI200
4461
- // / systems to disable both device memory
4462
- // / allocations and host-device memory copies upon
4463
- // / map, and coarse graining of mapped variables.
4464
- BoolEnvar OMPX_DisableUsmMaps;
4465
-
4466
- // / Value of OMPX_DISABLE_MAPS. Turns off map table checks
4467
- // / in libomptarget in unified_shared_memory mode. Legacy:
4468
- // / never turned to false (unified_shared_memory mode is
4469
- // / currently always without map checks.
4470
- BoolEnvar OMPX_NoMapChecks;
4452
+ // / Value of OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS.
4453
+ // / Use on MI200 systems to enable coarse graining
4454
+ // / of mapped variables (and other variables partially
4455
+ // / or fully on the same memory page) under unified
4456
+ // / shared memory.
4457
+ // /
4458
+ // / It was enabled by default up to Rocm6.3
4459
+ // / and env var spelling for controlling it was
4460
+ // / OMPX_DISABLE_USM_MAPS
4461
+ BoolEnvar OMPX_EnableGFX90ACoarseGrainUsmMaps;
4471
4462
4472
4463
// / Makes warnings turn into fatal errors
4473
4464
BoolEnvar OMPX_StrictSanityChecks;
@@ -4552,14 +4543,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4552
4543
// / False otherwise.
4553
4544
bool IsXnackEnabled = false ;
4554
4545
4555
- // Set by OMPX_DISABLE_USM_MAPS environment variable.
4556
- // If set, fine graned memory is used for maps instead of coarse grained.
4557
- bool EnableFineGrainedMemory = false ;
4558
-
4559
- // / Set by OMPX_DISABLE_MAPS environment variable.
4560
- // If false, map checks are performed also in unified_shared_memory mode.
4561
- // TODO: this feature is non functional.
4562
- bool NoUSMMapChecks = true ;
4546
+ // Set by OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS environment variable.
4547
+ // If set, under unified shared memory on MI200, fine grained memory page
4548
+ // is switched to coarse grain (and stay coarse grain) if a variable
4549
+ // residing on the page goes through implicit/explicit OpenMP map.
4550
+ bool EnableGFX90ACoarseGrainUsmMaps = false ;
4563
4551
4564
4552
// / True if in multi-device mode.
4565
4553
bool IsMultiDeviceEnabled = false ;
@@ -5269,10 +5257,10 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
5269
5257
REPORT (" %s\n " , toString (std::move (Err)).data ());
5270
5258
return nullptr ;
5271
5259
}
5272
- // FIXME: Maybe this should be guarded by hasgfx90a
5273
- if (MemoryPool == CoarseGrainedMemoryPools[ 0 ] ) {
5274
- // printf(" Device::allocate calling setCoarseGrainMemoryImpl(Alloc, Size,
5275
- // false)\n");
5260
+ if (MemoryPool == CoarseGrainedMemoryPools[ 0 ] && IsEquippedWithGFX90A &&
5261
+ EnableGFX90ACoarseGrainUsmMaps ) {
5262
+ // Need to register in the coarse grain usm map table
5263
+ // if not already registered.
5276
5264
if (auto Err = setCoarseGrainMemoryImpl (Alloc, Size, /* set_attr=*/ false )) {
5277
5265
REPORT (" %s\n " , toString (std::move (Err)).data ());
5278
5266
return nullptr ;
0 commit comments