@@ -2924,6 +2924,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29242924 OMPX_ApuMaps (" OMPX_APU_MAPS" , false ),
29252925 OMPX_EnableGFX90ACoarseGrainUsmMaps (
29262926 " OMPX_ENABLE_GFX90A_COARSE_GRAIN_USM_MAPS" , false ),
2927+ OMPX_EnableGFX90ACoarseGrainSharedAlloc (
2928+ " OMPX_ENABLE_GFX90A_COARSE_GRAIN_SHARED_ALLOC" , false ),
29272929 OMPX_StrictSanityChecks (" OMPX_STRICT_SANITY_CHECKS" , false ),
29282930 OMPX_SyncCopyBack (" LIBOMPTARGET_SYNC_COPY_BACK" , true ),
29292931 OMPX_APUPrefaultMemcopy (" LIBOMPTARGET_APU_PREFAULT_MEMCOPY" , " true" ),
@@ -4339,6 +4341,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
43394341 /// - Coarse graining upon USM map on MI200 needs to be enabled.
/// - Coarse grain allocation of TARGET_ALLOC_SHARED memory on MI200 can be
///   enabled through OMPX_ENABLE_GFX90A_COARSE_GRAIN_SHARED_ALLOC.
43404342 void specialBehaviorHandling () {
// Copy each BoolEnvar setting into the plain bool member of the same name
// (without the OMPX_ prefix).
43414343 EnableGFX90ACoarseGrainUsmMaps = OMPX_EnableGFX90ACoarseGrainUsmMaps;
4344+ EnableGFX90ACoarseGrainSharedAlloc =
4345+ OMPX_EnableGFX90ACoarseGrainSharedAlloc;
43424346 }
43434347
43444348 bool IsGfx90aCoarseGrainUsmMapEnabledImpl () override final {
@@ -4460,6 +4464,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
44604464 // / OMPX_DISABLE_USM_MAPS
44614465 BoolEnvar OMPX_EnableGFX90ACoarseGrainUsmMaps;
44624466
4467+ /// Value of OMPX_ENABLE_GFX90A_COARSE_GRAIN_SHARED_ALLOC.
4468+ /// Set on MI200 systems to enable coarse-grained
4469+ /// allocation of TARGET_ALLOC_SHARED memory.
4470+ /// The default (false) is fine-grained allocation.
4471+ BoolEnvar OMPX_EnableGFX90ACoarseGrainSharedAlloc;
4472+
44634473 // / Makes warnings turn into fatal errors
44644474 BoolEnvar OMPX_StrictSanityChecks;
44654475
@@ -4549,6 +4559,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
45494559 // residing on the page goes through implicit/explicit OpenMP map.
45504560 bool EnableGFX90ACoarseGrainUsmMaps = false ;
45514561
4562+ // Set by the OMPX_ENABLE_GFX90A_COARSE_GRAIN_SHARED_ALLOC environment variable.
4563+ // If set, TARGET_ALLOC_SHARED memory is allocated from coarse-grained memory on MI200.
4564+ bool EnableGFX90ACoarseGrainSharedAlloc = false ;
4565+
45524566 // / True if in multi-device mode.
45534567 bool IsMultiDeviceEnabled = false ;
45544568
@@ -5246,6 +5260,11 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
52465260 break ;
52475261 }
52485262
5263+ if (Kind == TARGET_ALLOC_SHARED && IsEquippedWithGFX90A &&
5264+ EnableGFX90ACoarseGrainSharedAlloc) {
5265+ MemoryPool = CoarseGrainedMemoryPools[0 ];
5266+ }
5267+
52495268 if (!MemoryPool) {
52505269 REPORT (" No memory pool for the specified allocation kind\n " );
52515270 return nullptr ;
0 commit comments