@@ -4337,6 +4337,42 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
43374337 return Plugin::success ();
43384338 }
43394339
4340+ bool checkIfCoarseGrainMemoryNearOrAbove64GB () {
4341+ for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
4342+ if (Pool->isGlobal () && Pool->isCoarseGrained ()) {
4343+ uint64_t Value;
4344+ hsa_status_t Status =
4345+ Pool->getAttrRaw (HSA_AMD_MEMORY_POOL_INFO_SIZE, Value);
4346+ if (Status != HSA_STATUS_SUCCESS) continue ;
4347+ constexpr uint64_t Almost64Gig = 0xFF0000000 ;
4348+ if (Value >= Almost64Gig) return true ;
4349+ }
4350+ }
4351+ return false ; // CoarseGrain pool w/ 64GB or more capacity not found
4352+ }
4353+
4354+ size_t getMemoryManagerSizeThreshold () override {
4355+ // TODO: check performance on lower memory capacity GPU
4356+ // for lowering the threshold from 64GB.
4357+ if (checkIfCoarseGrainMemoryNearOrAbove64GB ()) {
4358+ // Set GenericDeviceTy::MemoryManager's Threshold to ~2GB,
4359+ // used if not set by LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD
4360+ // ENV var. This MemoryManager is used for
4361+ // omp_target_alloc(), OpenMP (non-usm) map clause, etc.
4362+ //
4363+ // TODO 1: Fine tune to lower the threshold closer to 1GB.
4364+ // TODO 2: HSA-level memory manager on the user-side such that
4365+ // memory management is shared with HIP and OpenCL.
4366+ //
4367+ // If this value needs to go above UINT_MAX, consider
4368+ // adding sizeof(size_t) check to avoid unpleasant truncation
4369+ // surprises where size_t is still 32bit.
4370+ constexpr size_t Almost2Gig = 2000000000u ;
4371+ return Almost2Gig;
4372+ }
4373+ return 0 ;
4374+ }
4375+
43404376 // / Determines if
43414377 // / - Coarse graining upon USM map on MI200 needs to be enabled.
43424378 void specialBehaviorHandling () {
0 commit comments