@@ -2688,6 +2688,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26882688 OMPX_APUPrefaultMemcopySize (" LIBOMPTARGET_APU_PREFAULT_MEMCOPY_SIZE" ,
26892689 1 * 1024 * 1024 ), // 1MB
26902690 OMPX_DGPUMaps (" OMPX_DGPU_MAPS" , false ),
2691+ OMPX_EnableDevice2DeviceMemAccess (
2692+ " OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS" , false ),
26912693 AMDGPUStreamManager (*this , Agent), AMDGPUEventManager(*this ),
26922694 AMDGPUSignalManager (*this ), Agent(Agent), HostDevice(HostDevice) {}
26932695
@@ -4274,6 +4276,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42744276 // / copy on APUs regardless of the setting of HSA_XNACK.
42754277 BoolEnvar OMPX_DGPUMaps;
42764278
4279+ // Determines whether we call the HSA API, upon device memory allocation,
4280+ // to make the memory accessible from other agents.
4281+ // Default is disabled.
4282+ BoolEnvar OMPX_EnableDevice2DeviceMemAccess;
4283+
42774284 // / Stream manager for AMDGPU streams.
42784285 AMDGPUStreamManagerTy AMDGPUStreamManager;
42794286
@@ -5031,7 +5038,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
50315038 }
50325039 }
50335040
5034- if (Alloc) {
5041+ if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED ||
5042+ OMPX_EnableDevice2DeviceMemAccess)) {
50355043 // Get a list of agents that can access this memory pool. Inherently
50365044 // necessary for host or shared allocations. Also enabled for device memory,
50375045 // when OMPX_EnableDevice2DeviceMemAccess is set, to allow device to device memcpy.
0 commit comments