@@ -2688,6 +2688,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26882688 OMPX_APUPrefaultMemcopySize (" LIBOMPTARGET_APU_PREFAULT_MEMCOPY_SIZE" ,
26892689 1 * 1024 * 1024 ), // 1MB
26902690 OMPX_DGPUMaps (" OMPX_DGPU_MAPS" , false ),
2691+ OMPX_EnableDevice2DeviceMemAccess (
2692+ " OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS" , false ),
26912693 AMDGPUStreamManager (*this , Agent), AMDGPUEventManager(*this ),
26922694 AMDGPUSignalManager (*this ), Agent(Agent), HostDevice(HostDevice) {}
26932695
@@ -4276,6 +4278,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42764278 // / copy on APUs regardless of the setting of HSA_XNACK.
42774279 BoolEnvar OMPX_DGPUMaps;
42784280
4281+ // Determines whether we call the HSA API, upon device memory allocation,
4282+ // to make the memory accessible from other agents.
4283+ // Disabled by default.
4284+ BoolEnvar OMPX_EnableDevice2DeviceMemAccess;
4285+
42794286 // / Stream manager for AMDGPU streams.
42804287 AMDGPUStreamManagerTy AMDGPUStreamManager;
42814288
@@ -5030,7 +5037,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
50305037 }
50315038 }
50325039
5033- if (Alloc) {
5040+ if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED ||
5041+ OMPX_EnableDevice2DeviceMemAccess)) {
50345042 // Get a list of agents that can access this memory pool. Inherently
50355043 // necessary for host or shared allocations. Also enabled for device memory,
50365044 // when OMPX_EnableDevice2DeviceMemAccess is set, to allow device to device memcpy.
0 commit comments