@@ -2964,6 +2964,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29642964 OMPX_DGPUMaps (" OMPX_DGPU_MAPS" , false ),
29652965 OMPX_SharedDescriptorMaxSize (" LIBOMPTARGET_SHARED_DESCRIPTOR_MAX_SIZE" ,
29662966 96 ),
2967+ OMPX_EnableDevice2DeviceMemAccess (
2968+ " OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS" , false ),
29672969 AMDGPUStreamManager (*this , Agent), AMDGPUEventManager(*this ),
29682970 AMDGPUSignalManager (*this ), Agent(Agent), HostDevice(HostDevice) {}
29692971
@@ -4557,6 +4559,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
45574559 /// memory. Default value is 96.
45584560 UInt32Envar OMPX_SharedDescriptorMaxSize;
45594561
4562+ // Determines whether the HSA API is called, upon device memory allocation,
4563+ // to make the memory accessible from other agents.
4564+ // Disabled by default.
4565+ BoolEnvar OMPX_EnableDevice2DeviceMemAccess;
4566+
45604567 // / Stream manager for AMDGPU streams.
45614568 AMDGPUStreamManagerTy AMDGPUStreamManager;
45624569
@@ -5323,7 +5330,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
53235330 }
53245331 }
53255332
5326- if (Alloc) {
5333+ if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED ||
5334+ OMPX_EnableDevice2DeviceMemAccess)) {
53275335 // Get a list of agents that can access this memory pool. Inherently
53285336 // necessary for host or shared allocations. For device memory this is done
53295337 // only when OMPX_EnableDevice2DeviceMemAccess is set, to allow device to device memcpy.
0 commit comments