Skip to content

Commit ae12a70

Browse files
hidekisaito authored and ronlieb committed
Add Env var control on enabling device-to-device memory access
1 parent 820edf4 commit ae12a70

File tree

1 file changed

+9
-1
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+9
-1
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2688,6 +2688,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26882688
OMPX_APUPrefaultMemcopySize("LIBOMPTARGET_APU_PREFAULT_MEMCOPY_SIZE",
26892689
1 * 1024 * 1024), // 1MB
26902690
OMPX_DGPUMaps("OMPX_DGPU_MAPS", false),
2691+
OMPX_EnableDevice2DeviceMemAccess(
2692+
"OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS", false),
26912693
AMDGPUStreamManager(*this, Agent), AMDGPUEventManager(*this),
26922694
AMDGPUSignalManager(*this), Agent(Agent), HostDevice(HostDevice) {}
26932695

@@ -4276,6 +4278,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42764278
/// copy on APUs regardless of the setting of HSA_XNACK.
42774279
BoolEnvar OMPX_DGPUMaps;
42784280

4281+
// Determines whether we call HSA API, upon device memory allocation,
4282+
// for making the memory accessible from other agents.
4283+
// Default is disabled
4284+
BoolEnvar OMPX_EnableDevice2DeviceMemAccess;
4285+
42794286
/// Stream manager for AMDGPU streams.
42804287
AMDGPUStreamManagerTy AMDGPUStreamManager;
42814288

@@ -5030,7 +5037,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
50305037
}
50315038
}
50325039

5033-
if (Alloc) {
5040+
if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED ||
5041+
OMPX_EnableDevice2DeviceMemAccess)) {
50345042
// Get a list of agents that can access this memory pool. Inherently
50355043
// necessary for host or shared allocations. Also enabled for device memory
50365044
// to allow device to device memcpy, but only when
// OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS is set (disabled by default).

0 commit comments

Comments
 (0)