From 3dfcdfde5a79bfc587f3a5e45789b66e9e6a26ff Mon Sep 17 00:00:00 2001 From: Winston Zhang Date: Wed, 18 Sep 2024 11:10:35 -0700 Subject: [PATCH] [L0] zeDeviceCanAccessPeer call optimization The more cards are used, the more expensive the repeated zeDeviceCanAccessPeer calls become. Improve this by caching the P2P device status, thus reducing the number of zeDeviceCanAccessPeer calls. Signed-off-by: Winston Zhang --- source/adapters/level_zero/context.hpp | 3 +++ source/adapters/level_zero/usm.cpp | 29 ++++++++++++++++++-------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index c2fbba633f..e7c0d784a0 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -100,6 +100,9 @@ struct ur_context_handle_t_ : _ur_object { l0_command_list_cache_info>>> ZeCopyCommandListCache; + std::unordered_map> + P2PDeviceCache; + // Store USM pool for USM shared and device allocations. There is 1 memory // pool per each pair of (context, device) per each memory type. 
std::unordered_map diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 5296391794..b5e7a0242b 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -154,15 +154,26 @@ static ur_result_t USMAllocationMakeResident( } else { Devices.push_back(Device); if (ForceResidency == USMAllocationForceResidencyType::P2PDevices) { - ze_bool_t P2P; - for (const auto &D : Context->Devices) { - if (D == Device) - continue; - // TODO: Cache P2P devices for a context - ZE2UR_CALL(zeDeviceCanAccessPeer, - (D->ZeDevice, Device->ZeDevice, &P2P)); - if (P2P) - Devices.push_back(D); + // Check if the P2P devices are already cached + auto it = Context->P2PDeviceCache.find(Device); + if (it != Context->P2PDeviceCache.end()) { + // Use cached P2P devices + Devices.insert(Devices.end(), it->second.begin(), it->second.end()); + } else { + // Query for P2P devices and update the cache + std::list P2PDevices; + ze_bool_t P2P; + for (const auto &D : Context->Devices) { + if (D == Device) + continue; + ZE2UR_CALL(zeDeviceCanAccessPeer, + (D->ZeDevice, Device->ZeDevice, &P2P)); + if (P2P) + P2PDevices.push_back(D); + } + // Update the cache + Context->P2PDeviceCache[Device] = P2PDevices; + Devices.insert(Devices.end(), P2PDevices.begin(), P2PDevices.end()); } } }