From ed14d418e50f8fdb56f05dcd6cd676f3e2e3af12 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin <> Date: Thu, 26 Sep 2024 00:59:14 -0700 Subject: [PATCH 1/2] fix for multinode --- plugin/sycl/device_manager.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/plugin/sycl/device_manager.cc b/plugin/sycl/device_manager.cc index 0ddbf144083b..75c53e00363c 100644 --- a/plugin/sycl/device_manager.cc +++ b/plugin/sycl/device_manager.cc @@ -20,18 +20,25 @@ ::sycl::device DeviceManager::GetDevice(const DeviceOrd& device_spec) const { (collective::IsDistributed()); if (not_use_default_selector) { DeviceRegister& device_register = GetDevicesRegister(); - const int device_idx = - collective::IsDistributed() ? collective::GetRank() : device_spec.ordinal; if (device_spec.IsSyclDefault()) { auto& devices = device_register.devices; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % devices.size() + : device_spec.ordinal; CHECK_LT(device_idx, devices.size()); return devices[device_idx]; } else if (device_spec.IsSyclCPU()) { auto& cpu_devices = device_register.cpu_devices; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % cpu_devices.size() + : device_spec.ordinal; CHECK_LT(device_idx, cpu_devices.size()); return cpu_devices[device_idx]; } else { auto& gpu_devices = device_register.gpu_devices; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % gpu_devices.size() + : device_spec.ordinal; CHECK_LT(device_idx, gpu_devices.size()); return gpu_devices[device_idx]; } From 924ad64bfe500038b9299cd492c71bd05838190c Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin <> Date: Fri, 11 Oct 2024 01:22:45 -0700 Subject: [PATCH 2/2] one more fix --- plugin/sycl/device_manager.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/plugin/sycl/device_manager.cc b/plugin/sycl/device_manager.cc index 75c53e00363c..021ced67ecaf 100644 --- a/plugin/sycl/device_manager.cc +++ b/plugin/sycl/device_manager.cc @@ -70,18 +70,25 @@ ::sycl::queue DeviceManager::GetQueue(const DeviceOrd& device_spec) const { std::lock_guard guard(queue_registering_mutex); if (not_use_default_selector) { DeviceRegister& device_register = GetDevicesRegister(); - const int device_idx = - collective::IsDistributed() ? collective::GetRank() : device_spec.ordinal; if (device_spec.IsSyclDefault()) { auto& devices = device_register.devices; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % devices.size() + : device_spec.ordinal; CHECK_LT(device_idx, devices.size()); queue_register[device_spec.Name()] = ::sycl::queue(devices[device_idx]); } else if (device_spec.IsSyclCPU()) { auto& cpu_devices = device_register.cpu_devices; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % cpu_devices.size() + : device_spec.ordinal; CHECK_LT(device_idx, cpu_devices.size()); queue_register[device_spec.Name()] = ::sycl::queue(cpu_devices[device_idx]); } else if (device_spec.IsSyclGPU()) { auto& gpu_devices = device_register.gpu_devices; + const int device_idx = collective::IsDistributed() + ? collective::GetRank() % gpu_devices.size() + : device_spec.ordinal; CHECK_LT(device_idx, gpu_devices.size()); queue_register[device_spec.Name()] = ::sycl::queue(gpu_devices[device_idx]); }