From 35969a04f7ef4d71b66d78789657a3d590d75619 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 11 Dec 2024 04:50:05 -0800 Subject: [PATCH 1/5] [SYCL] Fix warnings in source/detail code Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/helpers.cpp | 2 +- .../detail/persistent_device_code_cache.cpp | 4 +- .../detail/persistent_device_code_cache.hpp | 4 +- .../program_manager/program_manager.cpp | 62 ++++++++++++------- .../program_manager/program_manager.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 6 +- 6 files changed, 47 insertions(+), 33 deletions(-) diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 5d5afbed51fd3..4bae5c59bb6bb 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -94,7 +94,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage( KernelName, Context, Device); Program = detail::ProgramManager::getInstance().createURProgram( - *DeviceImage, Context, {Device}); + *DeviceImage, Context, {std::move(Device)}); } return {DeviceImage, Program}; } diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp index 205ebd7d42d26..a86e727dcca3a 100644 --- a/sycl/source/detail/persistent_device_code_cache.cpp +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -320,7 +320,7 @@ std::vector> PersistentDeviceCodeCache::getItemFromDisc( std::vector> PersistentDeviceCodeCache::getCompiledKernelFromDisc( const std::vector &Devices, const std::string &BuildOptionsString, - const std::string SourceStr) { + const std::string &SourceStr) { assert(!Devices.empty()); std::vector> Binaries(Devices.size()); std::string FileNames; @@ -518,7 +518,7 @@ std::string PersistentDeviceCodeCache::getCacheItemPath( std::string PersistentDeviceCodeCache::getCompiledKernelItemPath( const device &Device, const std::string &BuildOptionsString, - const std::string SourceString) { + const std::string &SourceString) { std::string cache_root{getRootDir()}; if (cache_root.empty()) { diff --git a/sycl/source/detail/persistent_device_code_cache.hpp b/sycl/source/detail/persistent_device_code_cache.hpp index 78441a251aa75..24cc0bfad83f1 100644 --- a/sycl/source/detail/persistent_device_code_cache.hpp +++ b/sycl/source/detail/persistent_device_code_cache.hpp @@ -170,7 +170,7 @@ class PersistentDeviceCodeCache { static std::string getCompiledKernelItemPath(const device &Device, const std::string &BuildOptionsString, - const std::string SourceString); + const std::string &SourceString); /* Program binaries built for one or more devices are read from persistent * cache and returned in form of vector of programs. Each binary program is @@ -185,7 +185,7 @@ class PersistentDeviceCodeCache { static std::vector> getCompiledKernelFromDisc(const std::vector &Devices, const std::string &BuildOptionsString, - const std::string SourceStr); + const std::string &SourceStr); /* Stores build program in persistent cache */ diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 035ca965ce2e5..fa5967a257efd 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -76,7 +76,7 @@ static ur_program_handle_t createBinaryProgram(const ContextImplPtr Context, const std::vector &Devices, const uint8_t **Binaries, size_t *Lengths, - const std::vector Metadata) { + const std::vector &Metadata) { const AdapterPtr &Adapter = Context->getAdapter(); ur_program_handle_t Program; std::vector DeviceHandles; @@ -230,7 +230,7 @@ ProgramManager::createURProgram(const RTDeviceBinaryImage &Img, "SPIR-V online compilation is not supported in this context"); // Get program metadata from properties - auto ProgMetadata = Img.getProgramMetadataUR(); + const auto &ProgMetadata = Img.getProgramMetadataUR(); // Load the image const ContextImplPtr Ctx = getSyclObjImpl(Context); @@ -825,6 +825,24 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( return getBuiltURProgram(AllImages, Context, {Device}); } +template +void callFuncForAllSubsets(Func &FuncToCall, + const std::set &DeviceSet, + std::set &Subset, int index) { + // Add the current subset to the result list + if (Subset.size() && Subset.size() != DeviceSet.size()) { + FuncToCall(Subset); + } + + auto it = DeviceSet.begin(); + std::advance(it, index); + for (int i = index; i < DeviceSet.size(); i++, it++) { + auto InsertedEntry = Subset.insert(Subset.end(), *it); + callFuncForAllSubsets(FuncToCall, DeviceSet, Subset, i + 1); + Subset.erase(InsertedEntry); + } +}; + ur_program_handle_t ProgramManager::getBuiltURProgram( const BinImgWithDeps &ImgWithDeps, const context &Context, const std::vector &Devs, const DevImgPlainWithDeps *DevImgWithDeps, @@ -990,27 +1008,23 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // emplace all subsets of the current set of devices into the cache. // Set of all devices is not included in the loop as it was already added // into the cache. - for (int Mask = 1; Mask < (1 << URDevicesSet.size()) - 1; ++Mask) { - std::set Subset; - int Index = 0; - for (auto It = URDevicesSet.begin(); It != URDevicesSet.end(); - ++It, ++Index) { - if (Mask & (1 << Index)) { - Subset.insert(*It); - } - } - // Change device in the cache key to reduce copying of spec const data. - CacheKey.second = Subset; - bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); - if (DidInsert) { - // For every cached copy of the program, we need to increment its - // refcount - Adapter->call(ResProgram); - } - CacheLinkedImages(); - // getOrBuild is not supposed to return nullptr - assert(BuildResult != nullptr && "Invalid build result"); - } + auto ExecuteForAllSubsets = + [&CacheKey, &Cache, &Adapter, &ResProgram, + &CacheLinkedImages](std::set &Subset) { + // Change device in the cache key to reduce copying of spec const + // data. + CacheKey.second = Subset; + bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); + if (DidInsert) { + // For every cached copy of the program, we need to increment its + // refcount + Adapter->call(ResProgram); + } + CacheLinkedImages(); + }; + std::set Subset; + int Index = 0; + callFuncForAllSubsets(ExecuteForAllSubsets, URDevicesSet, Subset, Index); } // If caching is enabled, one copy of the program handle will be @@ -1124,7 +1138,7 @@ ProgramManager::getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, std::string ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, - const ContextImplPtr Context) { + const ContextImplPtr &Context) { size_t URDevicesSize = 0; const AdapterPtr &Adapter = Context->getAdapter(); Adapter->call(Program, UR_PROGRAM_INFO_DEVICES, diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index abfdb1144105b..edc84d43ec153 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -213,7 +213,7 @@ class ProgramManager { void addImages(sycl_device_binaries DeviceImages); void debugPrintBinaryImages() const; static std::string getProgramBuildLog(const ur_program_handle_t &Program, - const ContextImplPtr Context); + const ContextImplPtr &Context); uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index bf55db9f33909..6ee7d1c07792a 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -59,7 +59,7 @@ namespace detail { template ur_result_t callMemOpHelper(MemOpFuncT &MemOpFunc, MemOpArgTs &&...MemOpArgs) { try { - MemOpFunc(MemOpArgs...); + MemOpFunc(std::forward(MemOpArgs)...); } catch (sycl::exception &e) { return static_cast(get_ur_error(e)); } @@ -70,7 +70,7 @@ template ur_result_t callMemOpHelperRet(MemOpRet &MemOpResult, MemOpFuncT &MemOpFunc, MemOpArgTs &&...MemOpArgs) { try { - MemOpResult = MemOpFunc(MemOpArgs...); + MemOpResult = MemOpFunc(std::forward(MemOpArgs)...); } catch (sycl::exception &e) { return static_cast(get_ur_error(e)); } @@ -2891,7 +2891,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { &RawEvents[0]); } - ur_exp_command_buffer_sync_point_t OutSyncPoint; + ur_exp_command_buffer_sync_point_t OutSyncPoint{}; ur_exp_command_buffer_command_handle_t OutCommand = nullptr; switch (MCommandGroup->getType()) { case CGType::Kernel: { From fed722979d88ef00025d74586203b91c21958b6b Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 11 Dec 2024 04:58:47 -0800 Subject: [PATCH 2/5] fix build Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/program_manager/program_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index fa5967a257efd..aee570c9a2445 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -836,7 +836,7 @@ void callFuncForAllSubsets(Func &FuncToCall, auto it = DeviceSet.begin(); std::advance(it, index); - for (int i = index; i < DeviceSet.size(); i++, it++) { + for (size_t i = index; i < DeviceSet.size(); i++, it++) { auto InsertedEntry = Subset.insert(Subset.end(), *it); callFuncForAllSubsets(FuncToCall, DeviceSet, Subset, i + 1); Subset.erase(InsertedEntry); From 56de4605cf15afd3652ad86d69186cb132316a29 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 11 Dec 2024 05:05:21 -0800 Subject: [PATCH 3/5] fix build one more time Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/program_manager/program_manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index aee570c9a2445..0141158030efc 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -841,7 +841,7 @@ void callFuncForAllSubsets(Func &FuncToCall, callFuncForAllSubsets(FuncToCall, DeviceSet, Subset, i + 1); Subset.erase(InsertedEntry); } -}; +} ur_program_handle_t ProgramManager::getBuiltURProgram( const BinImgWithDeps &ImgWithDeps, const context &Context, From 19983fe9bd65e3f9008cd3e244886c89dd547a3d Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 11 Dec 2024 09:41:22 -0800 Subject: [PATCH 4/5] fix code-review comments Signed-off-by: Tikhomirova, Kseniya --- .../program_manager/program_manager.cpp | 63 +++++++++---------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 0141158030efc..127428b67ea19 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -825,24 +825,6 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( return getBuiltURProgram(AllImages, Context, {Device}); } -template -void callFuncForAllSubsets(Func &FuncToCall, - const std::set &DeviceSet, - std::set &Subset, int index) { - // Add the current subset to the result list - if (Subset.size() && Subset.size() != DeviceSet.size()) { - FuncToCall(Subset); - } - - auto it = DeviceSet.begin(); - std::advance(it, index); - for (size_t i = index; i < DeviceSet.size(); i++, it++) { - auto InsertedEntry = Subset.insert(Subset.end(), *it); - callFuncForAllSubsets(FuncToCall, DeviceSet, Subset, i + 1); - Subset.erase(InsertedEntry); - } -} - ur_program_handle_t ProgramManager::getBuiltURProgram( const BinImgWithDeps &ImgWithDeps, const context &Context, const std::vector &Devs, const DevImgPlainWithDeps *DevImgWithDeps, @@ -1008,23 +990,34 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // emplace all subsets of the current set of devices into the cache. // Set of all devices is not included in the loop as it was already added // into the cache. - auto ExecuteForAllSubsets = - [&CacheKey, &Cache, &Adapter, &ResProgram, - &CacheLinkedImages](std::set &Subset) { - // Change device in the cache key to reduce copying of spec const - // data. - CacheKey.second = Subset; - bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); - if (DidInsert) { - // For every cached copy of the program, we need to increment its - // refcount - Adapter->call(ResProgram); - } - CacheLinkedImages(); - }; - std::set Subset; - int Index = 0; - callFuncForAllSubsets(ExecuteForAllSubsets, URDevicesSet, Subset, Index); + int Mask = 1; + if (URDevicesSet.size() > sizeof(Mask) * 8 - 1) { + // Protection for the algorithm below. Although overflow is very unlikely + // to be reached. + throw sycl::exception(make_error_code(errc::runtime), + "Unable to generate device sets"); + } + for (; Mask < (1 << URDevicesSet.size()) - 1; ++Mask) { + std::set Subset; + int Index = 0; + for (auto It = URDevicesSet.begin(); It != URDevicesSet.end(); + ++It, ++Index) { + if (Mask & (1 << Index)) { + Subset.insert(*It); + } + } + // Change device in the cache key to reduce copying of spec const data. + CacheKey.second = Subset; + bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); + if (DidInsert) { + // For every cached copy of the program, we need to increment its + // refcount + Adapter->call(ResProgram); + } + CacheLinkedImages(); + // getOrBuild is not supposed to return nullptr + assert(BuildResult != nullptr && "Invalid build result"); + } } // If caching is enabled, one copy of the program handle will be From 637ad26731eb701dbe810dc9d2df9af1c24e8ace Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 12 Dec 2024 04:23:10 -0800 Subject: [PATCH 5/5] made message more specific about limitation issue Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/program_manager/program_manager.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 127428b67ea19..2a7b494f4d84c 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -994,8 +994,9 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( if (URDevicesSet.size() > sizeof(Mask) * 8 - 1) { // Protection for the algorithm below. Although overflow is very unlikely // to be reached. - throw sycl::exception(make_error_code(errc::runtime), - "Unable to generate device sets"); + throw sycl::exception( + make_error_code(errc::runtime), + "Unable to cache built program for more than 31 devices"); } for (; Mask < (1 << URDevicesSet.size()) - 1; ++Mask) { std::set Subset;