@@ -825,24 +825,6 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
825825 return getBuiltURProgram (AllImages, Context, {Device});
826826}
827827
/// Calls \p FuncToCall once for every non-empty subset of \p DeviceSet whose
/// size differs from the size of \p DeviceSet (i.e. every proper subset when
/// started with an empty \p Subset).
///
/// Subsets are built recursively: each element of \p DeviceSet located at
/// position \p index or later is inserted into \p Subset, the function
/// recurses on the remaining elements, and the element is erased again. Every
/// element this function inserts is therefore removed before it returns.
///
/// \param FuncToCall callable invoked as `FuncToCall(Subset)`.
/// \param DeviceSet  the full set whose subsets are enumerated.
/// \param Subset     scratch set holding the subset under construction;
///                   presumably expected to be empty on the initial call.
/// \param index      position in \p DeviceSet from which elements may still be
///                   added; pass 0 on the initial call.
template <typename Func, typename ElemT>
void callFuncForAllSubsets(Func &FuncToCall, const std::set<ElemT> &DeviceSet,
                           std::set<ElemT> &Subset, int index) {
  // Report the current subset, skipping the empty set and the full set.
  if (!Subset.empty() && Subset.size() != DeviceSet.size()) {
    FuncToCall(Subset);
  }

  // An out-of-range start position leaves nothing to add; bail out instead of
  // advancing an iterator outside [begin, end].
  if (index < 0 || static_cast<size_t>(index) > DeviceSet.size()) {
    return;
  }

  auto It = DeviceSet.begin();
  std::advance(It, index);
  // Pos stays an int so that the recursive call needs no narrowing conversion.
  for (int Pos = index; It != DeviceSet.end(); ++It, ++Pos) {
    // Elements are visited in ascending order, so end() is the right hint.
    auto InsertedEntry = Subset.insert(Subset.end(), *It);
    callFuncForAllSubsets(FuncToCall, DeviceSet, Subset, Pos + 1);
    Subset.erase(InsertedEntry);
  }
}

845-
846828ur_program_handle_t ProgramManager::getBuiltURProgram (
847829 const BinImgWithDeps &ImgWithDeps, const context &Context,
848830 const std::vector<device> &Devs, const DevImgPlainWithDeps *DevImgWithDeps,
@@ -1008,23 +990,34 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
1008990 // emplace all subsets of the current set of devices into the cache.
1009991 // Set of all devices is not included in the loop as it was already added
1010992 // into the cache.
1011- auto ExecuteForAllSubsets =
1012- [&CacheKey, &Cache, &Adapter, &ResProgram,
1013- &CacheLinkedImages](std::set<ur_device_handle_t > &Subset) {
1014- // Change device in the cache key to reduce copying of spec const
1015- // data.
1016- CacheKey.second = Subset;
1017- bool DidInsert = Cache.insertBuiltProgram (CacheKey, ResProgram);
1018- if (DidInsert) {
1019- // For every cached copy of the program, we need to increment its
1020- // refcount
1021- Adapter->call <UrApiKind::urProgramRetain>(ResProgram);
1022- }
1023- CacheLinkedImages ();
1024- };
1025- std::set<ur_device_handle_t > Subset;
1026- int Index = 0 ;
1027- callFuncForAllSubsets (ExecuteForAllSubsets, URDevicesSet, Subset, Index);
993+ int Mask = 1 ;
994+ if (URDevicesSet.size () > sizeof (Mask) * 8 - 1 ) {
995+ // Guard for the bitmask enumeration below: Mask needs one bit per device.
996+ // A device set large enough to overflow it is very unlikely in practice.
997+ throw sycl::exception (make_error_code (errc::runtime),
998+ " Unable to generate device sets" );
999+ }
1000+ for (; Mask < (1 << URDevicesSet.size ()) - 1 ; ++Mask) {
1001+ std::set<ur_device_handle_t > Subset;
1002+ int Index = 0 ;
1003+ for (auto It = URDevicesSet.begin (); It != URDevicesSet.end ();
1004+ ++It, ++Index) {
1005+ if (Mask & (1 << Index)) {
1006+ Subset.insert (*It);
1007+ }
1008+ }
1009+ // Change device in the cache key to reduce copying of spec const data.
1010+ CacheKey.second = Subset;
1011+ bool DidInsert = Cache.insertBuiltProgram (CacheKey, ResProgram);
1012+ if (DidInsert) {
1013+ // For every cached copy of the program, we need to increment its
1014+ // refcount
1015+ Adapter->call <UrApiKind::urProgramRetain>(ResProgram);
1016+ }
1017+ CacheLinkedImages ();
1018+ // getOrBuild is not supposed to return nullptr
1019+ assert (BuildResult != nullptr && " Invalid build result" );
1020+ }
10281021 }
10291022
10301023 // If caching is enabled, one copy of the program handle will be
0 commit comments