Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/source/detail/helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName,
DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage(
KernelName, Context, Device);
Program = detail::ProgramManager::getInstance().createURProgram(
*DeviceImage, Context, {Device});
*DeviceImage, Context, {std::move(Device)});
}
return {DeviceImage, Program};
}
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/persistent_device_code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ std::vector<std::vector<char>> PersistentDeviceCodeCache::getItemFromDisc(
std::vector<std::vector<char>>
PersistentDeviceCodeCache::getCompiledKernelFromDisc(
const std::vector<device> &Devices, const std::string &BuildOptionsString,
const std::string SourceStr) {
const std::string &SourceStr) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think more idiomatic fix would be to use std::string_view.

Copy link
Contributor Author

@KseniyaTikhomirova KseniyaTikhomirova Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am eliminating extra copy here, no intention to change types.

assert(!Devices.empty());
std::vector<std::vector<char>> Binaries(Devices.size());
std::string FileNames;
Expand Down Expand Up @@ -518,7 +518,7 @@ std::string PersistentDeviceCodeCache::getCacheItemPath(

std::string PersistentDeviceCodeCache::getCompiledKernelItemPath(
const device &Device, const std::string &BuildOptionsString,
const std::string SourceString) {
const std::string &SourceString) {

std::string cache_root{getRootDir()};
if (cache_root.empty()) {
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/persistent_device_code_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ class PersistentDeviceCodeCache {
static std::string
getCompiledKernelItemPath(const device &Device,
const std::string &BuildOptionsString,
const std::string SourceString);
const std::string &SourceString);

/* Program binaries built for one or more devices are read from persistent
* cache and returned in form of vector of programs. Each binary program is
Expand All @@ -185,7 +185,7 @@ class PersistentDeviceCodeCache {
static std::vector<std::vector<char>>
getCompiledKernelFromDisc(const std::vector<device> &Devices,
const std::string &BuildOptionsString,
const std::string SourceStr);
const std::string &SourceStr);

/* Stores build program in persistent cache
*/
Expand Down
62 changes: 38 additions & 24 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ static ur_program_handle_t
createBinaryProgram(const ContextImplPtr Context,
const std::vector<device> &Devices,
const uint8_t **Binaries, size_t *Lengths,
const std::vector<ur_program_metadata_t> Metadata) {
const std::vector<ur_program_metadata_t> &Metadata) {
const AdapterPtr &Adapter = Context->getAdapter();
ur_program_handle_t Program;
std::vector<ur_device_handle_t> DeviceHandles;
Expand Down Expand Up @@ -230,7 +230,7 @@ ProgramManager::createURProgram(const RTDeviceBinaryImage &Img,
"SPIR-V online compilation is not supported in this context");

// Get program metadata from properties
auto ProgMetadata = Img.getProgramMetadataUR();
const auto &ProgMetadata = Img.getProgramMetadataUR();

// Load the image
const ContextImplPtr Ctx = getSyclObjImpl(Context);
Expand Down Expand Up @@ -825,6 +825,24 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
return getBuiltURProgram(AllImages, Context, {Device});
}

template <typename Func>
void callFuncForAllSubsets(Func &FuncToCall,
const std::set<ur_device_handle_t> &DeviceSet,
std::set<ur_device_handle_t> &Subset, int index) {
// Add the current subset to the result list
if (Subset.size() && Subset.size() != DeviceSet.size()) {
FuncToCall(Subset);
}

auto it = DeviceSet.begin();
std::advance(it, index);
for (size_t i = index; i < DeviceSet.size(); i++, it++) {
auto InsertedEntry = Subset.insert(Subset.end(), *it);
callFuncForAllSubsets(FuncToCall, DeviceSet, Subset, i + 1);
Subset.erase(InsertedEntry);
}
}

ur_program_handle_t ProgramManager::getBuiltURProgram(
const BinImgWithDeps &ImgWithDeps, const context &Context,
const std::vector<device> &Devs, const DevImgPlainWithDeps *DevImgWithDeps,
Expand Down Expand Up @@ -990,27 +1008,23 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
// emplace all subsets of the current set of devices into the cache.
// Set of all devices is not included in the loop as it was already added
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment ("Set of all devices is not included...") needs to be moved to the line 832, if I understand the logic correctly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed 19983fe

// into the cache.
for (int Mask = 1; Mask < (1 << URDevicesSet.size()) - 1; ++Mask) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code is much more maintainable than the new one. Also, if the problem is the "overflow" here, then it's inherent part of the problem. We'll have that many entries in the resulting set anyway (and likely OOM regardless).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

discussed in person, fixed in 19983fe

std::set<ur_device_handle_t> Subset;
int Index = 0;
for (auto It = URDevicesSet.begin(); It != URDevicesSet.end();
++It, ++Index) {
if (Mask & (1 << Index)) {
Subset.insert(*It);
}
}
// Change device in the cache key to reduce copying of spec const data.
CacheKey.second = Subset;
bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram);
if (DidInsert) {
// For every cached copy of the program, we need to increment its
// refcount
Adapter->call<UrApiKind::urProgramRetain>(ResProgram);
}
CacheLinkedImages();
// getOrBuild is not supposed to return nullptr
assert(BuildResult != nullptr && "Invalid build result");
}
auto ExecuteForAllSubsets =
[&CacheKey, &Cache, &Adapter, &ResProgram,
&CacheLinkedImages](std::set<ur_device_handle_t> &Subset) {
// Change device in the cache key to reduce copying of spec const
// data.
CacheKey.second = Subset;
bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram);
if (DidInsert) {
// For every cached copy of the program, we need to increment its
// refcount
Adapter->call<UrApiKind::urProgramRetain>(ResProgram);
}
CacheLinkedImages();
};
std::set<ur_device_handle_t> Subset;
int Index = 0;
callFuncForAllSubsets(ExecuteForAllSubsets, URDevicesSet, Subset, Index);
}

// If caching is enabled, one copy of the program handle will be
Expand Down Expand Up @@ -1124,7 +1138,7 @@ ProgramManager::getUrProgramFromUrKernel(ur_kernel_handle_t Kernel,

std::string
ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program,
const ContextImplPtr Context) {
const ContextImplPtr &Context) {
size_t URDevicesSize = 0;
const AdapterPtr &Adapter = Context->getAdapter();
Adapter->call<UrApiKind::urProgramGetInfo>(Program, UR_PROGRAM_INFO_DEVICES,
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ class ProgramManager {
void addImages(sycl_device_binaries DeviceImages);
void debugPrintBinaryImages() const;
static std::string getProgramBuildLog(const ur_program_handle_t &Program,
const ContextImplPtr Context);
const ContextImplPtr &Context);

uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img);

Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ namespace detail {
template <typename MemOpFuncT, typename... MemOpArgTs>
ur_result_t callMemOpHelper(MemOpFuncT &MemOpFunc, MemOpArgTs &&...MemOpArgs) {
try {
MemOpFunc(MemOpArgs...);
MemOpFunc(std::forward<MemOpArgTs>(MemOpArgs)...);
} catch (sycl::exception &e) {
return static_cast<ur_result_t>(get_ur_error(e));
}
Expand All @@ -70,7 +70,7 @@ template <typename MemOpRet, typename MemOpFuncT, typename... MemOpArgTs>
ur_result_t callMemOpHelperRet(MemOpRet &MemOpResult, MemOpFuncT &MemOpFunc,
MemOpArgTs &&...MemOpArgs) {
try {
MemOpResult = MemOpFunc(MemOpArgs...);
MemOpResult = MemOpFunc(std::forward<MemOpArgTs>(MemOpArgs)...);
} catch (sycl::exception &e) {
return static_cast<ur_result_t>(get_ur_error(e));
}
Expand Down Expand Up @@ -2891,7 +2891,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() {
&RawEvents[0]);
}

ur_exp_command_buffer_sync_point_t OutSyncPoint;
ur_exp_command_buffer_sync_point_t OutSyncPoint{};
ur_exp_command_buffer_command_handle_t OutCommand = nullptr;
switch (MCommandGroup->getType()) {
case CGType::Kernel: {
Expand Down
Loading