Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/source/backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ make_kernel_bundle(ur_native_handle_t NativeHandle,
sizeof(ur_device_handle_t) * NumDevices, ProgramDevices.data(), nullptr);

for (auto &Dev : ProgramDevices) {
ur_program_binary_type_t BinaryType;
ur_program_binary_type_t BinaryType = UR_PROGRAM_BINARY_TYPE_NONE;
Adapter.call<UrApiKind::urProgramGetBuildInfo>(
UrProgram, Dev, UR_PROGRAM_BUILD_INFO_BINARY_TYPE,
sizeof(ur_program_binary_type_t), &BinaryType, nullptr);
Expand Down
6 changes: 5 additions & 1 deletion sycl/source/detail/adapter_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,11 @@ template <typename URResource> class Managed {
if (!R)
return;

Adapter->call<Release>(R);
try {
Adapter->call<Release>(R);
} catch (std::exception &e) {
__SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~Managed", e);
}
}

Managed retain() {
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ std::vector<ur_event_handle_t> context_impl::initializeDeviceGlobals(
}
// Write the pointer to the device global and store the event in the
// initialize events list.
ur_event_handle_t InitEvent;
ur_event_handle_t InitEvent = nullptr;
void *const &USMPtr = DeviceGlobalUSM.getPtr();
Adapter.call<UrApiKind::urEnqueueDeviceGlobalVariableWrite>(
QueueImpl.getHandleRef(), NativePrg,
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/device_binary_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,10 @@ mergeDeviceRequirements(const std::vector<const RTDeviceBinaryImage *> &Imgs) {
size_t Pos = 0;
do {
const size_t NextPos = Contents.find(';', Pos);
if (NextPos == std::string::npos) {
Set.emplace(Contents.substr(Pos));
break;
}
if (NextPos != Pos)
Set.emplace(Contents.substr(Pos, NextPos - Pos));
Pos = NextPos + 1;
Expand Down
37 changes: 21 additions & 16 deletions sycl/source/detail/device_global_map_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,28 @@ inline namespace _V1 {
namespace detail {

DeviceGlobalUSMMem::~DeviceGlobalUSMMem() {
// removeAssociatedResources is expected to have cleaned up both the pointer
// and the event. When asserts are enabled the values are set, so we check
// these here.
auto ContextImplPtr = MAllocatingContext.lock();
if (ContextImplPtr) {
if (MPtr != nullptr) {
detail::usm::freeInternal(MPtr, ContextImplPtr.get());
MPtr = nullptr;
try {
// removeAssociatedResources is expected to have cleaned up both the pointer
// and the event. When asserts are enabled the values are set, so we check
// these here.
auto ContextImplPtr = MAllocatingContext.lock();
if (ContextImplPtr) {
if (MPtr != nullptr) {
detail::usm::freeInternal(MPtr, ContextImplPtr.get());
MPtr = nullptr;
}
if (MInitEvent != nullptr) {
ContextImplPtr->getAdapter().call<UrApiKind::urEventRelease>(
MInitEvent);
MInitEvent = nullptr;
}
}
if (MInitEvent != nullptr) {
ContextImplPtr->getAdapter().call<UrApiKind::urEventRelease>(MInitEvent);
MInitEvent = nullptr;
}
}

assert(MPtr == nullptr && "MPtr has not been cleaned up.");
assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up.");
assert(MPtr == nullptr && "MPtr has not been cleaned up.");
assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up.");
} catch (std::exception &e) {
__SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~DeviceGlobalUSMMem", e);
}
}

OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) {
Expand Down Expand Up @@ -80,7 +85,7 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) {
// Initialize here and save the event.
{
std::lock_guard<std::mutex> Lock(NewAlloc.MInitEventMutex);
ur_event_handle_t InitEvent;
ur_event_handle_t InitEvent = nullptr;
if (MDeviceGlobalPtr) {
// C++ guarantees members appear in memory in the order they are declared,
// so since the member variable that contains the initial contents of the
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/device_image_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ class device_image_impl
updateSpecConstSymMap();
}

device_image_impl(const std::string &Src, context Context,
device_image_impl(const std::string &Src, const context &Context,
devices_range Devices, syclex::source_language Lang,
include_pairs_t &&IncludePairsVec, private_tag)
: MBinImage(Src), MContext(Context),
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/device_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
return {Error};
return {Result};
} else {
ur_ret_t Result;
ur_ret_t Result{};
ur_result_t Error = getAdapter().call_nocheck<UrApiKind::urDeviceGetInfo>(
getHandleRef(), Desc, sizeof(Result), &Result, nullptr);
if (Error == UR_RESULT_SUCCESS)
Expand Down Expand Up @@ -220,7 +220,7 @@ class device_impl : public std::enable_shared_from_this<device_impl> {
getHandleRef(), Desc, ResultSize, Result.data(), nullptr);
return Result;
} else {
ur_ret_t Result;
ur_ret_t Result{};
getAdapter().call<UrApiKind::urDeviceGetInfo>(
getHandleRef(), Desc, sizeof(Result), &Result, nullptr);
return Result;
Expand Down
3 changes: 2 additions & 1 deletion sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ class event_impl {
ur_exp_command_buffer_sync_point_t getSyncPoint() const { return MSyncPoint; }

void setCommandGraph(
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
const std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
&Graph) {
MGraph = Graph;
}

Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ void *MemoryManager::allocateMemSubBuffer(context_impl *TargetContext,
ur_result_t Error = UR_RESULT_SUCCESS;
ur_buffer_region_t Region = {UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, Offset,
SizeInBytes};
ur_mem_handle_t NewMem;
ur_mem_handle_t NewMem = nullptr;
adapter_impl &Adapter = TargetContext->getAdapter();
Error = Adapter.call_nocheck<UrApiKind::urMemBufferPartition>(
ur::cast<ur_mem_handle_t>(ParentMemObj), UR_MEM_FLAG_READ_WRITE,
Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,10 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols(
throw exception(make_error_code(errc::feature_not_supported),
"Cannot resolve external symbols, linking is unsupported "
"for the backend");

// Access to m_ExportedSymbolImages must be guarded by m_KernelIDsMutex.
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);

while (!WorkList.empty()) {
std::string Symbol = WorkList.front();
WorkList.pop();
Expand Down
23 changes: 11 additions & 12 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
}

EventImplPtr queue_impl::submit_kernel_scheduler_bypass(
KernelData &KData, std::vector<detail::EventImplPtr> &DepEvents,
KernelData &KData, const std::vector<detail::EventImplPtr> &DepEvents,
bool EventNeeded, detail::kernel_impl *KernelImplPtr,
detail::kernel_bundle_impl *KernelBundleImpPtr,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) {
Expand Down Expand Up @@ -500,8 +500,7 @@ EventImplPtr queue_impl::submit_kernel_scheduler_bypass(
ResultEvent->setEnqueued();
// connect returned event with dependent events
if (!isInOrder()) {
// DepEvents is not used anymore, so can move.
ResultEvent->getPreparedDepsEvents() = std::move(DepEvents);
ResultEvent->getPreparedDepsEvents() = DepEvents;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we have a copy instead of a move. Does Coverity report potential use after the move? If so, does actual use occur after the move? If not, I believe the proper fix is to change the submit_kernel_scheduler_bypass function to accept DepEvents by value and to move the vector at the call to submit_kernel_scheduler_bypass.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does Coverity report potential use after the move?

Yes.

If so, does actual use occur after the move?

It is hard to say. The issue is that the events are actually owned by another object further up. This function is effectively stealing the dependency events from the object, without that behavior being very clear. Even if use-after-move isn't present currently, it is going to be very easy for someone to not realize it has been moved and assume that the dependencies tied to the owner object are still valid.

If not, I believe the proper fix is to change the submit_kernel_scheduler_bypass function to accept DepEvents by value and to move the vector at the call to submit_kernel_scheduler_bypass.

I disagree. In the current solution, a copy only happens under certain conditions. If we change the function to take it by value we will always make a copy of the dependencies vector.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I disagree. In the current solution, a copy only happens under certain conditions. If we change the function to take it by value we will always make a copy of the dependencies vector.

The problem I see is that, before this PR, we moved the vector when a certain condition held, whereas now we will copy it in that case.

What I have in mind is that today we have something like this:

void foo(std::vector<event> &DepEvents) {
   // ...
   ResultEvent->getPreparedDepsEvents() = std::move(DepEvents);
}

void bar() {
    std::vector<event> events;
    ...

    // It is hard to control what foo will do with events when we pass it by reference
    foo(events);
}

And I suggested considering the following:

void foo(std::vector<event> DepEvents) {
   // ...
   ResultEvent->getPreparedDepsEvents() = std::move(DepEvents);
}

void bar() {
    std::vector<event> events;
    ...

    // We move explicitly here, so we need not care how foo processes the events.
    foo(std::move(events));
}

@slawekptak, please review.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the second version with double std::move should work - there is no copy and the ownership is clear.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed this offline with @vinser52 and we agreed that doing the by-value solution can cause unnecessary copies in certain paths. As such, I plan on refactoring this code and its callers slightly to make the ownership easier to track.

// ResultEvent is local for current thread, no need to lock.
ResultEvent->cleanDepEventsThroughOneLevelUnlocked();
}
Expand Down Expand Up @@ -581,7 +580,7 @@ EventImplPtr queue_impl::submit_kernel_direct_impl(
KData.validateAndSetKernelLaunchProperties(Props, hasCommandGraph(),
getDeviceImpl());

auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &CGData,
auto SubmitKernelFunc = [&](detail::CG::StorageInitHelper &&CGData,
bool SchedulerBypass) -> EventImplPtr {
if (SchedulerBypass) {
// No need to copy/move the kernel function, so we set
Expand Down Expand Up @@ -609,12 +608,11 @@ EventImplPtr queue_impl::submit_kernel_direct_impl(
KData.getNDRDesc(), std::move(HostKernelPtr),
nullptr, // Kernel
nullptr, // KernelBundle
std::move(CGData), std::move(KData).getArgs(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did you remove the std::move(KData).getArgs()?

Copy link
Contributor Author

@steffenlarsen steffenlarsen Nov 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

KData is used in the following arguments. That's a use-after-move.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we do not move KData itself here. This syntax forces the call to resolve to the overload of getArgs() that moves the args out. With your change it will be a copy instead of a move.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I was not aware of that syntax... And I suppose neither is Coverity. 😆

I still have concerns regarding it, but I can revert it here and we can discuss it in #20617 (comment).

*KData.getDeviceKernelInfoPtr(), std::move(StreamStorage),
std::move(AuxiliaryResources), detail::CGType::Kernel,
KData.getKernelCacheConfig(), KData.isCooperative(),
KData.usesClusterLaunch(), KData.getKernelWorkGroupMemorySize(),
CodeLoc));
std::move(CGData), KData.getArgs(), *KData.getDeviceKernelInfoPtr(),
std::move(StreamStorage), std::move(AuxiliaryResources),
detail::CGType::Kernel, KData.getKernelCacheConfig(),
KData.isCooperative(), KData.usesClusterLaunch(),
KData.getKernelWorkGroupMemorySize(), CodeLoc));
CommandGroup->MIsTopCodeLoc = IsTopCodeLoc;

if (auto GraphImpl = getCommandGraph(); GraphImpl) {
Expand Down Expand Up @@ -693,7 +691,8 @@ queue_impl::submit_direct(bool CallerNeedsEvent,
MNoLastEventMode.store(isInOrder() && SchedulerBypass,
std::memory_order_relaxed);

EventImplPtr EventImpl = SubmitCommandFunc(CGData, SchedulerBypass);
EventImplPtr EventImpl =
SubmitCommandFunc(std::move(CGData), SchedulerBypass);

// Sync with the last event for in order queue. For scheduler-bypass flow,
// the ordering is done at the layers below the SYCL runtime,
Expand All @@ -708,7 +707,7 @@ queue_impl::submit_direct(bool CallerNeedsEvent,
Deps.UnenqueuedCmdEvents.push_back(EventImpl);
}

return CallerNeedsEvent ? EventImpl : nullptr;
return CallerNeedsEvent ? std::move(EventImpl) : nullptr;
}

template <typename HandlerFuncT>
Expand Down
8 changes: 5 additions & 3 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
///
/// \return a SYCL event representing submitted command or nullptr.
EventImplPtr submit_kernel_scheduler_bypass(
KernelData &KData, std::vector<detail::EventImplPtr> &DepEvents,
KernelData &KData, const std::vector<detail::EventImplPtr> &DepEvents,
bool EventNeeded, detail::kernel_impl *KernelImplPtr,
detail::kernel_bundle_impl *KernelBundleImpPtr,
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
Expand Down Expand Up @@ -602,7 +602,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
bool CallerNeedsEvent);

void setCommandGraphUnlocked(
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
const std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
&Graph) {
MGraph = Graph;
MExtGraphDeps.reset();

Expand All @@ -614,7 +615,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
}

void setCommandGraph(
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
const std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
&Graph) {
std::lock_guard<std::mutex> Lock(MMutex);
setCommandGraphUnlocked(Graph);
}
Expand Down