Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/include/sycl/accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ sycl::range<1> GetZeroDimAccessRange(BufferT Buffer) {
return std::min(Buffer.size(), size_t{1});
}

__SYCL_EXPORT device getDeviceFromHandler(handler &CommandGroupHandlerRef);
__SYCL_EXPORT device &getDeviceFromHandler(handler &CommandGroupHandlerRef);

template <typename DataT, int Dimensions, access::mode AccessMode,
access::target AccessTarget, access::placeholder IsPlaceholder,
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ template <typename Type> struct get_kernel_wrapper_name_t {
using name = __pf_kernel_wrapper<Type>;
};

__SYCL_EXPORT device getDeviceFromHandler(handler &);
__SYCL_EXPORT device &getDeviceFromHandler(handler &);
device_impl &getDeviceImplFromHandler(handler &);

// Checks if a device_global has any registered kernel usage.
Expand Down Expand Up @@ -2818,7 +2818,7 @@ class __SYCL_EXPORT handler {
access::target AccTarget, access::placeholder isPlaceholder,
typename PropertyListT>
friend class accessor;
friend device detail::getDeviceFromHandler(handler &);
friend device &detail::getDeviceFromHandler(handler &);
friend detail::device_impl &detail::getDeviceImplFromHandler(handler &);

template <typename DataT, int Dimensions, access::mode AccessMode,
Expand Down
5 changes: 2 additions & 3 deletions sycl/source/detail/graph/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,9 +936,8 @@ exec_graph_impl::exec_graph_impl(sycl::context Context,
MQueueImpl = std::move(PlaceholderQueuePtr);
} else {
MQueueImpl = sycl::detail::queue_impl::create(
*sycl::detail::getSyclObjImpl(GraphImpl->getDevice()),
*sycl::detail::getSyclObjImpl(Context), sycl::async_handler{},
sycl::property_list{});
GraphImpl->getDevice(), *sycl::detail::getSyclObjImpl(Context),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a guarantee that graph will outlive queue?

sycl::async_handler{}, sycl::property_list{});
}
}

Expand Down
4 changes: 4 additions & 0 deletions sycl/source/detail/graph/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ class graph_impl : public std::enable_shared_from_this<graph_impl> {
/// @return Device associated with graph.
sycl::device getDevice() const { return MDevice; }

/// Query for the device tied to this graph.
/// @return Device associated with graph by reference.
sycl::device &getDevice() { return MDevice; }

/// List of root nodes.
std::set<node_impl *> MRoots;

Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/handler_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,12 @@ class handler_impl {

// Make the following methods templates to avoid circular dependencies for the
// includes.
template <typename Self = handler_impl> detail::device_impl &get_device() {
template <typename Self = handler_impl> device &get_device() {
Self *self = this;
if (auto *Queue = self->get_queue_or_null())
return Queue->getDeviceImpl();
return Queue->get_device();
else
return self->get_graph().getDeviceImpl();
return self->get_graph().getDevice();
}
template <typename Self = handler_impl> context_impl &get_context() {
Self *self = this;
Expand Down
11 changes: 7 additions & 4 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ event queue_impl::memset(void *Ptr, int Value, size_t Count,
// stream check.
TP.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice.getHandleRef()));
reinterpret_cast<size_t>(
getSyclObjImpl(MDevice).get()->getHandleRef()));
xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast<size_t>(Ptr));
xpti::addMetadata(TEvent, "value_set", Value);
xpti::addMetadata(TEvent, "memory_size", Count);
Expand Down Expand Up @@ -187,7 +188,8 @@ event queue_impl::memcpy(void *Dest, const void *Src, size_t Count,
// We will include this metadata information as it is required for memcpy.
TP.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice.getHandleRef()));
reinterpret_cast<size_t>(
getSyclObjImpl(MDevice).get()->getHandleRef()));
xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast<size_t>(Src));
xpti::addMetadata(TEvent, "dest_memory_ptr",
reinterpret_cast<size_t>(Dest));
Expand Down Expand Up @@ -941,8 +943,9 @@ void queue_impl::constructorNotification() {
reinterpret_cast<size_t>(MContext->getHandleRef()));
xpti::addMetadata(TEvent, "sycl_device_name",
MDevice.get_info<info::device::name>());
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast<size_t>(MDevice.getHandleRef()));
xpti::addMetadata(
TEvent, "sycl_device",
reinterpret_cast<size_t>(getSyclObjImpl(MDevice).get()->getHandleRef()));
xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
xpti::addMetadata(TEvent, "queue_handle",
Expand Down
27 changes: 15 additions & 12 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
/// to the queue.
/// \param AsyncHandler is a SYCL asynchronous exception handler.
/// \param PropList is a list of properties to use for queue construction.
queue_impl(device_impl &Device, const async_handler &AsyncHandler,
queue_impl(const device &Device, const async_handler &AsyncHandler,
Copy link
Contributor

@lbushi25 lbushi25 Nov 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels quite suspicious to have an implementation-layer object take as a constructor argument a public API object. Can we not, for example, directly store the device object by adding it as a field to the queue class and modifying the queue constructors to retain the device object? If that is feasible, I would strongly prefer it instead.

const property_list &PropList, private_tag tag)
: queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList,
tag) {};
: queue_impl(Device, getDefaultOrNew(*getSyclObjImpl(Device)),
AsyncHandler, PropList, tag) {};

/// Constructs a SYCL queue with an async_handler and property_list provided
/// form a device and a context.
Expand All @@ -114,7 +114,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
/// constructed.
/// \param AsyncHandler is a SYCL asynchronous exception handler.
/// \param PropList is a list of properties to use for queue construction.
queue_impl(device_impl &Device, std::shared_ptr<context_impl> &&Context,
queue_impl(const device &Device, std::shared_ptr<context_impl> &&Context,
const async_handler &AsyncHandler, const property_list &PropList,
private_tag)
: MDevice(Device), MContext(std::move(Context)),
Expand Down Expand Up @@ -143,7 +143,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
"Queue compute index must be a non-negative number less than "
"device's number of available compute queue indices.");
}
if (!MContext->isDeviceValid(Device)) {
if (!MContext->isDeviceValid(*getSyclObjImpl(Device))) {
if (MContext->getBackend() == backend::opencl)
throw sycl::exception(
make_error_code(errc::invalid),
Expand Down Expand Up @@ -174,7 +174,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
trySwitchingToNoEventsMode();
}

queue_impl(device_impl &Device, context_impl &Context,
queue_impl(const device &Device, context_impl &Context,
const async_handler &AsyncHandler, const property_list &PropList,
private_tag Tag)
: queue_impl(Device, Context.shared_from_this(), AsyncHandler, PropList,
Expand All @@ -192,7 +192,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
queue_impl(ur_queue_handle_t UrQueue, context_impl &Context,
const async_handler &AsyncHandler, const property_list &PropList,
private_tag)
: MDevice([&]() -> device_impl & {
: MDevice([&]() -> device {
ur_device_handle_t DeviceUr{};
adapter_impl &Adapter = Context.getAdapter();
// TODO catch an exception and put it to list of asynchronous
Expand All @@ -206,7 +206,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
make_error_code(errc::invalid),
"Device provided by native Queue not found in Context.");
}
return *Device;
return createSyclObjFromImpl<device>(*Device);
}()),
MContext(Context.shared_from_this()), MAsyncHandler(AsyncHandler),
MPropList(PropList), MQueue(UrQueue),
Expand Down Expand Up @@ -297,10 +297,13 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {

std::weak_ptr<context_impl> getContextImplWeakPtr() const { return MContext; }

device_impl &getDeviceImpl() const { return MDevice; }
device_impl &getDeviceImpl() { return *getSyclObjImpl(MDevice); }

/// \return an associated SYCL device by reference.
device &get_device() { return MDevice; }

/// \return an associated SYCL device.
device get_device() const { return createSyclObjFromImpl<device>(MDevice); }
device get_device() const { return MDevice; }

/// \return true if this queue allows for discarded events.
bool supportsDiscardingPiEvents() const { return MIsInorder; }
Expand Down Expand Up @@ -499,7 +502,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
ur_queue_handle_t createQueue(QueueOrder Order) {
ur_queue_handle_t Queue{};
ur_context_handle_t Context = MContext->getHandleRef();
ur_device_handle_t Device = MDevice.getHandleRef();
ur_device_handle_t Device = getSyclObjImpl(MDevice).get()->getHandleRef();
/*
sycl::detail::pi::PiQueueProperties Properties[] = {
PI_QUEUE_FLAGS, createPiQueueProperties(MPropList, Order), 0, 0, 0};
Expand Down Expand Up @@ -984,7 +987,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
/// Protects all the fields that can be changed by class' methods.
mutable std::mutex MMutex;

device_impl &MDevice;
device MDevice;
const std::shared_ptr<context_impl> MContext;

/// These events are tracked, but not owned, by the queue.
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/reduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ uint32_t reduGetMaxNumConcurrentWorkGroups(device_impl &Dev) {
// Returns the estimated number of physical threads on the device associated
// with the given queue.
__SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(handler &cgh) {
return reduGetMaxNumConcurrentWorkGroups(getSyclObjImpl(cgh)->get_device());
return reduGetMaxNumConcurrentWorkGroups(getDeviceImplFromHandler(cgh));
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
Expand Down Expand Up @@ -125,7 +125,7 @@ size_t reduGetMaxWGSize(device_impl &Dev, size_t LocalMemBytesPerWorkItem) {
}
__SYCL_EXPORT size_t reduGetMaxWGSize(handler &cgh,
size_t LocalMemBytesPerWorkItem) {
return reduGetMaxWGSize(getSyclObjImpl(cgh)->get_device(),
return reduGetMaxWGSize(getDeviceImplFromHandler(cgh),
LocalMemBytesPerWorkItem);
}
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
Expand Down Expand Up @@ -181,7 +181,7 @@ size_t reduGetPreferredWGSize(device_impl &Dev,
}
__SYCL_EXPORT size_t reduGetPreferredWGSize(handler &cgh,
size_t LocalMemBytesPerWorkItem) {
return reduGetPreferredWGSize(getSyclObjImpl(cgh)->get_device(),
return reduGetPreferredWGSize(getDeviceImplFromHandler(cgh),
LocalMemBytesPerWorkItem);
}
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
Expand Down
5 changes: 3 additions & 2 deletions sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,9 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(queue_impl *Queue,
// Since all the Scheduler commands require queue but we have only context
// here, we need to create a dummy queue bound to the context and one of the
// devices from the context.
std::shared_ptr<queue_impl> InteropQueuePtr = queue_impl::create(
Dev, *InteropCtxPtr, async_handler{}, property_list{});
std::shared_ptr<queue_impl> InteropQueuePtr =
queue_impl::create(createSyclObjFromImpl<device>(Dev), *InteropCtxPtr,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely use-after free here, temporary device will die before the next statement and queue will have a stale reference. I'm also surprised it's compilable. How do you bind an rvalue device object to an lvalue reference argument?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I guess this compiles because the queue_impl constructor takes a const reference. It then makes a copy of the object, so it should work, but probably the constructor should take it by value - that should be clean, right?

async_handler{}, property_list{});

MemObject->MRecord.reset(new MemObjRecord{InteropCtxPtr, LeafLimit,
std::move(AllocateDependency)});
Expand Down
39 changes: 17 additions & 22 deletions sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ markBufferAsInternal(const std::shared_ptr<buffer_impl> &BufImpl) {
// TODO: Check if two ABI exports below are still necessary.
#endif
device_impl &getDeviceImplFromHandler(handler &CGH) {
return getSyclObjImpl(CGH)->get_device();
return *getSyclObjImpl(getSyclObjImpl(CGH)->get_device());
}

device getDeviceFromHandler(handler &CGH) {
return createSyclObjFromImpl<device>(getSyclObjImpl(CGH)->get_device());
device &getDeviceFromHandler(handler &CGH) {
return getSyclObjImpl(CGH)->get_device();
}

bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) {
Expand Down Expand Up @@ -402,10 +402,8 @@ handler::getOrInsertHandlerKernelBundlePtr(bool Insert) const {
return impl->MKernelBundle.get();

context Ctx = detail::createSyclObjFromImpl<context>(impl->get_context());
impl->MKernelBundle =
detail::getSyclObjImpl(get_kernel_bundle<bundle_state::input>(
Ctx, {detail::createSyclObjFromImpl<device>(impl->get_device())},
{}));
impl->MKernelBundle = detail::getSyclObjImpl(
get_kernel_bundle<bundle_state::input>(Ctx, {impl->get_device()}, {}));
return impl->MKernelBundle.get();
}

Expand Down Expand Up @@ -503,12 +501,11 @@ detail::EventImplPtr handler::finalize() {
(KernelBundleImpPtr->empty() ||
KernelBundleImpPtr->hasSYCLOfflineImages()) &&
!KernelBundleImpPtr->tryGetKernel(impl->getKernelName())) {
detail::device_impl &Dev = impl->get_device();
device &Dev = impl->get_device();
kernel_id KernelID =
detail::ProgramManager::getInstance().getSYCLKernelID(
impl->getKernelName());
bool KernelInserted = KernelBundleImpPtr->add_kernel(
KernelID, detail::createSyclObjFromImpl<device>(Dev));
bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
// If kernel was not inserted and the bundle is in input mode we try
// building it and trying to find the kernel in executable mode
if (!KernelInserted &&
Expand All @@ -522,8 +519,7 @@ detail::EventImplPtr handler::finalize() {
// Raw ptr KernelBundleImpPtr is valid, because we saved the
// shared_ptr to the handler
setHandlerKernelBundle(KernelBundleImpPtr->shared_from_this());
KernelInserted = KernelBundleImpPtr->add_kernel(
KernelID, detail::createSyclObjFromImpl<device>(Dev));
KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev);
}
// If the kernel was not found in executable mode we throw an exception
if (!KernelInserted)
Expand Down Expand Up @@ -880,8 +876,7 @@ void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) {
return;

kernel_id KernelID = detail::get_kernel_id_impl(KernelName);
if (!UsedKernelBundleImplPtr->has_kernel(
KernelID, detail::createSyclObjFromImpl<device>(impl->get_device())))
if (!UsedKernelBundleImplPtr->has_kernel(KernelID, impl->get_device()))
throw sycl::exception(
make_error_code(errc::kernel_not_supported),
"The kernel bundle in use does not contain the kernel");
Expand Down Expand Up @@ -1493,8 +1488,8 @@ void handler::depends_on(const std::vector<event> &Events) {
void handler::depends_on(const detail::EventImplPtr &EventImpl) {
registerEventDependency(EventImpl, impl->CGData.MEvents,
impl->get_queue_or_null(), impl->get_context(),
impl->get_device(), getCommandGraph().get(),
getType());
*getSyclObjImpl(impl->get_device()),
getCommandGraph().get(), getType());
}

void handler::depends_on(const std::vector<detail::EventImplPtr> &Events) {
Expand Down Expand Up @@ -1535,15 +1530,15 @@ bool handler::supportsUSMMemset2D() {
}

id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) {
device_impl &Dev = impl->get_device();
device_impl &Dev = *getSyclObjImpl(impl->get_device());
range<2> ItemLimit = Dev.get_info<info::device::max_work_item_sizes<2>>() *
Dev.get_info<info::device::max_compute_units>();
return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)};
}

// TODO: do we need this still?
backend handler::getDeviceBackend() const {
return impl->get_device().getBackend();
return getSyclObjImpl(impl->get_device()).get()->getBackend();
}

void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr,
Expand Down Expand Up @@ -1595,7 +1590,7 @@ void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr,
size_t DeviceGlobalTSize,
bool IsDeviceImageScoped,
size_t NumBytes, size_t Offset) {
host_task([=, &Dev = impl->get_device(),
host_task([=, &Dev = *getSyclObjImpl(impl->get_device()),
WeakContextImpl = impl->get_context().weak_from_this()] {
// Capture context as weak to avoid keeping it alive for too long. If it is
// dead by the time this executes, the operation would not have been visible
Expand All @@ -1614,7 +1609,7 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest,
bool IsDeviceImageScoped,
size_t NumBytes, size_t Offset) {
host_task([=, Context = impl->get_context().shared_from_this(),
&Dev = impl->get_device()] {
&Dev = *getSyclObjImpl(impl->get_device())] {
// Unlike memcpy to device_global, we need to keep the context alive in the
// capture of this operation as we must be able to correctly copy the value
// to the user-specified pointer. Device is guaranteed to live until SYCL RT
Expand All @@ -1629,7 +1624,7 @@ void handler::setKernelLaunchProperties(
const detail::KernelPropertyHolderStructTy &Kprop) {
impl->MKernelData.validateAndSetKernelLaunchProperties(
Kprop, getCommandGraph() != nullptr /*hasGraph?*/,
impl->get_device() /*device_impl*/);
*getSyclObjImpl(impl->get_device()) /*device_impl*/);
}

detail::context_impl &handler::getContextImpl() const {
Expand Down Expand Up @@ -1669,7 +1664,7 @@ kernel_bundle<bundle_state::input> handler::getKernelBundle() const {
}

std::optional<std::array<size_t, 3>> handler::getMaxWorkGroups() {
device_impl &DeviceImpl = impl->get_device();
device_impl &DeviceImpl = *getSyclObjImpl(impl->get_device());
std::array<size_t, 3> UrResult = {};
auto Ret = DeviceImpl.getAdapter().call_nocheck<UrApiKind::urDeviceGetInfo>(
DeviceImpl.getHandleRef(),
Expand Down
13 changes: 5 additions & 8 deletions sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,19 @@ queue::queue(const context &SyclContext, const device_selector &DeviceSelector,

const device &SyclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp);

impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice),
*detail::getSyclObjImpl(SyclContext),
AsyncHandler, PropList);
impl = detail::queue_impl::create(
SyclDevice, *detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList);
}

queue::queue(const context &SyclContext, const device &SyclDevice,
const async_handler &AsyncHandler, const property_list &PropList) {
impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice),
*detail::getSyclObjImpl(SyclContext),
AsyncHandler, PropList);
impl = detail::queue_impl::create(
SyclDevice, *detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList);
}

queue::queue(const device &SyclDevice, const async_handler &AsyncHandler,
const property_list &PropList) {
impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice),
AsyncHandler, PropList);
impl = detail::queue_impl::create(SyclDevice, AsyncHandler, PropList);
}

queue::queue(const context &SyclContext, const device_selector &deviceSelector,
Expand Down
2 changes: 1 addition & 1 deletion sycl/test/gdb/printers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ sycl::item<2, false> item_wo_offset =
// DEVICE: 16 | class sycl::range<> MemRange

// CHECK: 0 | class sycl::detail::queue_impl
// CHECK: 56 | device_impl & MDevice
// CHECK: 56 | class sycl::device MDevice

// CHECK: 0 | class sycl::accessor<int>
// HOST: 0 | {{.*}} sycl::detail::AccessorImplHost{{.*}} impl
Expand Down
Loading
Loading