diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 3f48859aefed3..1494eeba4c19d 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -286,7 +286,7 @@ sycl::range<1> GetZeroDimAccessRange(BufferT Buffer) { return std::min(Buffer.size(), size_t{1}); } -__SYCL_EXPORT device getDeviceFromHandler(handler &CommandGroupHandlerRef); +__SYCL_EXPORT device &getDeviceFromHandler(handler &CommandGroupHandlerRef); template struct get_kernel_wrapper_name_t { using name = __pf_kernel_wrapper; }; -__SYCL_EXPORT device getDeviceFromHandler(handler &); +__SYCL_EXPORT device &getDeviceFromHandler(handler &); device_impl &getDeviceImplFromHandler(handler &); // Checks if a device_global has any registered kernel usage. @@ -2818,7 +2818,7 @@ class __SYCL_EXPORT handler { access::target AccTarget, access::placeholder isPlaceholder, typename PropertyListT> friend class accessor; - friend device detail::getDeviceFromHandler(handler &); + friend device &detail::getDeviceFromHandler(handler &); friend detail::device_impl &detail::getDeviceImplFromHandler(handler &); template getDevice()), - *sycl::detail::getSyclObjImpl(Context), sycl::async_handler{}, - sycl::property_list{}); + GraphImpl->getDevice(), *sycl::detail::getSyclObjImpl(Context), + sycl::async_handler{}, sycl::property_list{}); } } diff --git a/sycl/source/detail/graph/graph_impl.hpp b/sycl/source/detail/graph/graph_impl.hpp index d35b271493ed0..44ed0aae8ce21 100644 --- a/sycl/source/detail/graph/graph_impl.hpp +++ b/sycl/source/detail/graph/graph_impl.hpp @@ -287,6 +287,10 @@ class graph_impl : public std::enable_shared_from_this { /// @return Device associated with graph. sycl::device getDevice() const { return MDevice; } + /// Query for the device tied to this graph. + /// @return Device associated with graph by reference. + sycl::device &getDevice() { return MDevice; } + /// List of root nodes. std::set MRoots; diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 6158f56698e40..05dc5256054d6 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -174,12 +174,12 @@ class handler_impl { // Make the following methods templates to avoid circular dependencies for the // includes. - template detail::device_impl &get_device() { + template device &get_device() { Self *self = this; if (auto *Queue = self->get_queue_or_null()) - return Queue->getDeviceImpl(); + return Queue->get_device(); else - return self->get_graph().getDeviceImpl(); + return self->get_graph().getDevice(); } template context_impl &get_context() { Self *self = this; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 72110faaa544e..ca4072440b78b 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -134,7 +134,8 @@ event queue_impl::memset(void *Ptr, int Value, size_t Count, // stream check. TP.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice.getHandleRef())); + reinterpret_cast( + getSyclObjImpl(MDevice).get()->getHandleRef())); xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); xpti::addMetadata(TEvent, "value_set", Value); xpti::addMetadata(TEvent, "memory_size", Count); @@ -187,7 +188,8 @@ event queue_impl::memcpy(void *Dest, const void *Src, size_t Count, // We will include this metadata information as it is required for memcpy. TP.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice.getHandleRef())); + reinterpret_cast( + getSyclObjImpl(MDevice).get()->getHandleRef())); xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast(Src)); xpti::addMetadata(TEvent, "dest_memory_ptr", reinterpret_cast(Dest)); @@ -941,8 +943,9 @@ void queue_impl::constructorNotification() { reinterpret_cast(MContext->getHandleRef())); xpti::addMetadata(TEvent, "sycl_device_name", MDevice.get_info()); - xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice.getHandleRef())); + xpti::addMetadata( + TEvent, "sycl_device", + reinterpret_cast(getSyclObjImpl(MDevice).get()->getHandleRef())); xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); xpti::addMetadata(TEvent, "queue_handle", diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 30bdf5a7bb8ab..f58ced8d734d8 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -100,10 +100,10 @@ class queue_impl : public std::enable_shared_from_this { /// to the queue. /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. - queue_impl(device_impl &Device, const async_handler &AsyncHandler, + queue_impl(const device &Device, const async_handler &AsyncHandler, const property_list &PropList, private_tag tag) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList, - tag) {}; + : queue_impl(Device, getDefaultOrNew(*getSyclObjImpl(Device)), + AsyncHandler, PropList, tag) {}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -114,7 +114,7 @@ class queue_impl : public std::enable_shared_from_this { /// constructed. /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. - queue_impl(device_impl &Device, std::shared_ptr &&Context, + queue_impl(const device &Device, std::shared_ptr &&Context, const async_handler &AsyncHandler, const property_list &PropList, private_tag) : MDevice(Device), MContext(std::move(Context)), @@ -143,7 +143,7 @@ class queue_impl : public std::enable_shared_from_this { "Queue compute index must be a non-negative number less than " "device's number of available compute queue indices."); } - if (!MContext->isDeviceValid(Device)) { + if (!MContext->isDeviceValid(*getSyclObjImpl(Device))) { if (MContext->getBackend() == backend::opencl) throw sycl::exception( make_error_code(errc::invalid), @@ -174,7 +174,7 @@ class queue_impl : public std::enable_shared_from_this { trySwitchingToNoEventsMode(); } - queue_impl(device_impl &Device, context_impl &Context, + queue_impl(const device &Device, context_impl &Context, const async_handler &AsyncHandler, const property_list &PropList, private_tag Tag) : queue_impl(Device, Context.shared_from_this(), AsyncHandler, PropList, @@ -192,7 +192,7 @@ class queue_impl : public std::enable_shared_from_this { queue_impl(ur_queue_handle_t UrQueue, context_impl &Context, const async_handler &AsyncHandler, const property_list &PropList, private_tag) - : MDevice([&]() -> device_impl & { + : MDevice([&]() -> device { ur_device_handle_t DeviceUr{}; adapter_impl &Adapter = Context.getAdapter(); // TODO catch an exception and put it to list of asynchronous @@ -206,7 +206,7 @@ class queue_impl : public std::enable_shared_from_this { make_error_code(errc::invalid), "Device provided by native Queue not found in Context."); } - return *Device; + return createSyclObjFromImpl(*Device); }()), MContext(Context.shared_from_this()), MAsyncHandler(AsyncHandler), MPropList(PropList), MQueue(UrQueue), @@ -297,10 +297,13 @@ class queue_impl : public std::enable_shared_from_this { std::weak_ptr getContextImplWeakPtr() const { return MContext; } - device_impl &getDeviceImpl() const { return MDevice; } + device_impl &getDeviceImpl() { return *getSyclObjImpl(MDevice); } + + /// \return an associated SYCL device by reference. + device &get_device() { return MDevice; } /// \return an associated SYCL device. - device get_device() const { return createSyclObjFromImpl(MDevice); } + device get_device() const { return MDevice; } /// \return true if this queue allows for discarded events. bool supportsDiscardingPiEvents() const { return MIsInorder; } @@ -499,7 +502,7 @@ class queue_impl : public std::enable_shared_from_this { ur_queue_handle_t createQueue(QueueOrder Order) { ur_queue_handle_t Queue{}; ur_context_handle_t Context = MContext->getHandleRef(); - ur_device_handle_t Device = MDevice.getHandleRef(); + ur_device_handle_t Device = getSyclObjImpl(MDevice).get()->getHandleRef(); /* sycl::detail::pi::PiQueueProperties Properties[] = { PI_QUEUE_FLAGS, createPiQueueProperties(MPropList, Order), 0, 0, 0}; @@ -984,7 +987,7 @@ class queue_impl : public std::enable_shared_from_this { /// Protects all the fields that can be changed by class' methods. mutable std::mutex MMutex; - device_impl &MDevice; + device MDevice; const std::shared_ptr MContext; /// These events are tracked, but not owned, by the queue. diff --git a/sycl/source/detail/reduction.cpp b/sycl/source/detail/reduction.cpp index 84a8722c96e76..3eff2ed2e73d6 100644 --- a/sycl/source/detail/reduction.cpp +++ b/sycl/source/detail/reduction.cpp @@ -63,7 +63,7 @@ uint32_t reduGetMaxNumConcurrentWorkGroups(device_impl &Dev) { // Returns the estimated number of physical threads on the device associated // with the given queue. __SYCL_EXPORT uint32_t reduGetMaxNumConcurrentWorkGroups(handler &cgh) { - return reduGetMaxNumConcurrentWorkGroups(getSyclObjImpl(cgh)->get_device()); + return reduGetMaxNumConcurrentWorkGroups(getDeviceImplFromHandler(cgh)); } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES @@ -125,7 +125,7 @@ size_t reduGetMaxWGSize(device_impl &Dev, size_t LocalMemBytesPerWorkItem) { } __SYCL_EXPORT size_t reduGetMaxWGSize(handler &cgh, size_t LocalMemBytesPerWorkItem) { - return reduGetMaxWGSize(getSyclObjImpl(cgh)->get_device(), + return reduGetMaxWGSize(getDeviceImplFromHandler(cgh), LocalMemBytesPerWorkItem); } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES @@ -181,7 +181,7 @@ size_t reduGetPreferredWGSize(device_impl &Dev, } __SYCL_EXPORT size_t reduGetPreferredWGSize(handler &cgh, size_t LocalMemBytesPerWorkItem) { - return reduGetPreferredWGSize(getSyclObjImpl(cgh)->get_device(), + return reduGetPreferredWGSize(getDeviceImplFromHandler(cgh), LocalMemBytesPerWorkItem); } #ifndef __INTEL_PREVIEW_BREAKING_CHANGES diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 046d3f25d066e..2281bab85ac7b 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -220,8 +220,9 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(queue_impl *Queue, // Since all the Scheduler commands require queue but we have only context // here, we need to create a dummy queue bound to the context and one of the // devices from the context. - std::shared_ptr InteropQueuePtr = queue_impl::create( - Dev, *InteropCtxPtr, async_handler{}, property_list{}); + std::shared_ptr InteropQueuePtr = + queue_impl::create(createSyclObjFromImpl(Dev), *InteropCtxPtr, + async_handler{}, property_list{}); MemObject->MRecord.reset(new MemObjRecord{InteropCtxPtr, LeafLimit, std::move(AllocateDependency)}); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 58a4139722321..400fd00188e98 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -60,11 +60,11 @@ markBufferAsInternal(const std::shared_ptr &BufImpl) { // TODO: Check if two ABI exports below are still necessary. #endif device_impl &getDeviceImplFromHandler(handler &CGH) { - return getSyclObjImpl(CGH)->get_device(); + return *getSyclObjImpl(getSyclObjImpl(CGH)->get_device()); } -device getDeviceFromHandler(handler &CGH) { - return createSyclObjFromImpl(getSyclObjImpl(CGH)->get_device()); +device &getDeviceFromHandler(handler &CGH) { + return getSyclObjImpl(CGH)->get_device(); } bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) { @@ -402,10 +402,8 @@ handler::getOrInsertHandlerKernelBundlePtr(bool Insert) const { return impl->MKernelBundle.get(); context Ctx = detail::createSyclObjFromImpl(impl->get_context()); - impl->MKernelBundle = - detail::getSyclObjImpl(get_kernel_bundle( - Ctx, {detail::createSyclObjFromImpl(impl->get_device())}, - {})); + impl->MKernelBundle = detail::getSyclObjImpl( + get_kernel_bundle(Ctx, {impl->get_device()}, {})); return impl->MKernelBundle.get(); } @@ -503,12 +501,11 @@ detail::EventImplPtr handler::finalize() { (KernelBundleImpPtr->empty() || KernelBundleImpPtr->hasSYCLOfflineImages()) && !KernelBundleImpPtr->tryGetKernel(impl->getKernelName())) { - detail::device_impl &Dev = impl->get_device(); + device &Dev = impl->get_device(); kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( impl->getKernelName()); - bool KernelInserted = KernelBundleImpPtr->add_kernel( - KernelID, detail::createSyclObjFromImpl(Dev)); + bool KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev); // If kernel was not inserted and the bundle is in input mode we try // building it and trying to find the kernel in executable mode if (!KernelInserted && @@ -522,8 +519,7 @@ detail::EventImplPtr handler::finalize() { // Raw ptr KernelBundleImpPtr is valid, because we saved the // shared_ptr to the handler setHandlerKernelBundle(KernelBundleImpPtr->shared_from_this()); - KernelInserted = KernelBundleImpPtr->add_kernel( - KernelID, detail::createSyclObjFromImpl(Dev)); + KernelInserted = KernelBundleImpPtr->add_kernel(KernelID, Dev); } // If the kernel was not found in executable mode we throw an exception if (!KernelInserted) @@ -880,8 +876,7 @@ void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { return; kernel_id KernelID = detail::get_kernel_id_impl(KernelName); - if (!UsedKernelBundleImplPtr->has_kernel( - KernelID, detail::createSyclObjFromImpl(impl->get_device()))) + if (!UsedKernelBundleImplPtr->has_kernel(KernelID, impl->get_device())) throw sycl::exception( make_error_code(errc::kernel_not_supported), "The kernel bundle in use does not contain the kernel"); @@ -1493,8 +1488,8 @@ void handler::depends_on(const std::vector &Events) { void handler::depends_on(const detail::EventImplPtr &EventImpl) { registerEventDependency(EventImpl, impl->CGData.MEvents, impl->get_queue_or_null(), impl->get_context(), - impl->get_device(), getCommandGraph().get(), - getType()); + *getSyclObjImpl(impl->get_device()), + getCommandGraph().get(), getType()); } void handler::depends_on(const std::vector &Events) { @@ -1535,7 +1530,7 @@ bool handler::supportsUSMMemset2D() { } id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { - device_impl &Dev = impl->get_device(); + device_impl &Dev = *getSyclObjImpl(impl->get_device()); range<2> ItemLimit = Dev.get_info>() * Dev.get_info(); return id<2>{std::min(ItemLimit[0], Height), std::min(ItemLimit[1], Width)}; @@ -1543,7 +1538,7 @@ id<2> handler::computeFallbackKernelBounds(size_t Width, size_t Height) { // TODO: do we need this still? backend handler::getDeviceBackend() const { - return impl->get_device().getBackend(); + return getSyclObjImpl(impl->get_device()).get()->getBackend(); } void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, @@ -1595,7 +1590,7 @@ void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr, size_t DeviceGlobalTSize, bool IsDeviceImageScoped, size_t NumBytes, size_t Offset) { - host_task([=, &Dev = impl->get_device(), + host_task([=, &Dev = *getSyclObjImpl(impl->get_device()), WeakContextImpl = impl->get_context().weak_from_this()] { // Capture context as weak to avoid keeping it alive for too long. If it is // dead by the time this executes, the operation would not have been visible @@ -1614,7 +1609,7 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest, bool IsDeviceImageScoped, size_t NumBytes, size_t Offset) { host_task([=, Context = impl->get_context().shared_from_this(), - &Dev = impl->get_device()] { + &Dev = *getSyclObjImpl(impl->get_device())] { // Unlike memcpy to device_global, we need to keep the context alive in the // capture of this operation as we must be able to correctly copy the value // to the user-specified pointer. Device is guaranteed to live until SYCL RT @@ -1629,7 +1624,7 @@ void handler::setKernelLaunchProperties( const detail::KernelPropertyHolderStructTy &Kprop) { impl->MKernelData.validateAndSetKernelLaunchProperties( Kprop, getCommandGraph() != nullptr /*hasGraph?*/, - impl->get_device() /*device_impl*/); + *getSyclObjImpl(impl->get_device()) /*device_impl*/); } detail::context_impl &handler::getContextImpl() const { @@ -1669,7 +1664,7 @@ kernel_bundle handler::getKernelBundle() const { } std::optional> handler::getMaxWorkGroups() { - device_impl &DeviceImpl = impl->get_device(); + device_impl &DeviceImpl = *getSyclObjImpl(impl->get_device()); std::array UrResult = {}; auto Ret = DeviceImpl.getAdapter().call_nocheck( DeviceImpl.getHandleRef(), diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index c3bd447445d39..743baed746396 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -30,22 +30,19 @@ queue::queue(const context &SyclContext, const device_selector &DeviceSelector, const device &SyclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp); - impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice), - *detail::getSyclObjImpl(SyclContext), - AsyncHandler, PropList); + impl = detail::queue_impl::create( + SyclDevice, *detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); } queue::queue(const context &SyclContext, const device &SyclDevice, const async_handler &AsyncHandler, const property_list &PropList) { - impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice), - *detail::getSyclObjImpl(SyclContext), - AsyncHandler, PropList); + impl = detail::queue_impl::create( + SyclDevice, *detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); } queue::queue(const device &SyclDevice, const async_handler &AsyncHandler, const property_list &PropList) { - impl = detail::queue_impl::create(*detail::getSyclObjImpl(SyclDevice), - AsyncHandler, PropList); + impl = detail::queue_impl::create(SyclDevice, AsyncHandler, PropList); } queue::queue(const context &SyclContext, const device_selector &deviceSelector, diff --git a/sycl/test/gdb/printers.cpp b/sycl/test/gdb/printers.cpp index 7e7898207a82b..01e753460b1aa 100644 --- a/sycl/test/gdb/printers.cpp +++ b/sycl/test/gdb/printers.cpp @@ -77,7 +77,7 @@ sycl::item<2, false> item_wo_offset = // DEVICE: 16 | class sycl::range<> MemRange // CHECK: 0 | class sycl::detail::queue_impl -// CHECK: 56 | device_impl & MDevice +// CHECK: 56 | class sycl::device MDevice // CHECK: 0 | class sycl::accessor // HOST: 0 | {{.*}} sycl::detail::AccessorImplHost{{.*}} impl diff --git a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp index 66509107d8967..5ca213a45b330 100644 --- a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp +++ b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp @@ -25,8 +25,7 @@ constexpr auto DisableCleanupName = "SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP"; class TestQueueImpl : public sycl::detail::queue_impl { public: - TestQueueImpl(sycl::detail::context_impl &SyclContext, - sycl::detail::device_impl &Dev) + TestQueueImpl(sycl::detail::context_impl &SyclContext, sycl::device &Dev) : sycl::detail::queue_impl(Dev, SyclContext, SyclContext.get_async_handler(), {}, sycl::detail::queue_impl::private_tag{}) {} @@ -46,8 +45,7 @@ class BarrierHandlingWithHostTask : public ::testing::Test { sycl::device SyclDev = sycl::detail::select_device(sycl::default_selector_v, SyclContext); QueueDevImpl.reset( - new TestQueueImpl(*sycl::detail::getSyclObjImpl(SyclContext), - *sycl::detail::getSyclObjImpl(SyclDev))); + new TestQueueImpl(*sycl::detail::getSyclObjImpl(SyclContext), SyclDev)); MainLock.lock(); } diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index ee305793107da..55ff96c0c0d16 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -22,8 +22,7 @@ using ::testing::An; class MockQueueImpl : public sycl::detail::queue_impl { public: - MockQueueImpl(sycl::detail::device_impl &Device, - const sycl::async_handler &AsyncHandler, + MockQueueImpl(sycl::device &Device, const sycl::async_handler &AsyncHandler, const sycl::property_list &PropList) : sycl::detail::queue_impl(Device, AsyncHandler, PropList, sycl::detail::queue_impl::private_tag{}) {} @@ -80,10 +79,9 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { sycl::unittest::UrMock<> Mock; platform Plt = sycl::platform(); - const sycl::device Dev = Plt.get_devices()[0]; + sycl::device Dev = Plt.get_devices()[0]; auto Queue = std::make_shared( - *sycl::detail::getSyclObjImpl(Dev), sycl::async_handler{}, - sycl::property::queue::in_order()); + Dev, sycl::async_handler{}, sycl::property::queue::in_order()); // Check that tasks submitted to an in-order queue implicitly depend_on the // previous task, this is needed to properly sync blocking & blocked tasks.