diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp
index 0185c611bec59..ecce4e58ab660 100644
--- a/sycl/include/sycl/handler.hpp
+++ b/sycl/include/sycl/handler.hpp
@@ -2839,10 +2839,15 @@ class __SYCL_EXPORT handler {
   /// Executes a command_graph.
   ///
   /// \param Graph Executable command_graph to run
+#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
+  void
+  ext_oneapi_graph(const ext::oneapi::experimental::command_graph<
+                   ext::oneapi::experimental::graph_state::executable> &Graph);
+#else
   void ext_oneapi_graph(ext::oneapi::experimental::command_graph<
                         ext::oneapi::experimental::graph_state::executable>
                             Graph);
-
+#endif
   /// Copies data from host to device, where \p Src is a USM pointer and \p Dest
   /// is an opaque image memory handle. An exception is thrown if either \p Src
   /// is nullptr or \p Dest is incomplete. The behavior is undefined if
diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp
index 968100d2d9ea4..514ad88465419 100644
--- a/sycl/source/detail/graph/graph_impl.cpp
+++ b/sycl/source/detail/graph/graph_impl.cpp
@@ -835,7 +835,7 @@ void exec_graph_impl::createCommandBuffers(
     throw sycl::exception(errc::invalid,
                           "Failed to create UR command-buffer");
   }
-  Partition->MCommandBuffers[Device] = OutCommandBuffer;
+  Partition->MCommandBuffer = OutCommandBuffer;
 
   for (node_impl &Node : Partition->schedule()) {
     // Some nodes are not scheduled like other nodes, and only their
@@ -910,13 +910,12 @@ exec_graph_impl::~exec_graph_impl() {
 
     for (const auto &Partition : MPartitions) {
       Partition->MSchedule.clear();
-      for (const auto &Iter : Partition->MCommandBuffers) {
-        if (auto CmdBuf = Iter.second; CmdBuf) {
-          ur_result_t Res = Adapter.call_nocheck<
-              sycl::detail::UrApiKind::urCommandBufferReleaseExp>(CmdBuf);
-          (void)Res;
-          assert(Res == UR_RESULT_SUCCESS);
-        }
+      if (Partition->MCommandBuffer) {
+        ur_result_t Res = Adapter.call_nocheck<
+            sycl::detail::UrApiKind::urCommandBufferReleaseExp>(
+            Partition->MCommandBuffer);
+        (void)Res;
+        assert(Res == UR_RESULT_SUCCESS);
       }
     }
   } catch (std::exception &e) {
@@ -988,11 +987,9 @@ EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler(
                             Partition->MAccessors.end());
   }
 
-  auto CommandBuffer = Partition->MCommandBuffers[Queue.get_device()];
-
   std::unique_ptr<sycl::detail::CG> CommandGroup =
       std::make_unique<sycl::detail::CGExecCommandBuffer>(
-          CommandBuffer, nullptr, std::move(CGData));
+          Partition->MCommandBuffer, nullptr, std::move(CGData));
 
   EventImplPtr SchedulerEvent = sycl::detail::Scheduler::getInstance().addCG(
       std::move(CommandGroup), Queue, EventNeeded);
@@ -1009,25 +1006,28 @@ EventImplPtr exec_graph_impl::enqueuePartitionDirectly(
     std::shared_ptr<partition> &Partition, sycl::detail::queue_impl &Queue,
     std::vector<sycl::detail::EventImplPtr> &WaitEvents, bool EventNeeded) {
 
+  ur_event_handle_t *UrEnqueueWaitList = nullptr;
+  size_t UrEnqueueWaitListSize = 0;
+
   // Create a list containing all the UR event handles in WaitEvents. WaitEvents
   // is assumed to be safe for scheduler bypass and any host-task events that it
   // contains can be ignored.
   std::vector<ur_event_handle_t> UrEventHandles{};
-  UrEventHandles.reserve(WaitEvents.size());
-  for (auto &SyclWaitEvent : WaitEvents) {
-    if (auto URHandle = SyclWaitEvent->getHandle()) {
-      UrEventHandles.push_back(URHandle);
+  if (!WaitEvents.empty()) {
+    UrEventHandles.reserve(WaitEvents.size());
+    for (auto &SyclWaitEvent : WaitEvents) {
+      if (auto URHandle = SyclWaitEvent->getHandle()) {
+        UrEventHandles.push_back(URHandle);
+      }
     }
-  }
 
-  auto CommandBuffer = Partition->MCommandBuffers[Queue.get_device()];
-  const size_t UrEnqueueWaitListSize = UrEventHandles.size();
-  const ur_event_handle_t *UrEnqueueWaitList =
-      UrEnqueueWaitListSize == 0 ? nullptr : UrEventHandles.data();
+    UrEnqueueWaitList = UrEventHandles.data();
+    UrEnqueueWaitListSize = UrEventHandles.size();
+  }
 
   if (!EventNeeded) {
     Queue.getAdapter().call<sycl::detail::UrApiKind::urEnqueueCommandBufferExp>(
-        Queue.getHandleRef(), CommandBuffer, UrEnqueueWaitListSize,
+        Queue.getHandleRef(), Partition->MCommandBuffer, UrEnqueueWaitListSize,
         UrEnqueueWaitList, nullptr);
     return nullptr;
   } else {
@@ -1037,7 +1037,7 @@ EventImplPtr exec_graph_impl::enqueuePartitionDirectly(
     NewEvent->setSubmissionTime();
     ur_event_handle_t UrEvent = nullptr;
     Queue.getAdapter().call<sycl::detail::UrApiKind::urEnqueueCommandBufferExp>(
-        Queue.getHandleRef(), CommandBuffer, UrEventHandles.size(),
+        Queue.getHandleRef(), Partition->MCommandBuffer, UrEventHandles.size(),
         UrEnqueueWaitList, &UrEvent);
     NewEvent->setHandle(UrEvent);
     NewEvent->setEventFromSubmittedExecCommandBuffer(true);
@@ -1161,17 +1161,20 @@ exec_graph_impl::enqueuePartitions(sycl::detail::queue_impl &Queue,
 EventImplPtr
 exec_graph_impl::enqueue(sycl::detail::queue_impl &Queue,
-                         sycl::detail::CG::StorageInitHelper CGData,
+                         sycl::detail::CG::StorageInitHelper &CGData,
                          bool EventNeeded) {
   WriteLock Lock(MMutex);
 
-  cleanupExecutionEvents(MSchedulerDependencies);
-  CGData.MEvents.insert(CGData.MEvents.end(), MSchedulerDependencies.begin(),
-                        MSchedulerDependencies.end());
+  if (!MSchedulerDependencies.empty()) {
+    cleanupExecutionEvents(MSchedulerDependencies);
+    CGData.MEvents.insert(CGData.MEvents.end(), MSchedulerDependencies.begin(),
+                          MSchedulerDependencies.end());
+  }
 
   bool IsCGDataSafeForSchedulerBypass =
-      detail::Scheduler::areEventsSafeForSchedulerBypass(
-          CGData.MEvents, Queue.getContextImpl()) &&
+      (CGData.MEvents.empty() ||
+       detail::Scheduler::areEventsSafeForSchedulerBypass(
+           CGData.MEvents, Queue.getContextImpl())) &&
       CGData.MRequirements.empty();
 
   // This variable represents the returned event. It will always be nullptr if
@@ -1478,8 +1481,7 @@ void exec_graph_impl::update(nodes_range Nodes) {
 
   auto PartitionedNodes = getURUpdatableNodes(Nodes);
   for (auto &[PartitionIndex, NodeImpl] : PartitionedNodes) {
     auto &Partition = MPartitions[PartitionIndex];
-    auto CommandBuffer = Partition->MCommandBuffers[MDevice];
-    updateURImpl(CommandBuffer, NodeImpl);
+    updateURImpl(Partition->MCommandBuffer, NodeImpl);
   }
 }
diff --git a/sycl/source/detail/graph/graph_impl.hpp b/sycl/source/detail/graph/graph_impl.hpp
index eedfcf0506bf3..2e29f950a4f9d 100644
--- a/sycl/source/detail/graph/graph_impl.hpp
+++ b/sycl/source/detail/graph/graph_impl.hpp
@@ -50,15 +50,16 @@ class dynamic_command_group_impl;
 class partition {
 public:
   /// Constructor.
-  partition() : MSchedule(), MCommandBuffers() {}
+  partition() : MSchedule() {}
 
   /// List of root nodes.
   std::set<node_impl *> MRoots;
 
   /// Execution schedule of nodes in the graph.
   std::list<node_impl *> MSchedule;
-  /// Map of devices to command buffers.
-  std::unordered_map<sycl::device, ur_exp_command_buffer_handle_t>
-      MCommandBuffers;
+
+  /// Command buffer associated with this partition.
+  ur_exp_command_buffer_handle_t MCommandBuffer = nullptr;
+
   /// List of predecessors to this partition.
   std::vector<std::shared_ptr<partition>> MPredecessors;
@@ -588,7 +589,7 @@ class graph_impl : public std::enable_shared_from_this<graph_impl> {
 };
 
 /// Class representing the implementation of command_graph.
-class exec_graph_impl {
+class exec_graph_impl : public std::enable_shared_from_this<exec_graph_impl> {
 public:
   using ReadLock = std::shared_lock<std::shared_mutex>;
   using WriteLock = std::unique_lock<std::shared_mutex>;
@@ -628,7 +629,7 @@ class exec_graph_impl {
   /// @return Returns an event if EventNeeded is true. Returns nullptr
   /// otherwise.
   EventImplPtr enqueue(sycl::detail::queue_impl &Queue,
-                       sycl::detail::CG::StorageInitHelper CGData,
+                       sycl::detail::CG::StorageInitHelper &CGData,
                        bool EventNeeded);
 
   /// Iterates through all the nodes in the graph to build the list of
diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp
index edf32dfa80f7e..479bee005a2ce 100644
--- a/sycl/source/detail/handler_impl.hpp
+++ b/sycl/source/detail/handler_impl.hpp
@@ -203,10 +203,7 @@ class handler_impl {
 
   /// If we are submitting a graph using ext_oneapi_graph this will be the graph
   /// to be executed.
-  std::shared_ptr<ext::oneapi::experimental::detail::exec_graph_impl>
-      MExecGraph;
-  /// Storage for a node created from a subgraph submission.
-  std::shared_ptr<ext::oneapi::experimental::detail::node_impl> MSubgraphNode;
+  ext::oneapi::experimental::detail::exec_graph_impl *MExecGraph;
   /// Storage for the CG created when handling graph nodes added explicitly.
   std::unique_ptr<detail::CG> MGraphNodeCG;
   /// Storage for node dependencies passed when adding a graph node explicitly
diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp
index 466a314939024..348ecc1edfd32 100644
--- a/sycl/source/detail/queue_impl.cpp
+++ b/sycl/source/detail/queue_impl.cpp
@@ -350,9 +350,6 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
   // Host and interop tasks, however, are not submitted to low-level runtimes
   // and require separate dependency management.
   const CGType Type = HandlerImpl.MCGType;
-  std::vector<StreamImplPtr> Streams;
-  if (Type == CGType::Kernel)
-    Streams = std::move(Handler.MStreamStorage);
 
   HandlerImpl.MEventMode = SubmitInfo.EventMode();
 
@@ -360,7 +357,8 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
       (Type == CGType::ExecCommandBuffer &&
        HandlerImpl.MExecGraph->containsHostTask());
 
-  auto requiresPostProcess = SubmitInfo.PostProcessorFunc() || Streams.size();
+  auto requiresPostProcess =
+      SubmitInfo.PostProcessorFunc() || Handler.MStreamStorage.size();
   auto noLastEventPath = !isHostTask &&
                          MNoLastEventMode.load(std::memory_order_acquire) &&
                          !requiresPostProcess;
@@ -402,21 +400,23 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
     handlerPostProcess(Handler, SubmitInfo.PostProcessorFunc(), Event);
   }
 
-  for (auto &Stream : Streams) {
-    // We don't want stream flushing to be blocking operation that is why submit
-    // a host task to print stream buffer. It will fire up as soon as the kernel
-    // finishes execution.
-    auto L = [&](handler &ServiceCGH) {
-      Stream->generateFlushCommand(ServiceCGH);
-    };
-    detail::type_erased_cgfo_ty CGF{L};
-    detail::EventImplPtr FlushEvent =
-        submit_impl(CGF, /*CallerNeedsEvent*/ true, Loc, IsTopCodeLoc, {});
-    if (EventImpl)
-      EventImpl->attachEventToCompleteWeak(FlushEvent);
-    if (!isInOrder()) {
-      // For in-order queue, the dependencies will be tracked by LastEvent
-      registerStreamServiceEvent(FlushEvent);
+  if (Type == CGType::Kernel) {
+    for (auto &Stream : Handler.MStreamStorage) {
+      // We don't want stream flushing to be a blocking operation, which is why
+      // we submit a host task to print the stream buffer. It will fire up as
+      // soon as the kernel finishes execution.
+      auto L = [&](handler &ServiceCGH) {
+        Stream->generateFlushCommand(ServiceCGH);
+      };
+      detail::type_erased_cgfo_ty CGF{L};
+      detail::EventImplPtr FlushEvent =
+          submit_impl(CGF, /*CallerNeedsEvent*/ true, Loc, IsTopCodeLoc, {});
+      if (EventImpl)
+        EventImpl->attachEventToCompleteWeak(FlushEvent);
+      if (!isInOrder()) {
+        // For an in-order queue, the dependencies will be tracked by LastEvent
+        registerStreamServiceEvent(FlushEvent);
+      }
     }
   }
 
diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp
index 6066ed6b3de50..c5d96d626e418 100644
--- a/sycl/source/detail/queue_impl.hpp
+++ b/sycl/source/detail/queue_impl.hpp
@@ -737,7 +737,9 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
 
     MEmpty.store(false, std::memory_order_release);
 
-    synchronizeWithExternalEvent(Handler);
+    if (MInOrderExternalEvent.read_unlocked()) {
+      synchronizeWithExternalEvent(Handler);
+    }
 
     auto Event = parseEvent(Handler.finalize());
 
@@ -778,7 +780,9 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
       MEmpty = false;
       MNoLastEventMode = false;
 
-      synchronizeWithExternalEvent(Handler);
+      if (MInOrderExternalEvent.read_unlocked()) {
+        synchronizeWithExternalEvent(Handler);
+      }
 
       EventToBuildDeps = parseEvent(Handler.finalize());
       assert(EventToBuildDeps);
@@ -808,7 +812,9 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
 
       MEmpty = false;
 
-      synchronizeWithExternalEvent(Handler);
+      if (MInOrderExternalEvent.read_unlocked()) {
+        synchronizeWithExternalEvent(Handler);
+      }
 
       EventToBuildDeps = parseEvent(Handler.finalize());
       if (EventToBuildDeps)
@@ -1010,12 +1016,16 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
       }
     }
   }
+
   DataType read() {
     if (!MIsSet.load(std::memory_order_acquire))
      return DataType{};
     std::lock_guard<std::mutex> Lock(MDataMtx);
     return MData;
   }
+
+  // To be used when the queue's mutex is already held.
+  DataType read_unlocked() { return MData; }
 };
 
 const bool MIsInorder;
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index 19beb3235e21b..d7845afedfceb 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -3845,8 +3845,7 @@ ur_result_t UpdateCommandBufferCommand::enqueueImp() {
   auto Device = MQueue->get_device();
   auto &Partitions = MGraph->getPartitions();
   for (auto &[PartitionIndex, NodeImpl] : PartitionedNodes) {
-    auto CommandBuffer = Partitions[PartitionIndex]->MCommandBuffers[Device];
-    MGraph->updateURImpl(CommandBuffer, NodeImpl);
+    MGraph->updateURImpl(Partitions[PartitionIndex]->MCommandBuffer, NodeImpl);
   }
 
   return UR_RESULT_SUCCESS;
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index 79bcdbf62aa47..c6bc4a041ddd2 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -485,12 +485,18 @@ event handler::finalize() {
 #endif
   const auto &type = getType();
 
-  detail::queue_impl *Queue = impl->get_queue_or_null();
-  ext::oneapi::experimental::detail::graph_impl *Graph =
-      impl->get_graph_or_null();
+  detail::queue_impl *const Queue = impl->get_queue_or_null();
+
+  const bool IsQueueBeingRecorded = Queue && Queue->hasCommandGraph();
+  const bool IsExplicitGraphAPI = impl->get_graph_or_null() != nullptr;
+  const bool IsGraphEnqueue =
+      impl->MCGType == detail::CGType::ExecCommandBuffer;
+  const bool IsGraphRelated =
+      IsQueueBeingRecorded || IsExplicitGraphAPI || IsGraphEnqueue;
+
   const bool KernelFastPath =
-      (Queue && !Graph && !impl->MSubgraphNode && !Queue->hasCommandGraph() &&
-       !impl->CGData.MRequirements.size() && !MStreamStorage.size() &&
+      (Queue && !IsGraphRelated && !impl->CGData.MRequirements.size() &&
+       !MStreamStorage.size() &&
       detail::Scheduler::areEventsSafeForSchedulerBypass(
           impl->CGData.MEvents, Queue->getContextImpl()));
 
@@ -507,7 +513,7 @@ event handler::finalize() {
   // According to 4.7.6.9 of SYCL2020 spec, if a placeholder accessor is passed
   // to a command without being bound to a command group, an exception should
   // be thrown.
-  {
+  if (!IsGraphEnqueue) {
     for (const auto &arg : impl->MArgs) {
       if (arg.MType != detail::kernel_param_kind_t::kind_accessor)
         continue;
@@ -608,8 +614,8 @@ event handler::finalize() {
     // the graph is not changed, then this faster path is used to submit
     // kernel bypassing scheduler and avoiding CommandGroup, Command objects
    // creation.
-    std::vector<ur_event_handle_t> RawEvents = detail::Command::getUrEvents(
-        impl->CGData.MEvents, impl->get_queue_or_null(), false);
+    std::vector<ur_event_handle_t> RawEvents =
+        detail::Command::getUrEvents(impl->CGData.MEvents, Queue, false);
 
     bool DiscardEvent =
         !impl->MEventNeeded && impl->get_queue().supportsDiscardingPiEvents();
@@ -638,9 +644,8 @@ event handler::finalize() {
     if (xptiEnabled) {
       std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
           detail::GSYCLStreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc,
-          MKernelName.data(), impl->MKernelNameBasedCachePtr,
-          impl->get_queue_or_null(), impl->MNDRDesc, KernelBundleImpPtr,
-          impl->MArgs);
+          MKernelName.data(), impl->MKernelNameBasedCachePtr, Queue,
+          impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs);
       detail::emitInstrumentationGeneral(detail::GSYCLStreamID, InstanceID,
                                          CmdTraceEvent, xpti::trace_task_begin,
                                          nullptr);
@@ -717,7 +722,7 @@ event handler::finalize() {
         impl->MNDRDesc, std::move(MHostKernel), std::move(MKernel),
         std::move(impl->MKernelBundle), std::move(impl->CGData),
         std::move(impl->MArgs), toKernelNameStrT(MKernelName),
-        impl->MKernelNameBasedCachePtr, std::move(MStreamStorage),
+        impl->MKernelNameBasedCachePtr, MStreamStorage,
         std::move(impl->MAuxiliaryResources), getType(),
         impl->MKernelCacheConfig, impl->MKernelIsCooperative,
         impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize,
@@ -776,7 +781,6 @@ event handler::finalize() {
   case detail::CGType::EnqueueNativeCommand:
   case detail::CGType::CodeplayHostTask: {
     detail::context_impl &Context = impl->get_context();
-    detail::queue_impl *Queue = impl->get_queue_or_null();
     CommandGroup.reset(new detail::CGHostTask(
         std::move(impl->MHostTask), Queue, &Context, std::move(impl->MArgs),
         std::move(impl->CGData), getType(), MCodeLoc));
@@ -825,7 +829,6 @@ event handler::finalize() {
     break;
   }
   case detail::CGType::ExecCommandBuffer: {
-    detail::queue_impl *Queue = impl->get_queue_or_null();
     std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
         ParentGraph = Queue ? Queue->getCommandGraph()
                             : impl->get_graph().shared_from_this();
@@ -842,15 +845,15 @@ event handler::finalize() {
     // pass the exec_graph_impl and event dependencies. Since this subgraph CG
     // will not be executed this is fine.
     CommandGroup.reset(new sycl::detail::CGExecCommandBuffer(
-        nullptr, impl->MExecGraph, std::move(impl->CGData)));
+        nullptr, impl->MExecGraph->shared_from_this(),
+        std::move(impl->CGData)));
   } else {
-    detail::queue_impl &Queue = impl->get_queue();
     bool DiscardEvent = !impl->MEventNeeded &&
-                        Queue.supportsDiscardingPiEvents() &&
+                        Queue->supportsDiscardingPiEvents() &&
                         !impl->MExecGraph->containsHostTask();
-    detail::EventImplPtr GraphCompletionEvent = impl->MExecGraph->enqueue(
-        Queue, std::move(impl->CGData), !DiscardEvent);
+    detail::EventImplPtr GraphCompletionEvent =
+        impl->MExecGraph->enqueue(*Queue, impl->CGData, !DiscardEvent);
 #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
     return GraphCompletionEvent;
 #else
@@ -903,7 +906,7 @@ event handler::finalize() {
   // If there is a graph associated with the handler we are in the explicit
   // graph mode, so we store the CG instead of submitting it to the scheduler,
   // so it can be retrieved by the graph later.
-  if (impl->get_graph_or_null()) {
+  if (IsExplicitGraphAPI) {
     impl->MGraphNodeCG = std::move(CommandGroup);
     auto EventImpl = detail::event_impl::create_completed_host_event();
 #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
@@ -918,7 +921,8 @@ event handler::finalize() {
 
   // If the queue has an associated graph then we need to take the CG and pass
   // it to the graph to create a node, rather than submit it to the scheduler.
-  if (auto GraphImpl = Queue->getCommandGraph(); GraphImpl) {
+  if (IsQueueBeingRecorded) {
+    auto GraphImpl = Queue->getCommandGraph();
     auto EventImpl = detail::event_impl::create_completed_host_event();
     EventImpl->setSubmittedQueue(Queue->weak_from_this());
     ext::oneapi::experimental::detail::node_impl *NodeImpl = nullptr;
@@ -2364,11 +2368,17 @@ void handler::setKernelWorkGroupMem(size_t Size) {
 }
 
 void handler::ext_oneapi_graph(
+#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
     ext::oneapi::experimental::command_graph<
        ext::oneapi::experimental::graph_state::executable>
-        Graph) {
+        Graph
+#else
+    const ext::oneapi::experimental::command_graph<
+        ext::oneapi::experimental::graph_state::executable> &Graph
+#endif
+) {
   setType(detail::CGType::ExecCommandBuffer);
-  impl->MExecGraph = detail::getSyclObjImpl(Graph);
+  impl->MExecGraph = detail::getSyclObjImpl(Graph).get();
 }
 
 std::shared_ptr
diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp
index 04d2f81281de2..e5cf1fbe4f005 100644
--- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp
+++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp
@@ -61,8 +61,7 @@ class LimitedHandler {
     handler_impl(std::shared_ptr<MockQueueImpl> Queue) : MQueue(Queue) {}
     std::shared_ptr<MockQueueImpl> MQueue;
     MockQueueImpl &get_queue() { return *MQueue; }
-    std::shared_ptr<ext::oneapi::experimental::detail::exec_graph_impl>
-        MExecGraph;
+    ext::oneapi::experimental::detail::exec_graph_impl *MExecGraph;
   };
   std::shared_ptr<handler_impl> impl;
   std::shared_ptr<detail::kernel_impl> MKernel;