Skip to content

Commit 8886912

Browse files
committed
[SYCL][XPTI] Emit XPTI events for kernel nodes added to user graph.
When graph is finalized kernels can be directly enqueued to a command buffer without using the scheduler, in this case XPTI events for the node_create/task_begin/task_end were missing. Save code location from tls when CGF is added to a graph through graph.add API. This captures user stored code location in TLS, if one was set before calling graph.add Signed-off-by: Guy Zadicario <[email protected]>
1 parent f43a219 commit 8886912

File tree

2 files changed

+43
-3
lines changed

2 files changed

+43
-3
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <detail/queue_impl.hpp>
1414
#include <detail/scheduler/commands.hpp>
1515
#include <detail/sycl_mem_obj_t.hpp>
16+
#include <sycl/detail/common.hpp>
1617
#include <sycl/feature_test.hpp>
1718
#include <sycl/queue.hpp>
1819

@@ -372,6 +373,15 @@ graph_impl::add(const std::shared_ptr<graph_impl> &Impl,
372373
const std::vector<std::shared_ptr<node_impl>> &Dep) {
373374
(void)Args;
374375
sycl::handler Handler{Impl};
376+
377+
// save code location if one was set in TLS.
378+
// idealy it would be nice to capture user's call code location
379+
// by adding a parameter to the graph.add function, but this will
380+
// break the API. At least capture code location from TLS, user
381+
// can set it before calling graph.add
382+
sycl::detail::tls_code_loc_t Tls;
383+
Handler.saveCodeLoc(Tls.query(), Tls.isToplevel());
384+
375385
CGF(Handler);
376386

377387
if (Handler.getType() == sycl::detail::CGType::Barrier) {
@@ -667,6 +677,23 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx,
667677
}
668678
ur_exp_command_buffer_sync_point_t NewSyncPoint;
669679
ur_exp_command_buffer_command_handle_t NewCommand = 0;
680+
681+
#ifdef XPTI_ENABLE_INSTRUMENTATION
682+
int32_t StreamID = xptiRegisterStream(sycl::detail::SYCL_STREAM_NAME);
683+
sycl::detail::CGExecKernel *CGExec =
684+
static_cast<sycl::detail::CGExecKernel *>(Node->MCommandGroup.get());
685+
sycl::detail::code_location CodeLoc(CGExec->MFileName.c_str(),
686+
CGExec->MFunctionName.c_str(),
687+
CGExec->MLine, CGExec->MColumn);
688+
auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
689+
StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc,
690+
CGExec->MKernelName.c_str(), nullptr, CGExec->MNDRDesc,
691+
CGExec->MKernelBundle, CGExec->MArgs);
692+
if (CmdTraceEvent)
693+
sycl::detail::emitInstrumentationGeneral(
694+
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr);
695+
#endif
696+
670697
ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel(
671698
Ctx, DeviceImpl, CommandBuffer,
672699
*static_cast<sycl::detail::CGExecKernel *>((Node->MCommandGroup.get())),
@@ -679,6 +706,12 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx,
679706
"Failed to add kernel to UR command-buffer");
680707
}
681708

709+
#ifdef XPTI_ENABLE_INSTRUMENTATION
710+
if (CmdTraceEvent)
711+
sycl::detail::emitInstrumentationGeneral(
712+
StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr);
713+
#endif
714+
682715
return NewSyncPoint;
683716
}
684717

sycl/source/detail/scheduler/commands.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,8 +2007,10 @@ void instrumentationAddExtraKernelMetadata(
20072007
Program = SyclKernel->getProgramRef();
20082008
if (!SyclKernel->isCreatedFromSource())
20092009
EliminatedArgMask = SyclKernel->getKernelArgMask();
2010-
} else {
2011-
assert(Queue && "Kernel submissions should have an associated queue");
2010+
} else if (Queue) {
2011+
// NOTE: Queue can be null when kerner is directly enqueued to a command
2012+
// buffer
2013+
// by graph API, when a midifiable graph is finalized.
20122014
std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) =
20132015
detail::ProgramManager::getInstance().getOrCreateKernel(
20142016
Queue->getContextImplPtr(), Queue->getDeviceImplPtr(), KernelName);
@@ -2134,7 +2136,12 @@ std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
21342136

21352137
if (CmdTraceEvent) {
21362138
// Stash the queue_id mutable metadata in TLS
2137-
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(Queue));
2139+
// NOTE: Queue can be null when kerner is directly enqueued to a command
2140+
// buffer
2141+
// by graph API, when a midifiable graph is finalized.
2142+
if (Queue.get())
2143+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
2144+
getQueueID(Queue));
21382145
instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc,
21392146
KernelBundleImplPtr, SyclKernelName,
21402147
SyclKernel, Queue, CGArgs);

0 commit comments

Comments
 (0)