Skip to content

Commit f43a219

Browse files
committed
[SYCL][XPTI] Use app captured function name for command_group events payload.
When user has captured a code location in TLS before calling queue.submit, use the captured code location function name if exist instead the kernel name for the command group event payload. Kernel name is still added as metadata, only the payload changes. If user did not capture code location in TLS then use the kernel name for backward compatability. Additional version for queue.submit_impl functions added in order to propegate this state without breaking ABI. Signed-off-by: Guy Zadicario <[email protected]>
1 parent 8747652 commit f43a219

File tree

11 files changed

+125
-39
lines changed

11 files changed

+125
-39
lines changed

sycl/include/sycl/detail/common.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ class __SYCL_EXPORT tls_code_loc_t {
143143
/// @return The code location information saved in the TLS slot. If not TLS
144144
/// entry has been set up, a default coe location is returned.
145145
const detail::code_location &query();
146+
/// @brief Returns true if the TLS slot was cleared when this object was
147+
/// cunstructed.
148+
bool isToplevel() const { return !MLocalScope; }
146149

147150
private:
148151
// The flag that is used to determine if the object is in a local scope or in

sycl/include/sycl/handler.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,10 @@ class __SYCL_EXPORT handler {
577577

578578
/// Saves the location of user's code passed in \p CodeLoc for future usage in
579579
/// finalize() method.
580-
void saveCodeLoc(detail::code_location CodeLoc) { MCodeLoc = CodeLoc; }
580+
void saveCodeLoc(detail::code_location CodeLoc, bool IsTopCodeLoc) {
581+
MCodeLoc = CodeLoc;
582+
MIsTopCodeLoc = IsTopCodeLoc;
583+
}
581584

582585
/// Constructs CG object of specific type, passes it to Scheduler and
583586
/// returns sycl::event object representing the command group.
@@ -3365,6 +3368,7 @@ class __SYCL_EXPORT handler {
33653368
std::unique_ptr<detail::HostKernelBase> MHostKernel;
33663369

33673370
detail::code_location MCodeLoc = {};
3371+
bool MIsTopCodeLoc = true;
33683372
bool MIsFinalized = false;
33693373
event MLastEvent;
33703374

sycl/include/sycl/queue.hpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,9 +356,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
356356
};
357357

358358
return submit_impl_and_postprocess(CGF, TlsCodeLocCapture.query(),
359-
PostProcess);
359+
PostProcess,
360+
TlsCodeLocCapture.isToplevel());
360361
#else
361-
return submit_impl(CGF, TlsCodeLocCapture.query());
362+
return submit_impl(CGF, TlsCodeLocCapture.query(),
363+
TlsCodeLocCapture.isToplevel());
362364
#endif // __SYCL_USE_FALLBACK_ASSERT
363365
}
364366

@@ -395,9 +397,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
395397
};
396398

397399
return submit_impl_and_postprocess(CGF, SecondaryQueue,
398-
TlsCodeLocCapture.query(), PostProcess);
400+
TlsCodeLocCapture.query(), PostProcess,
401+
TlsCodeLocCapture.isToplevel());
399402
#else
400-
return submit_impl(CGF, SecondaryQueue, TlsCodeLocCapture.query());
403+
return submit_impl(CGF, SecondaryQueue, TlsCodeLocCapture.query(),
404+
TlsCodeLocCapture.isToplevel());
401405
#endif // __SYCL_USE_FALLBACK_ASSERT
402406
}
403407

@@ -2690,13 +2694,20 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
26902694
/// A template-free version of submit.
26912695
event submit_impl(std::function<void(handler &)> CGH,
26922696
const detail::code_location &CodeLoc);
2697+
event submit_impl(std::function<void(handler &)> CGH,
2698+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
26932699
/// A template-free version of submit.
26942700
event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
26952701
const detail::code_location &CodeLoc);
2702+
event submit_impl(std::function<void(handler &)> CGH, queue secondQueue,
2703+
const detail::code_location &CodeLoc, bool IsTopCodeLoc);
26962704

26972705
/// A template-free version of submit_without_event.
26982706
void submit_without_event_impl(std::function<void(handler &)> CGH,
26992707
const detail::code_location &CodeLoc);
2708+
void submit_without_event_impl(std::function<void(handler &)> CGH,
2709+
const detail::code_location &CodeLoc,
2710+
bool IsTopCodeLoc);
27002711

27012712
/// Submits a command group function object to the queue, in order to be
27022713
/// scheduled for execution on the device.
@@ -2712,7 +2723,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27122723
// TODO: Revisit whether we can avoid this.
27132724
submit(CGF, TlsCodeLocCapture.query());
27142725
#else
2715-
submit_without_event_impl(CGF, TlsCodeLocCapture.query());
2726+
submit_without_event_impl(CGF, TlsCodeLocCapture.query(),
2727+
TlsCodeLocCapture.isToplevel());
27162728
#endif // __SYCL_USE_FALLBACK_ASSERT
27172729
}
27182730

@@ -2732,6 +2744,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27322744
event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
27332745
const detail::code_location &CodeLoc,
27342746
const SubmitPostProcessF &PostProcess);
2747+
event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2748+
const detail::code_location &CodeLoc,
2749+
const SubmitPostProcessF &PostProcess,
2750+
bool IsTopCodeLoc);
27352751
/// A template-free version of submit.
27362752
/// \param CGH command group function/handler
27372753
/// \param secondQueue fallback queue
@@ -2742,6 +2758,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27422758
queue secondQueue,
27432759
const detail::code_location &CodeLoc,
27442760
const SubmitPostProcessF &PostProcess);
2761+
event submit_impl_and_postprocess(std::function<void(handler &)> CGH,
2762+
queue secondQueue,
2763+
const detail::code_location &CodeLoc,
2764+
const SubmitPostProcessF &PostProcess,
2765+
bool IsTopCodeLoc);
27452766

27462767
/// parallel_for_impl with a kernel represented as a lambda + range that
27472768
/// specifies global size only.

sycl/include/sycl/reduction.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1173,7 +1173,7 @@ template <class FunctorTy> void withAuxHandler(handler &CGH, FunctorTy Func) {
11731173
handler AuxHandler(CGH.MQueue, CGH.eventNeeded());
11741174
if (!createSyclObjFromImpl<queue>(CGH.MQueue).is_in_order())
11751175
AuxHandler.depends_on(E);
1176-
AuxHandler.saveCodeLoc(CGH.MCodeLoc);
1176+
AuxHandler.saveCodeLoc(CGH.MCodeLoc, CGH.MIsTopCodeLoc);
11771177
Func(AuxHandler);
11781178
CGH.MLastEvent = AuxHandler.finalize();
11791179
return;

sycl/source/detail/cg.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ class CG {
188188
std::vector<detail::EventImplPtr> MEvents;
189189
};
190190

191-
CG(CGType Type, StorageInitHelper D, detail::code_location loc = {})
191+
CG(CGType Type, StorageInitHelper D, detail::code_location loc = {},
192+
bool IsTopCodeLoc = true)
192193
: MType(Type), MData(std::move(D)) {
193194
// Capture the user code-location from Q.submit(), Q.parallel_for()
194195
// etc for later use; if code location information is not available,
@@ -199,6 +200,7 @@ class CG {
199200
MFileName = loc.fileName();
200201
MLine = loc.lineNumber();
201202
MColumn = loc.columnNumber();
203+
MIsTopCodeLoc = IsTopCodeLoc;
202204
}
203205

204206
CG(CG &&CommandGroup) = default;
@@ -240,6 +242,7 @@ class CG {
240242
std::string MFunctionName, MFileName;
241243
// Storage for line and column of code location
242244
int32_t MLine, MColumn;
245+
bool MIsTopCodeLoc;
243246
};
244247

245248
/// "Execute kernel" command group class.

sycl/source/detail/queue_impl.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,10 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
352352
const std::shared_ptr<queue_impl> &SecondaryQueue,
353353
bool CallerNeedsEvent,
354354
const detail::code_location &Loc,
355+
bool IsTopCodeLoc,
355356
const SubmitPostProcessF *PostProcess) {
356357
handler Handler(Self, PrimaryQueue, SecondaryQueue, CallerNeedsEvent);
357-
Handler.saveCodeLoc(Loc);
358+
Handler.saveCodeLoc(Loc, IsTopCodeLoc);
358359

359360
{
360361
NestedCallsTracker tracker;
@@ -395,7 +396,8 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
395396
// finishes execution.
396397
event FlushEvent = submit_impl(
397398
[&](handler &ServiceCGH) { Stream->generateFlushCommand(ServiceCGH); },
398-
Self, PrimaryQueue, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc, {});
399+
Self, PrimaryQueue, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc,
400+
IsTopCodeLoc, {});
399401
EventImpl->attachEventToCompleteWeak(detail::getSyclObjImpl(FlushEvent));
400402
registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent));
401403
}
@@ -412,7 +414,7 @@ event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
412414
CGH.depends_on(DepEvents);
413415
HandlerFunc(CGH);
414416
},
415-
Self, {});
417+
Self, {}, true);
416418
}
417419

418420
template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>

sycl/source/detail/queue_impl.hpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -404,16 +404,17 @@ class queue_impl {
404404
event submit(const std::function<void(handler &)> &CGF,
405405
const std::shared_ptr<queue_impl> &Self,
406406
const std::shared_ptr<queue_impl> &SecondQueue,
407-
const detail::code_location &Loc,
407+
const detail::code_location &Loc, bool IsTopCodeLoc,
408408
const SubmitPostProcessF *PostProcess = nullptr) {
409409
event ResEvent;
410410
try {
411411
ResEvent = submit_impl(CGF, Self, Self, SecondQueue,
412-
/*CallerNeedsEvent=*/true, Loc, PostProcess);
412+
/*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc,
413+
PostProcess);
413414
} catch (...) {
414-
ResEvent =
415-
SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue,
416-
/*CallerNeedsEvent=*/true, Loc, PostProcess);
415+
ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue,
416+
/*CallerNeedsEvent=*/true, Loc,
417+
IsTopCodeLoc, PostProcess);
417418
}
418419
return discard_or_return(ResEvent);
419420
}
@@ -428,19 +429,20 @@ class queue_impl {
428429
/// \return a SYCL event object for the submitted command group.
429430
event submit(const std::function<void(handler &)> &CGF,
430431
const std::shared_ptr<queue_impl> &Self,
431-
const detail::code_location &Loc,
432+
const detail::code_location &Loc, bool IsTopCodeLoc,
432433
const SubmitPostProcessF *PostProcess = nullptr) {
433-
auto ResEvent = submit_impl(CGF, Self, Self, nullptr,
434-
/*CallerNeedsEvent=*/true, Loc, PostProcess);
434+
auto ResEvent =
435+
submit_impl(CGF, Self, Self, nullptr,
436+
/*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, PostProcess);
435437
return discard_or_return(ResEvent);
436438
}
437439

438440
void submit_without_event(const std::function<void(handler &)> &CGF,
439441
const std::shared_ptr<queue_impl> &Self,
440-
const detail::code_location &Loc,
442+
const detail::code_location &Loc, bool IsTopCodeLoc,
441443
const SubmitPostProcessF *PostProcess = nullptr) {
442444
submit_impl(CGF, Self, Self, nullptr, /*CallerNeedsEvent=*/false, Loc,
443-
PostProcess);
445+
IsTopCodeLoc, PostProcess);
444446
}
445447

446448
/// Performs a blocking wait for the completion of all enqueued tasks in the
@@ -894,7 +896,7 @@ class queue_impl {
894896
const std::shared_ptr<queue_impl> &PrimaryQueue,
895897
const std::shared_ptr<queue_impl> &SecondaryQueue,
896898
bool CallerNeedsEvent, const detail::code_location &Loc,
897-
const SubmitPostProcessF *PostProcess);
899+
bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess);
898900

899901
/// Helper function for submitting a memory operation with a handler.
900902
/// \param Self is a shared_ptr to this queue.

sycl/source/detail/scheduler/commands.cpp

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,6 +2032,7 @@ void instrumentationAddExtraKernelMetadata(
20322032
}
20332033

20342034
void instrumentationFillCommonData(const std::string &KernelName,
2035+
const std::string &FuncName,
20352036
const std::string &FileName, uint64_t Line,
20362037
uint64_t Column, const void *const Address,
20372038
const QueueImplPtr &Queue,
@@ -2048,8 +2049,9 @@ void instrumentationFillCommonData(const std::string &KernelName,
20482049
xpti::payload_t Payload;
20492050
if (!FileName.empty()) {
20502051
// File name has a valid string
2051-
Payload = xpti::payload_t(KernelName.c_str(), FileName.c_str(), Line,
2052-
Column, Address);
2052+
Payload = xpti::payload_t(FuncName.empty() ? KernelName.c_str()
2053+
: FuncName.c_str(),
2054+
FileName.c_str(), Line, Column, Address);
20532055
HasSourceInfo = true;
20542056
} else if (Address) {
20552057
// We have a valid function name and an address
@@ -2097,8 +2099,9 @@ void instrumentationFillCommonData(const std::string &KernelName,
20972099
#ifdef XPTI_ENABLE_INSTRUMENTATION
20982100
std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
20992101
int32_t StreamID, const std::shared_ptr<detail::kernel_impl> &SyclKernel,
2100-
const detail::code_location &CodeLoc, const std::string &SyclKernelName,
2101-
const QueueImplPtr &Queue, const NDRDescT &NDRDesc,
2102+
const detail::code_location &CodeLoc, bool IsTopCodeLoc,
2103+
const std::string &SyclKernelName, const QueueImplPtr &Queue,
2104+
const NDRDescT &NDRDesc,
21022105
const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
21032106
std::vector<ArgDesc> &CGArgs) {
21042107

@@ -2117,9 +2120,17 @@ std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
21172120

21182121
std::string FileName =
21192122
CodeLoc.fileName() ? CodeLoc.fileName() : std::string();
2120-
instrumentationFillCommonData(KernelName, FileName, CodeLoc.lineNumber(),
2121-
CodeLoc.columnNumber(), Address, Queue,
2122-
FromSource, InstanceID, CmdTraceEvent);
2123+
2124+
// If code location is above sycl layer, use function name from code
2125+
// location instead of kernel name in event payload
2126+
std::string FuncName = (!IsTopCodeLoc && CodeLoc.functionName())
2127+
? CodeLoc.functionName()
2128+
: std::string();
2129+
2130+
instrumentationFillCommonData(KernelName, FuncName, FileName,
2131+
CodeLoc.lineNumber(), CodeLoc.columnNumber(),
2132+
Address, Queue, FromSource, InstanceID,
2133+
CmdTraceEvent);
21232134

21242135
if (CmdTraceEvent) {
21252136
// Stash the queue_id mutable metadata in TLS
@@ -2146,6 +2157,7 @@ void ExecCGCommand::emitInstrumentationData() {
21462157
return;
21472158

21482159
std::string KernelName;
2160+
std::string FuncName;
21492161
std::optional<bool> FromSource;
21502162
switch (MCommandGroup->getType()) {
21512163
case detail::CGType::Kernel: {
@@ -2160,8 +2172,13 @@ void ExecCGCommand::emitInstrumentationData() {
21602172
break;
21612173
}
21622174

2175+
// If code location is above sycl layer, use function name from code
2176+
// location instead of kernel name in event payload
2177+
if (!MCommandGroup->MIsTopCodeLoc)
2178+
FuncName = MCommandGroup->MFunctionName;
2179+
21632180
xpti_td *CmdTraceEvent = nullptr;
2164-
instrumentationFillCommonData(KernelName, MCommandGroup->MFileName,
2181+
instrumentationFillCommonData(KernelName, FuncName, MCommandGroup->MFileName,
21652182
MCommandGroup->MLine, MCommandGroup->MColumn,
21662183
MAddress, MQueue, FromSource, MInstanceID,
21672184
CmdTraceEvent);

sycl/source/detail/scheduler/commands.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -701,8 +701,9 @@ class ExecCGCommand : public Command {
701701
#ifdef XPTI_ENABLE_INSTRUMENTATION
702702
std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
703703
int32_t StreamID, const std::shared_ptr<detail::kernel_impl> &SyclKernel,
704-
const detail::code_location &CodeLoc, const std::string &SyclKernelName,
705-
const QueueImplPtr &Queue, const NDRDescT &NDRDesc,
704+
const detail::code_location &CodeLoc, bool IsTopCodeLoc,
705+
const std::string &SyclKernelName, const QueueImplPtr &Queue,
706+
const NDRDescT &NDRDesc,
706707
const std::shared_ptr<detail::kernel_bundle_impl> &KernelBundleImplPtr,
707708
std::vector<ArgDesc> &CGArgs);
708709
#endif

sycl/source/handler.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,8 @@ event handler::finalize() {
266266
// uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent,
267267
int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME);
268268
auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData(
269-
StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue,
270-
impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs);
269+
StreamID, MKernel, MCodeLoc, MIsTopCodeLoc, MKernelName.c_str(),
270+
MQueue, impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs);
271271
auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent,
272272
InstanceID = InstanceID]() {
273273
#else
@@ -515,6 +515,10 @@ event handler::finalize() {
515515
throw exception(make_error_code(errc::runtime),
516516
"Internal Error. Command group cannot be constructed.");
517517

518+
// Propagate MIsTopCodeLoc state to CommandGroup.
519+
// Will be used for XPTI payload generation for CG's related events.
520+
CommandGroup->MIsTopCodeLoc = MIsTopCodeLoc;
521+
518522
// If there is a graph associated with the handler we are in the explicit
519523
// graph mode, so we store the CG instead of submitting it to the scheduler,
520524
// so it can be retrieved by the graph later.

0 commit comments

Comments
 (0)