diff --git a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md
index a019ba515fc4d..d0c7c3e59869a 100644
--- a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md
+++ b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md
@@ -226,34 +226,37 @@ by the SYCL runtime.
## SYCL Stream `"ur.call"` Notification Signatures
-| Trace Point Type | Parameter Description | Metadata |
-| :--------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------- |
-| `function_begin` |
**trace_type**: `xpti::trace_point_type_t::function_begin` that marks the beginning of a function **parent**: Event ID created for all functions in the `ur.call` layer. **event**: `nullptr` - since the stream of data just captures functions being called. **instance**: Unique ID to allow the correlation of the `function_begin` event with the `function_end` event. **user_data**: Name of the function being called sent in as `const char *` | None |
-| `function_end` | **trace_type**: `xpti::trace_point_type_t::function_end` that marks the beginning of a function **parent**: Event ID created for all functions in the `ur.call` layer. **event**: `nullptr` - since the stream of data just captures functions being called. **instance**: Unique ID to allow the correlation of the `function_begin` event with the `function_end` event. This value is guaranteed to be the same value received by the trace event for the corresponding `function_begin` **user_data**: Name of the function being called sent in as `const char *` | None |
+| Trace Point Type | Parameter Description | Metadata |
+| :------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------- |
+| `function_with_args_begin` | **trace_type**: `xpti::trace_point_type_t::function_with_args_begin` that marks the beginning of a function **parent**: Event ID created for all functions in the `ur.call` layer. **event**: `nullptr` if code location is not available or event ID with code location data. **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, and arguments. | None |
+| `function_with_args_end` | **trace_type**: `xpti::trace_point_type_t::function_with_args_end` that marks the beginning of a function **parent**: Event ID created for all functions in the `ur.call` layer. **event**: `nullptr` if code location is not available or event ID with code location data. **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. This value is guaranteed to be the same value received by the trace event for the corresponding `function_with_args_begin` **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, arguments, and return value. | None |
## SYCL Stream `"ur.call.debug"` Notification Signatures
-| Trace Point Type | Parameter Description | Metadata |
-| :------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------- |
-| `function_with_args_begin` | **trace_type**: `xpti::trace_point_type_t::function_with_args_begin` that marks the beginning of a function **parent**: Event ID created for all functions in the `ur.call.debug` layer. **event**: `nullptr` if code location is not available or event ID with code location data. **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, and arguments. | None |
-| `function_with_args_end` | **trace_type**: `xpti::trace_point_type_t::function_with_args_end` that marks the beginning of a function **parent**: Event ID created for all functions in the `ur.call.debug` layer. **event**: `nullptr` if code location is not available or event ID with code location data. **instance**: Unique ID to allow the correlation of the `function_with_args_begin` event with the `function_with_args_end` event. This value is guaranteed to be the same value received by the trace event for the corresponding `function_with_args_begin` **user_data**: A pointer to `function_with_args_t` object, that includes function ID, name, arguments, and return value. | None |
+The `"ur.call.debug"` stream emits the same notifications as the `"ur.call"` stream, but with additional metadata describing the source code location of each traced function call. This enables tools to correlate traced events with their origin in the application's source code for enhanced debugging and analysis.
+
+If a tool subscribes to both `"ur.call"` and `"ur.call.debug"`, only notifications from `"ur.call.debug"` will be delivered to avoid duplication.
-## SYCL Stream `"sycl"` Notification Signatures
+## SYCL Stream `"sycl"` and `"sycl.debug"` Notification Signatures
All trace point types in bold provide semantic information about the graph, nodes and edges and the topology of the asynchronous task graphs created by the runtime.
+
+The `"sycl.debug"` stream emits the same notifications as the `"sycl"` stream, but with additional metadata. If toolchains want to keep the overhead low then subscribing to `"sycl"` stream is the right option, if toolchains want to get more data and keeping overheads low is not important then they should subscribe to `"sycl.debug"`.
+
+If a tool subscribes to both `"sycl"` and `"sycl.debug"`, only notifications from `"sycl.debug"` will be delivered to avoid duplication.
| Trace Point Type | Parameter Description | Metadata |
| :----------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **`graph_create`** | **trace_type**: `xpti::trace_point_type_t::graph_create` that marks the creation of an asynchronous graph. **parent**: `nullptr` **event**: The global asynchronous graph object ID. All other graph related events such as node and edge creation will always this ID as the parent ID. **instance**: Unique ID related to the event, but not a correlation ID as there are other events to correlate to. **user_data**: `nullptr` SYCL runtime will always have one instance of a graph object with many disjoint subgraphs that get created during the execution of an application.
| None |
-| **`node_create`** | **trace_type**: `xpti::trace_point_type_t::node_create` that marks the creation of a node in the graph, which could be a computational kernel or memory operation. **parent**: The global graph event that is created during the `graph_create` event. **event**: The unique ID that identifies the data parallel compute operation or memory operation. **instance**: Unique ID related to the event, but not a correlation ID as there are other events to correlate to. **user_data**: Command type that has been submitted through the command group handler, which could be one of: `command_group_node`, `memory_transfer_node`, `memory_allocation_node`, `sub_buffer_creation_node`, `memory_deallocation_node`, `host_acc_create_buffer_lock_node`, `host_acc_destroy_buffer_release_node` combined with the address of the command group object and represented as a string [`const char *`] SYCL runtime will always have one instance of a graph object with many disjoint subgraphs that get created during the execution of an application.
| Computational Kernels `sycl_device`, `sycl_device_type`, `sycl_device_name`, `kernel_name`, `from_source`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`. The per-queue unique ID can be obtained by using `xptiGetStashedTuple` API call. See `queue_create` documentation for usage information. Memory operations `memory_object`, `offset`, `access_range`, `allocation_type`, `copy_from`, `copy_to`,`device_id`, `device_name`, `memory_size`, `src_memory_ptr`, `dest_memory_ptr`, `memory_ptr`, `value_set`. The per-queue unique ID can be obtained by using `xptiGetSTashedTuple` API call. See `queue_create` documentation for usage information. |
+| **`node_create`** | **trace_type**: `xpti::trace_point_type_t::node_create` that marks the creation of a node in the graph, which could be a computational kernel or memory operation. **parent**: The global graph event that is created during the `graph_create` event. **event**: The unique ID that identifies the data parallel compute operation or memory operation. **instance**: Unique ID related to the event, but not a correlation ID as there are other events to correlate to. **user_data**: Command type that has been submitted through the command group handler, which could be one of: `command_group_node`, `memory_transfer_node`, `memory_allocation_node`, `sub_buffer_creation_node`, `memory_deallocation_node`, `host_acc_create_buffer_lock_node`, `host_acc_destroy_buffer_release_node` combined with the address of the command group object and represented as a string [`const char *`] SYCL runtime will always have one instance of a graph object with many disjoint subgraphs that get created during the execution of an application.
| "sycl" stream: Computational Kernels `sycl_device`, `sycl_device_type`, `sycl_device_name`, `kernel_name`. The per-queue unique ID can be obtained by using `xptiGetStashedTuple` API call. See `queue_create` documentation for usage information. Memory operations `memory_object`, `offset`, `access_range`, `allocation_type`, `copy_from`, `copy_to`,`device_id`, `device_name`, `memory_size`, `src_memory_ptr`, `dest_memory_ptr`, `memory_ptr`, `value_set`. The per-queue unique ID can be obtained by using `xptiGetSTashedTuple` API call. See `queue_create` documentation for usage information. Additional metadata on "sycl.debug" stream: Computational Kernels `from_source`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`. |
| **`edge_create`** | **trace_type**: `xpti::trace_point_type_t::graph_create` that marks the creation of an asynchronous graph. **parent**: The global graph event that is created during the `graph_create` event. **event**: The unique ID that identifies the dependence relationship between two operations. **instance**: Unique ID related to the event, but not a correlation ID as there are other events to correlate to. **user_data**: `nullptr` Edges capture dependence relationships between computations or computations and memory operations.
| `access_mode`, `memory_object`, `event` |
| `task_begin` | **trace_type**: `xpti::trace_point_type_t::task_begin` that marks the beginning of a task belonging to one of the nodes in the graph. When the trace event is for a kernel executing on a device other than the the CPU, this `task_begin` and corresponding `task_end` mark the submit call. To track the execution of the kernel on the device, the `trace_signal` event must be monitored to get the kernel event handle from which the execution statistics can be gathered. **parent**: The global graph event that is created during the `graph_create` event. **event**: The event ID will reflect the ID of the computation or memory operation kernel, which would be one of the nodes in the graph. **instance**: Instance ID for the task that can be used to correlate it with the corresponding `task_end` trace event. **user_data**: `nullptr` | Same metadata defined for the node the trace task belongs to. |
| `task_end` | **trace_type**: `xpti::trace_point_type_t::task_end` that marks the end of a task belonging to one of the nodes in the graph. The specific task instance can be tacked through the instance ID parameter which helps correlate the `task_end` with the corresponding `task_begin`. **parent**: The global graph event that is created during the `graph_create` event. **event**: The event ID will reflect the ID of the computation or memory operation kernel, which would be one of the nodes in the graph. **instance**: Instance ID for the task that can be used to correlate it with the corresponding `task_begin` trace event. **user_data**: `nullptr` | Same metadata defined for the node the trace task belongs to. |
| `signal` | **trace_type**: `xpti::trace_point_type_t::signal` that marks the an event that contains the `event` handle of an executing kernel on a device. **parent**: The global graph event that is created during the `graph_create` event. **event**: The event ID will reflect the ID of the computation or memory operation kernel, which would be one of the nodes in the graph. **instance**: Instance ID for the task for which the signal has been generated. **user_data**: Address of the kernel event that is returned by the device so the progress of the execution can be tracked. | Same metadata defined for the node the trace task belongs to. |
-| `wait_begin` | **trace_type**: `xpti::trace_point_type_t::wait_begin` that marks the beginning of the wait on an `event` **parent**: `nullptr` **event**: The event ID will reflect the ID of the command group object submission that created this event, the queue or a new event based on the combination of the string "queue.wait" and the address of the event. **instance**: Unique ID to allow the correlation of the `wait_begin` event with the `wait_end` event. **user_data**: String indicating `queue.wait` and the address of the event sent in as `const char *` Tracing the `queue.wait()` or `queue.wait_and_throw()` will capture the waiting on the action represented by the event object, which could be the execution of a kernel, completion of a memory operation, etc that is embedded in the command group handler. All wait events contain metadata that indicates the SYCL device on which the corresponding operation has been submitted. If the event is from a command group handler, then the source location information is available as well.
| `sycl_device`, `sycl_device_type`, `sycl_device_name`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
-| `wait_end` | **trace_type**: `xpti::trace_point_type_t::wait_end` that marks the beginning of the wait on an `event` **parent**: `nullptr` **event**: The event ID will reflect the ID of the command group object submission that created this event, the queue or a new event based on the combination of the string "queue.wait" and the address of the event. **instance**: Unique ID to allow the correlation of the `wait_begin` event with the `wait_end` event. **user_data**: String indicating `queue.wait` and the address of the event as `const char *` | `sycl_device`, `sycl_device_type`, `sycl_device_name`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
-| `barrier_begin` | **trace_type**: `xpti::trace_point_type_t::barrier_begin` that marks the beginning of a barrier while enqueuing a command group object **parent**: The global graph event that is created during the `graph_create` event. **event**: The event ID will reflect the ID of the command group object that has encountered a barrier during the enqueue operation. **instance**: Unique ID to allow the correlation of the `barrier_begin` event with the `barrier_end` event. **user_data**: String indicating `enqueue.barrier` and the reason for the barrier as a `const char *` The reason for the barrier could be one of `Buffer locked by host accessor`, `Blocked by host task` or `Unknown reason`.
| Computational Kernels `sycl_device`, `sycl_device_type`, `sycl_device_name`, `kernel_name`, `from_source`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` Memory operations `memory_object`, `offset`, `access_range_start`, `access_range_end`, `allocation_type`, `copy_from`, `copy_to` |
-| `barrier_end` | **trace_type**: `xpti::trace_point_type_t::barrier_end` that marks the end of the barrier that is encountered during enqueue. **parent**: The global graph event that is created during the `graph_create` event. **event**: The event ID will reflect the ID of the command group object that has encountered a barrier during the enqueue operation. **instance**: Unique ID to allow the correlation of the `barrier_begin` event with the `barrier_end` event. **user_data**: String indicating `enqueue.barrier` and the reason for the barrier as a `const char *` The reason for the barrier could be one of `Buffer locked by host accessor`, `Blocked by host task` or `Unknown reason`.
| Computational Kernels `sycl_device`, `sycl_device_type`, `sycl_device_name`, `kernel_name`, `from_source`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` Memory operations `memory_object`, `offset`, `access_range_start`, `access_range_end`, `allocation_type`, `copy_from`, `copy_to` |
-| `diagnostics` | **trace_type**: `xpti::trace_point_type_t::diagnostics` that represents general purpose notifications. For example, it is emitted when an exception is thrown in SYCL runtime. **parent**: Set to NULL. **event**: The event ID will reflect the code location of notification origin, if available. **instance**: An instance ID that records the number of times this code location has been seen. **user_data**: String with diagnostic message as a `const char *` | `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
+| `wait_begin` | **trace_type**: `xpti::trace_point_type_t::wait_begin` that marks the beginning of the wait on an `event` **parent**: `nullptr` **event**: The event ID will reflect the ID of the command group object submission that created this event, the queue or a new event based on the combination of the string "queue.wait" and the address of the event. **instance**: Unique ID to allow the correlation of the `wait_begin` event with the `wait_end` event. **user_data**: String indicating `queue.wait` and the address of the event sent in as `const char *` Tracing the `queue.wait()` or `queue.wait_and_throw()` will capture the waiting on the action represented by the event object, which could be the execution of a kernel, completion of a memory operation, etc that is embedded in the command group handler. All wait events contain metadata that indicates the SYCL device on which the corresponding operation has been submitted. If the event is from a command group handler, then the source location information is available as well.
| "sycl.stream": `sycl_device`, `sycl_device_type`, `sycl_device_name` Additional metadata on "sycl.debug" stream: `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
+| `wait_end` | **trace_type**: `xpti::trace_point_type_t::wait_end` that marks the beginning of the wait on an `event` **parent**: `nullptr` **event**: The event ID will reflect the ID of the command group object submission that created this event, the queue or a new event based on the combination of the string "queue.wait" and the address of the event. **instance**: Unique ID to allow the correlation of the `wait_begin` event with the `wait_end` event. **user_data**: String indicating `queue.wait` and the address of the event as `const char *` | "sycl" stream: `sycl_device`, `sycl_device_type`, `sycl_device_name` Additional metadata on "sycl.debug" stream: `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
+| `barrier_begin` | **trace_type**: `xpti::trace_point_type_t::barrier_begin` that marks the beginning of a barrier while enqueuing a command group object **parent**: The global graph event that is created during the `graph_create` event. **event**: The event ID will reflect the ID of the command group object that has encountered a barrier during the enqueue operation. **instance**: Unique ID to allow the correlation of the `barrier_begin` event with the `barrier_end` event. **user_data**: String indicating `enqueue.barrier` and the reason for the barrier as a `const char *` The reason for the barrier could be one of `Buffer locked by host accessor`, `Blocked by host task` or `Unknown reason`.
| "sycl" stream: Computational Kernels `sycl_device`, `sycl_device_type`, `sycl_device_name`, `kernel_name` Memory operations `memory_object`, `offset`, `access_range_start`, `access_range_end`, `allocation_type`, `copy_from`, `copy_to` Additional metadata on "sycl.debug" stream: Computational Kernels `from_source`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
+| `barrier_end` | **trace_type**: `xpti::trace_point_type_t::barrier_end` that marks the end of the barrier that is encountered during enqueue. **parent**: The global graph event that is created during the `graph_create` event. **event**: The event ID will reflect the ID of the command group object that has encountered a barrier during the enqueue operation. **instance**: Unique ID to allow the correlation of the `barrier_begin` event with the `barrier_end` event. **user_data**: String indicating `enqueue.barrier` and the reason for the barrier as a `const char *` The reason for the barrier could be one of `Buffer locked by host accessor`, `Blocked by host task` or `Unknown reason`.
| "sycl" stream: Computational Kernels `sycl_device`, `sycl_device_type`, `sycl_device_name`, `kernel_name` Memory operations `memory_object`, `offset`, `access_range_start`, `access_range_end`, `allocation_type`, `copy_from`, `copy_to` Additional metadata on "sycl.debug" stream: Computational Kernels `from_source`, `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
+| `diagnostics` | **trace_type**: `xpti::trace_point_type_t::diagnostics` that represents general purpose notifications. For example, it is emitted when an exception is thrown in SYCL runtime. **parent**: Set to NULL. **event**: The event ID will reflect the code location of notification origin, if available. **instance**: An instance ID that records the number of times this code location has been seen. **user_data**: String with diagnostic message as a `const char *` | On "sycl.debug" stream: `sym_function_name`, `sym_source_file_name`, `sym_line_no`, `sym_column_no` |
| `queue_create` | **trace_type**: `xpti::trace_point_type_t::queue_create` that marks the creation of a queue, which could be a device or host queue. **parent**: Set to NULL. **event**: The event ID will reflect the code location of notification origin, if available. **instance**: Will contain the instance ID of the queue, which is a per-queue unique identifier. For example, if the queue is created in a loop, the **event** will be the same as it happens at the same code location, but the **instance** will help differentiate between the different queues being created and used. **user_data**: Not meaningful for this trace type. Could contain string with 'queue_create' or nullptr. This signal is emitted only once for every queue object, notifies about successful queue creation (the signal is not emitted if any exception happens during queue creation).
| `sycl_context`, `sycl_device_name`, `sycl_device`, `is_inorder`, `queue_handle` `queue_id` field has been deprecated and replaced with the **instance** information and supporting XPTI API calls (`xptiGetStashedTuple`). Using the **instance** information is the recommended approach. `char *key = 0;` `uint64_t value;``if (xptiGetStashedTuple(&key, value) ==xpti::result_t::XPTI_RESULT_SUCCESS) {` `// key will contain "queue_id"` `// value will contain the per-queue unique ID``}` `queue_handle` is absent for host queue since no backend object is used.
|
| `queue_destroy` | **trace_type**: `xpti::trace_point_type_t::queue_destroy` that marks the destruction of a queue, which could be a device or host queue. **parent**: Set to NULL. **event**: The event ID will reflect the code location of notification origin, if available. **instance**: Will contain the instance ID of the queue, which is a per-queue unique identifier. **user_data**: Not meaningful for this trace type. Could contain string with 'queue_destroy' or nullptr. This signal is emitted only once for every queue object, notifies about queue destruction. Contains the same metadata set for corresponding 'queue_create' signal. **event** and corresponding metadata will be destroyed right after notification.
| `sycl_context`, `sycl_device_name`, `sycl_device`, `is_inorder`, `queue_id`, `queue_handle` `queue_id` field has been deprecated and replaced with the **instance** information and supporting XPTI API calls (`xptiGetStashedTuple`). Using the **instance** information is the recommended approach. `queue_handle` is absent for host queue since no backend object is used. |
diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp
index 0694664c88fda..b0c838cdd890c 100644
--- a/sycl/source/detail/event_impl.cpp
+++ b/sycl/source/detail/event_impl.cpp
@@ -288,7 +288,8 @@ void event_impl::wait(bool *Success) {
void *TelemetryEvent = nullptr;
uint64_t IId = 0;
std::string Name;
- TelemetryEvent = instrumentationProlog(Name, GSYCLStreamID, IId);
+ auto StreamID = detail::getActiveXPTIStreamID();
+ TelemetryEvent = instrumentationProlog(Name, StreamID, IId);
#endif
auto EventHandle = getHandle();
@@ -300,7 +301,7 @@ void event_impl::wait(bool *Success) {
detail::Scheduler::getInstance().waitForEvent(*this, Success);
#ifdef XPTI_ENABLE_INSTRUMENTATION
- instrumentationEpilog(TelemetryEvent, Name, GSYCLStreamID, IId);
+ instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
#endif
}
diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp
index 94a7e9f7b70dc..eb7d11d3b29d4 100644
--- a/sycl/source/detail/global_handler.cpp
+++ b/sycl/source/detail/global_handler.cpp
@@ -95,8 +95,8 @@ void GlobalHandler::TraceEventXPTI(const char *Message) {
xpti::framework::tracepoint_scope_t TP(
CodeLocation.fileName(), CodeLocation.functionName(),
CodeLocation.lineNumber(), CodeLocation.columnNumber(), nullptr);
-
- TP.stream(detail::GSYCLStreamID)
+ // Notify the subscriber with a diagnostic message when an exception occurs.
+ TP.stream(detail::getActiveXPTIStreamID())
.traceType(xpti::trace_point_type_t::diagnostics)
.parentEvent(GSYCLCallEvent)
.notify(static_cast(Message));
diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp
index 69b755f9a2e51..9a36bbb0b9476 100644
--- a/sycl/source/detail/graph/graph_impl.cpp
+++ b/sycl/source/detail/graph/graph_impl.cpp
@@ -731,6 +731,7 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
const bool xptiEnabled = xptiTraceEnabled();
xpti_td *CmdTraceEvent = nullptr;
uint64_t InstanceID = 0;
+ auto StreamID = detail::getActiveXPTIStreamID();
if (xptiEnabled) {
sycl::detail::CGExecKernel *CGExec =
static_cast(Node.MCommandGroup.get());
@@ -738,13 +739,12 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
CGExec->MFunctionName.c_str(),
CGExec->MLine, CGExec->MColumn);
std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
- sycl::detail::GSYCLStreamID, CGExec->MSyclKernel, CodeLoc,
- CGExec->MIsTopCodeLoc, CGExec->MDeviceKernelInfo, nullptr,
- CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs);
+ StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc,
+ CGExec->MDeviceKernelInfo, nullptr, CGExec->MNDRDesc,
+ CGExec->MKernelBundle.get(), CGExec->MArgs);
if (CmdTraceEvent)
- sycl::detail::emitInstrumentationGeneral(sycl::detail::GSYCLStreamID,
- InstanceID, CmdTraceEvent,
- xpti::trace_task_begin, nullptr);
+ sycl::detail::emitInstrumentationGeneral(
+ StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr);
}
#endif
@@ -764,9 +764,8 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(
#ifdef XPTI_ENABLE_INSTRUMENTATION
if (xptiEnabled && CmdTraceEvent)
- sycl::detail::emitInstrumentationGeneral(sycl::detail::GSYCLStreamID,
- InstanceID, CmdTraceEvent,
- xpti::trace_task_end, nullptr);
+ sycl::detail::emitInstrumentationGeneral(
+ StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr);
#endif
return NewSyncPoint;
diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp
index c59b5eaa20387..52ee77d251eb2 100644
--- a/sycl/source/detail/queue_impl.cpp
+++ b/sycl/source/detail/queue_impl.cpp
@@ -167,10 +167,12 @@ event queue_impl::memset(void *Ptr, int Value, size_t Count,
xpti::framework::tracepoint_scope_t TP(
CodeLocation.fileName(), FuncName, CodeLocation.lineNumber(),
CodeLocation.columnNumber(), (void *)this);
- TP.stream(detail::GSYCLStreamID)
+ TP.stream(detail::getActiveXPTIStreamID())
.traceType(xpti::trace_point_type_t::node_create)
.parentEvent(detail::GSYCLGraphEvent);
+ // This information is necessary for memset, so we will not guard it by debug
+ // stream check.
TP.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast(MDevice.getHandleRef()));
@@ -219,10 +221,11 @@ event queue_impl::memcpy(void *Dest, const void *Src, size_t Count,
xpti::framework::tracepoint_scope_t TP(
CodeLoc.fileName(), CodeLoc.functionName(), CodeLoc.lineNumber(),
CodeLoc.columnNumber(), (void *)this);
- TP.stream(detail::GSYCLStreamID)
+ TP.stream(detail::getActiveXPTIStreamID())
.traceType(xpti::trace_point_type_t::node_create)
.parentEvent(GSYCLGraphEvent);
const char *UserData = "memory_transfer_node::memcpy";
+ // We will include this metadata information as it is required for memcpy.
TP.addMetadata([&](auto TEvent) {
xpti::addMetadata(TEvent, "sycl_device",
reinterpret_cast(MDevice.getHandleRef()));
@@ -515,33 +518,32 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,
if (!xptiCheckTraceEnabled(StreamID, NotificationTraceType))
return TraceEvent;
- xpti::payload_t Payload;
- bool HasSourceInfo = false;
+ xpti_tracepoint_t *Event;
// We try to create a unique string for the wait() call by combining it with
// the queue address
xpti::utils::StringHelper NG;
Name = NG.nameWithAddress("queue.wait", this);
- if (CodeLoc.fileName()) {
- // We have source code location information
- Payload =
- xpti::payload_t(Name.c_str(), CodeLoc.fileName(), CodeLoc.lineNumber(),
- CodeLoc.columnNumber(), (void *)this);
- HasSourceInfo = true;
- } else {
- // We have no location information, so we'll use the address of the queue
- Payload = xpti::payload_t(Name.c_str(), (void *)this);
- }
+ bool HasSourceInfo = CodeLoc.fileName() != nullptr;
// wait() calls could be at different user-code locations; We create a new
// event based on the code location info and if this has been seen before, a
// previously created event will be returned.
- uint64_t QWaitInstanceNo = 0;
- xpti::trace_event_data_t *WaitEvent =
- xptiMakeEvent(Name.c_str(), &Payload, xpti::trace_graph_event,
- xpti_at::active, &QWaitInstanceNo);
- IId = QWaitInstanceNo;
- if (WaitEvent) {
- xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this));
+ if (HasSourceInfo) {
+ Event = xptiCreateTracepoint(CodeLoc.functionName(), CodeLoc.fileName(),
+ CodeLoc.lineNumber(), CodeLoc.columnNumber(),
+ (void *)this);
+ } else {
+ Event = xptiCreateTracepoint(Name.c_str(), nullptr, 0, 0, (void *)this);
+ }
+
+ IId = xptiGetUniqueId();
+ auto WaitEvent = Event->event_ref();
+ // We will allow the device type to be set
+ xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this));
+ // We limit the amount of metadata that is added to the regular stream.
+ // Only "sycl.debug" stream will have the full information. This improves the
+ // performance when this data is not required by the tool or the collector.
+ if (isDebugStream(StreamID)) {
if (HasSourceInfo) {
xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName());
xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName());
@@ -551,11 +553,11 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,
WaitEvent, "sym_column_no",
static_cast((CodeLoc.columnNumber())));
}
- xptiNotifySubscribers(StreamID, xpti::trace_wait_begin, nullptr, WaitEvent,
- QWaitInstanceNo,
- static_cast(Name.c_str()));
- TraceEvent = (void *)WaitEvent;
}
+ xptiNotifySubscribers(StreamID, xpti::trace_wait_begin, nullptr, WaitEvent,
+ IId, static_cast(Name.c_str()));
+ TraceEvent = (void *)WaitEvent;
+
return TraceEvent;
}
@@ -578,13 +580,11 @@ void queue_impl::instrumentationEpilog(void *TelemetryEvent, std::string &Name,
void queue_impl::wait(const detail::code_location &CodeLoc) {
(void)CodeLoc;
#ifdef XPTI_ENABLE_INSTRUMENTATION
- const bool xptiEnabled = xptiCheckTraceEnabled(GSYCLStreamID);
void *TelemetryEvent = nullptr;
uint64_t IId;
std::string Name;
- if (xptiEnabled) {
- TelemetryEvent = instrumentationProlog(CodeLoc, Name, GSYCLStreamID, IId);
- }
+ auto StreamID = detail::getActiveXPTIStreamID();
+ TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId);
#endif
if (!MGraph.expired()) {
@@ -664,51 +664,50 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
}
#ifdef XPTI_ENABLE_INSTRUMENTATION
- if (xptiEnabled) {
- instrumentationEpilog(TelemetryEvent, Name, GSYCLStreamID, IId);
- }
+ // There is an early return in instrumentationEpilog() if no subscribers are
+ // subscribing to queue.wait().
+ instrumentationEpilog(TelemetryEvent, Name, StreamID, IId);
#endif
}
void queue_impl::constructorNotification() {
#if XPTI_ENABLE_INSTRUMENTATION
- if (xptiTraceEnabled()) {
- constexpr uint16_t NotificationTraceType =
- static_cast(xpti::trace_point_type_t::queue_create);
- if (xptiCheckTraceEnabled(detail::GSYCLStreamID, NotificationTraceType)) {
- xpti::utils::StringHelper SH;
- std::string AddrStr = SH.addressAsString(MQueueID);
- std::string QueueName = SH.nameWithAddressString("queue", AddrStr);
- // Create a payload for the queue create event as we do not get code
- // location for the queue create event
- xpti::payload_t QPayload(QueueName.c_str());
- MInstanceID = xptiGetUniqueId();
- uint64_t RetInstanceNo;
- xpti_td *TEvent =
- xptiMakeEvent("queue_create", &QPayload,
- (uint16_t)xpti::trace_event_type_t::algorithm,
- xpti_at::active, &RetInstanceNo);
- // Cache the trace event, stream id and instance IDs for the destructor
- MTraceEvent = (void *)TEvent;
-
- xpti::addMetadata(TEvent, "sycl_context",
- reinterpret_cast(MContext->getHandleRef()));
- xpti::addMetadata(TEvent, "sycl_device_name",
- MDevice.get_info());
- xpti::addMetadata(TEvent, "sycl_device",
- reinterpret_cast(MDevice.getHandleRef()));
- xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
- xpti::addMetadata(TEvent, "queue_id", MQueueID);
- xpti::addMetadata(TEvent, "queue_handle",
- reinterpret_cast(getHandleRef()));
- // Also publish to TLS before notification
- xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
- xptiNotifySubscribers(detail::GSYCLStreamID,
- (uint16_t)xpti::trace_point_type_t::queue_create,
- nullptr, TEvent, MInstanceID,
- static_cast("queue_create"));
- }
- }
+ // If there are no subscribers to queue_create, return immediately.
+ constexpr uint16_t NotificationTraceType =
+ static_cast(xpti::trace_point_type_t::queue_create);
+ if (!anyTraceEnabled(NotificationTraceType))
+ return;
+ // We do not have CodeLoc for the queue constructor, so we will have to create
+ // a queue name with the queue ID to create an event; this step can be avoided
+ // by using CodeLoc.
+ xpti::utils::StringHelper SH;
+ std::string AddrStr = SH.addressAsString(MQueueID);
+ std::string QueueName = SH.nameWithAddressString("queue", AddrStr);
+
+ xpti_tracepoint_t *Event =
+ xptiCreateTracepoint(QueueName.c_str(), nullptr, 0, 0, (void *)this);
+ MInstanceID = xptiGetUniqueId();
+ xpti_td *TEvent = Event->event_ref();
+ // Cache the trace event, stream id and instance IDs for the destructor.
+ MTraceEvent = (void *)TEvent;
+ // We will allow the queue metadata to be set as this is performed
+ // infrequently.
+ xpti::addMetadata(TEvent, "sycl_context",
+ reinterpret_cast(MContext->getHandleRef()));
+ xpti::addMetadata(TEvent, "sycl_device_name",
+ MDevice.get_info());
+ xpti::addMetadata(TEvent, "sycl_device",
+ reinterpret_cast(MDevice.getHandleRef()));
+ xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
+ xpti::addMetadata(TEvent, "queue_id", MQueueID);
+ xpti::addMetadata(TEvent, "queue_handle",
+ reinterpret_cast(getHandleRef()));
+ // Also publish to TLS before notification.
+ xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
+ xptiNotifySubscribers(detail::getActiveXPTIStreamID(),
+ (uint16_t)xpti::trace_point_type_t::queue_create,
+ nullptr, TEvent, MInstanceID,
+ static_cast("queue_create"));
#endif
}
@@ -716,10 +715,11 @@ void queue_impl::destructorNotification() {
#if XPTI_ENABLE_INSTRUMENTATION
constexpr uint16_t NotificationTraceType =
static_cast(xpti::trace_point_type_t::queue_destroy);
- if (xptiCheckTraceEnabled(detail::GSYCLStreamID, NotificationTraceType)) {
+ if (anyTraceEnabled(NotificationTraceType)) {
// Use the cached trace event, stream id and instance IDs for the
// destructor
- xptiNotifySubscribers(detail::GSYCLStreamID, NotificationTraceType, nullptr,
+ xptiNotifySubscribers(detail::getActiveXPTIStreamID(),
+ NotificationTraceType, nullptr,
(xpti::trace_event_data_t *)MTraceEvent, MInstanceID,
static_cast("queue_destroy"));
xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent);
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index 1e1e37e63a9d2..e2e62cfcd531b 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -575,7 +575,7 @@ Command::Command(
return;
// Obtain the stream ID so all commands can emit traces to that stream;
// copying it to the member variable to avoid ABI breakage
- MStreamID = detail::GSYCLStreamID;
+ MStreamID = getActiveXPTIStreamID();
#endif
}
@@ -614,16 +614,16 @@ void Command::emitEdgeEventForCommandDependence(
std::string TypeString = SH.nameWithAddressString(Prefix, AddressStr);
// Create an edge with the dependent buffer address for which a command
// object has been created as one of the properties of the edge
- xpti::payload_t Payload(TypeString.c_str(), MAddress);
- uint64_t EdgeInstanceNo;
- xpti_td *EdgeEvent =
- xptiMakeEvent(TypeString.c_str(), &Payload, xpti::trace_graph_event,
- xpti_at::active, &EdgeInstanceNo);
- if (EdgeEvent) {
+ uint64_t EdgeInstanceNo = xptiGetUniqueId();
+ xpti_tracepoint_t *Event =
+ xptiCreateTracepoint(TypeString.c_str(), nullptr, 0, 0, MAddress);
+ if (Event) {
+ xpti_td *EdgeEvent = Event->event_ref();
xpti_td *SrcEvent = static_cast(Cmd->MTraceEvent);
xpti_td *TgtEvent = static_cast(MTraceEvent);
EdgeEvent->source_id = SrcEvent->unique_id;
EdgeEvent->target_id = TgtEvent->unique_id;
+ // We allow this metadata to be set as it describes the edge.
if (IsCommand) {
xpti::addMetadata(EdgeEvent, "access_mode",
static_cast(AccMode.value()));
@@ -670,29 +670,33 @@ void Command::emitEdgeEventForEventDependence(Command *Cmd,
std::string NodeName = SH.nameWithAddressString("virtual_node", AddressStr);
// Node name is "virtual_node[]"
- xpti::payload_t VNPayload(NodeName.c_str(), MAddress);
- uint64_t VNodeInstanceNo;
- xpti_td *NodeEvent =
- xptiMakeEvent(NodeName.c_str(), &VNPayload, xpti::trace_graph_event,
- xpti_at::active, &VNodeInstanceNo);
- // Emit the virtual node first
- xpti::addMetadata(NodeEvent, "kernel_name", NodeName);
- xptiNotifySubscribers(MStreamID, xpti::trace_node_create,
- detail::GSYCLGraphEvent, NodeEvent, VNodeInstanceNo,
- nullptr);
+ xpti_tracepoint_t *NEvent =
+ xptiCreateTracepoint(NodeName.c_str(), nullptr, 0, 0, MAddress);
+ uint64_t VNodeInstanceNo = xptiGetUniqueId();
+ xpti_td *NodeEvent = NEvent ? NEvent->event_ref() : nullptr;
+ if (NodeEvent) {
+ // We allow this metadata to be set as the node is a virtual node without
+ // an actual name.
+ xpti::addMetadata(NodeEvent, "kernel_name", NodeName);
+
+ xptiNotifySubscribers(MStreamID, xpti::trace_node_create,
+ detail::GSYCLGraphEvent, NodeEvent, VNodeInstanceNo,
+ nullptr);
+ }
// Create a new event for the edge
std::string EdgeName = SH.nameWithAddressString("Event", AddressStr);
- xpti::payload_t EdgePayload(EdgeName.c_str(), MAddress);
- uint64_t EdgeInstanceNo;
- xpti_td *EdgeEvent =
- xptiMakeEvent(EdgeName.c_str(), &EdgePayload, xpti::trace_graph_event,
- xpti_at::active, &EdgeInstanceNo);
+ xpti_tracepoint_t *EEvent =
+ xptiCreateTracepoint(EdgeName.c_str(), nullptr, 0, 0, MAddress);
+ uint64_t EdgeInstanceNo = xptiGetUniqueId();
+ xpti_td *EdgeEvent = EEvent ? EEvent->event_ref() : nullptr;
if (EdgeEvent && NodeEvent) {
// Source node represents the event and this event needs to be completed
// before target node can execute
xpti_td *TgtEvent = static_cast(MTraceEvent);
EdgeEvent->source_id = NodeEvent->unique_id;
EdgeEvent->target_id = TgtEvent->unique_id;
+ // We allow this metadata to be set as an edge without the event address
+ // will be less useful.
xpti::addMetadata(EdgeEvent, "event",
reinterpret_cast(UrEventAddr));
xptiNotifySubscribers(MStreamID, xpti::trace_edge_create,
@@ -719,11 +723,10 @@ uint64_t Command::makeTraceEventProlog(void *MAddress) {
std::string CommandString =
SH.nameWithAddressString(MCommandName, MAddressString);
- xpti::payload_t p(CommandString.c_str(), MAddress);
- xpti_td *CmdTraceEvent =
- xptiMakeEvent(CommandString.c_str(), &p, xpti::trace_graph_event,
- xpti_at::active, &CommandInstanceNo);
- MInstanceID = CommandInstanceNo;
+ xpti_tracepoint_t *Event =
+ xptiCreateTracepoint(CommandString.c_str(), nullptr, 0, 0, MAddress);
+ xpti_td *CmdTraceEvent = Event ? Event->event_ref() : nullptr;
+ MInstanceID = xptiGetUniqueId();
if (CmdTraceEvent) {
MTraceEvent = (void *)CmdTraceEvent;
// If we are seeing this event again, then the instance ID will be greater
@@ -734,7 +737,7 @@ uint64_t Command::makeTraceEventProlog(void *MAddress) {
// maintaining data integrity.
}
#endif
- return CommandInstanceNo;
+ return MInstanceID;
}
void Command::makeTraceEventEpilog() {
@@ -973,14 +976,15 @@ void Command::resolveReleaseDependencies(std::set &DepList) {
// Create an edge with the dependent buffer address being one of the
// properties of the edge
xpti::payload_t p(TypeString.c_str(), MAddress);
- uint64_t EdgeInstanceNo;
- xpti_td *EdgeEvent =
- xptiMakeEvent(TypeString.c_str(), &p, xpti::trace_graph_event,
- xpti_at::active, &EdgeInstanceNo);
+ uint64_t EdgeInstanceNo = xptiGetUniqueId();
+ xpti_tracepoint_t *Event =
+ xptiCreateTracepoint(TypeString.c_str(), nullptr, 0, 0, MAddress);
+ xpti_td *EdgeEvent = Event ? Event->event_ref() : nullptr;
if (EdgeEvent) {
xpti_td *SrcTraceEvent = static_cast(Item->MTraceEvent);
EdgeEvent->target_id = TgtTraceEvent->unique_id;
EdgeEvent->source_id = SrcTraceEvent->unique_id;
+ // We will ensure this is always added.
xpti::addMetadata(EdgeEvent, "memory_object",
reinterpret_cast(MAddress));
xptiNotifySubscribers(MStreamID, xpti::trace_edge_create,
@@ -1049,6 +1053,7 @@ void AllocaCommandBase::emitInstrumentationData() {
if (MTraceEvent) {
xpti_td *TE = static_cast(MTraceEvent);
addDeviceMetadata(TE, MQueue);
+ // Memory-object is used frequently, so it is always added.
xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress));
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
@@ -2032,42 +2037,29 @@ void instrumentationAddExtraKernelMetadata(
}
}
-void instrumentationFillCommonData(const std::string &KernelName,
- const std::string &FuncName,
- const std::string &FileName, uint64_t Line,
- uint64_t Column, const void *const Address,
- queue_impl *Queue,
- std::optional &FromSource,
- uint64_t &OutInstanceID,
- xpti_td *&OutTraceEvent) {
+void instrumentationFillCommonData(
+ xpti::stream_id_t StreamID, const std::string &KernelName,
+ const std::string &FuncName, const std::string &FileName, uint64_t Line,
+ uint64_t Column, const void *const Address, queue_impl *Queue,
+ std::optional &FromSource, uint64_t &OutInstanceID,
+ xpti_td *&OutTraceEvent) {
// Get source file, line number information from the CommandGroup object
// and create payload using name, address, and source info
- //
- // On Windows, since the support for builtin functions is not available in
- // MSVC, the MFileName, MLine will be set to nullptr and "0" respectively.
- // Handle this condition explicitly here.
- bool HasSourceInfo = false;
- xpti::payload_t Payload;
- if (!FileName.empty()) {
- // File name has a valid string
- Payload =
- xpti::payload_t(FuncName.empty() ? KernelName.data() : FuncName.data(),
- FileName.data(), Line, Column, Address);
- HasSourceInfo = true;
- } else if (Address) {
- // We have a valid function name and an address
- Payload = xpti::payload_t(KernelName.data(), Address);
+ bool HasSourceInfo = !FileName.empty();
+ xpti_tracepoint_t *Event;
+ void *AddressToUse = const_cast(Address);
+ if (HasSourceInfo) {
+ const auto &Name = FuncName.empty() ? KernelName : FuncName;
+ Event = xptiCreateTracepoint(Name.c_str(), FileName.c_str(), Line, Column,
+ AddressToUse);
} else {
- // In any case, we will have a valid function name and we'll use that to
- // create the hash
- Payload = xpti::payload_t(KernelName.data());
+ Event =
+ xptiCreateTracepoint(KernelName.data(), nullptr, 0, 0, AddressToUse);
}
- uint64_t CGKernelInstanceNo;
+ uint64_t CGKernelInstanceNo = xptiGetUniqueId();
// Create event using the payload
- xpti_td *CmdTraceEvent =
- xptiMakeEvent("ExecCG", &Payload, xpti::trace_graph_event,
- xpti::trace_activity_type_t::active, &CGKernelInstanceNo);
+ xpti_td *CmdTraceEvent = Event ? Event->event_ref() : nullptr;
if (CmdTraceEvent) {
OutInstanceID = CGKernelInstanceNo;
OutTraceEvent = CmdTraceEvent;
@@ -2076,15 +2068,19 @@ void instrumentationFillCommonData(const std::string &KernelName,
if (!KernelName.empty()) {
xpti::addMetadata(CmdTraceEvent, "kernel_name", KernelName);
}
- if (FromSource.has_value()) {
- xpti::addMetadata(CmdTraceEvent, "from_source", FromSource.value());
- }
- if (HasSourceInfo) {
- xpti::addMetadata(CmdTraceEvent, "sym_function_name", KernelName);
- xpti::addMetadata(CmdTraceEvent, "sym_source_file_name", FileName);
- xpti::addMetadata(CmdTraceEvent, "sym_line_no", static_cast(Line));
- xpti::addMetadata(CmdTraceEvent, "sym_column_no",
- static_cast(Column));
+ // We limit the metadata to only include the kernel name and device
+ // information by default.
+ if (detail::isDebugStream(StreamID)) {
+ if (FromSource.has_value()) {
+ xpti::addMetadata(CmdTraceEvent, "from_source", FromSource.value());
+ }
+ if (HasSourceInfo) {
+ xpti::addMetadata(CmdTraceEvent, "sym_function_name", KernelName);
+ xpti::addMetadata(CmdTraceEvent, "sym_source_file_name", FileName);
+ xpti::addMetadata(CmdTraceEvent, "sym_line_no", static_cast(Line));
+ xpti::addMetadata(CmdTraceEvent, "sym_column_no",
+ static_cast(Column));
+ }
}
// We no longer set the 'queue_id' in the metadata structure as it is a
// mutable value and multiple threads using the same queue created at the
@@ -2124,7 +2120,7 @@ std::pair emitKernelInstrumentationData(
? CodeLoc.functionName()
: std::string();
- instrumentationFillCommonData(KernelName, FuncName, FileName,
+ instrumentationFillCommonData(StreamID, KernelName, FuncName, FileName,
CodeLoc.lineNumber(), CodeLoc.columnNumber(),
Address, Queue, FromSource, InstanceID,
CmdTraceEvent);
@@ -2136,10 +2132,13 @@ std::pair emitKernelInstrumentationData(
if (Queue)
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
getQueueID(Queue));
- instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc,
- KernelBundleImplPtr, DeviceKernelInfo,
- SyclKernel, Queue, CGArgs);
-
+ // Add the additional metadata only if the debug information is subscribed
+ // to; in this case, it is the kernel and its parameters.
+ if (detail::isDebugStream(StreamID)) {
+ instrumentationAddExtraKernelMetadata(
+ CmdTraceEvent, NDRDesc, KernelBundleImplPtr, DeviceKernelInfo,
+ SyclKernel, Queue, CGArgs);
+ }
xptiNotifySubscribers(
StreamID, NotificationTraceType, detail::GSYCLGraphEvent, CmdTraceEvent,
InstanceID,
@@ -2179,10 +2178,10 @@ void ExecCGCommand::emitInstrumentationData() {
FuncName = MCommandGroup->MFunctionName;
xpti_td *CmdTraceEvent = nullptr;
- instrumentationFillCommonData(KernelName, FuncName, MCommandGroup->MFileName,
- MCommandGroup->MLine, MCommandGroup->MColumn,
- MAddress, MQueue.get(), FromSource, MInstanceID,
- CmdTraceEvent);
+ instrumentationFillCommonData(MStreamID, KernelName, FuncName,
+ MCommandGroup->MFileName, MCommandGroup->MLine,
+ MCommandGroup->MColumn, MAddress, MQueue.get(),
+ FromSource, MInstanceID, CmdTraceEvent);
if (CmdTraceEvent) {
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp
index 5f35997639da8..8abd2ea6e4b20 100644
--- a/sycl/source/detail/xpti_registry.cpp
+++ b/sycl/source/detail/xpti_registry.cpp
@@ -25,6 +25,7 @@ uint8_t GMemAllocStreamID = xpti::invalid_id;
uint8_t GCudaCallStreamID = xpti::invalid_id;
uint8_t GCudaDebugStreamID = xpti::invalid_id;
uint8_t GSYCLStreamID = xpti::invalid_id;
+uint8_t GSYCLDebugStreamID = xpti::invalid_id;
uint8_t GUrCallStreamID = xpti::invalid_id;
uint8_t GUrApiStreamID = xpti::invalid_id;
diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp
index 16749ee259b0c..9598eefab7b27 100644
--- a/sycl/source/detail/xpti_registry.hpp
+++ b/sycl/source/detail/xpti_registry.hpp
@@ -40,6 +40,11 @@ constexpr const char *GVerStr = SYCL_VERSION_STR;
/// We define all the streams used the instrumentation framework here
inline constexpr const char *SYCL_STREAM_NAME = "sycl";
+// We will use "sycl.debug" stream name as an indicator of needing debugging
+// information; in this case, the tool will have to subscribe to the sycl.debug
+// stream to get additional debug metadata, but the metadata will still be sent
+// through the regular streams.
+inline constexpr const char *SYCL_DEBUG_STREAM_NAME = "sycl.debug";
inline constexpr auto SYCL_MEM_ALLOC_STREAM_NAME =
"sycl.experimental.mem_alloc";
// Stream name being used to notify about buffer objects.
@@ -53,6 +58,7 @@ extern uint8_t GBufferStreamID;
extern uint8_t GImageStreamID;
extern uint8_t GMemAllocStreamID;
extern uint8_t GSYCLStreamID;
+extern uint8_t GSYCLDebugStreamID;
extern uint8_t GUrApiStreamID;
extern xpti::trace_event_data_t *GMemAllocEvent;
@@ -62,6 +68,22 @@ extern xpti::trace_event_data_t *GApiEvent;
// We will pick a global constant so that the pointer in TLS never goes stale
inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id";
+
+// Helper to check if xpti stream is debug.
+inline bool isDebugStream(xpti::stream_id_t StreamID) {
+ return StreamID == detail::GSYCLDebugStreamID;
+}
+
+inline uint8_t getActiveXPTIStreamID() {
+ return xptiCheckTraceEnabled(detail::GSYCLDebugStreamID)
+ ? detail::GSYCLDebugStreamID
+ : detail::GSYCLStreamID;
+}
+
+inline bool anyTraceEnabled(uint16_t TraceType) {
+ return xptiCheckTraceEnabled(detail::GSYCLDebugStreamID, TraceType) ||
+ xptiCheckTraceEnabled(detail::GSYCLStreamID, TraceType);
+}
#endif
class XPTIRegistry {
@@ -79,6 +101,10 @@ class XPTIRegistry {
// SYCL events
detail::GSYCLStreamID =
this->initializeStream(SYCL_STREAM_NAME, GMajVer, GMinVer, GVerStr);
+ // Register the SYCL Debug event stream; tools subscribing to this stream
+ // will receive additional metadata in the regular "sycl" stream.
+ detail::GSYCLDebugStreamID = this->initializeStream(
+ SYCL_DEBUG_STREAM_NAME, GMajVer, GMinVer, GVerStr);
// SYCL buffer events
detail::GBufferStreamID = this->initializeStream(
SYCL_BUFFER_STREAM_NAME, GMajVer, GMinVer, GVerStr);
@@ -98,8 +124,9 @@ class XPTIRegistry {
if (detail::GSYCLGraphEvent) {
// The graph event is a global event and will be used as the parent for
// all nodes (command groups, memory allocations, etc)
- xptiNotifySubscribers(detail::GSYCLStreamID, xpti::trace_graph_create,
- nullptr, detail::GSYCLGraphEvent,
+ xptiNotifySubscribers(detail::getActiveXPTIStreamID(),
+ xpti::trace_graph_create, nullptr,
+ detail::GSYCLGraphEvent,
detail::GSYCLGraphEvent->instance_id, nullptr);
}
auto MemAllocEventTP =
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index e9a4322a5b191..b7a45d37f75f4 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -645,21 +645,21 @@ event handler::finalize() {
? nullptr
: detail::event_impl::create_device_event(impl->get_queue());
-#ifdef XPTI_ENABLE_INSTRUMENTATION
- // Only enable instrumentation if there are subscribes to the SYCL stream
- const bool xptiEnabled = xptiCheckTraceEnabled(detail::GSYCLStreamID);
-#endif
auto EnqueueKernel = [&]() {
#ifdef XPTI_ENABLE_INSTRUMENTATION
xpti_td *CmdTraceEvent = nullptr;
uint64_t InstanceID = 0;
+ auto StreamID = detail::getActiveXPTIStreamID();
+ // Only enable instrumentation if there are subscribes to the SYCL
+ // stream
+ const bool xptiEnabled = xptiCheckTraceEnabled(StreamID);
if (xptiEnabled) {
std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData(
- detail::GSYCLStreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc,
+ StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc,
*impl->MKernelData.getDeviceKernelInfoPtr(),
impl->get_queue_or_null(), impl->MKernelData.getNDRDesc(),
KernelBundleImpPtr, impl->MKernelData.getArgs());
- detail::emitInstrumentationGeneral(detail::GSYCLStreamID, InstanceID,
+ detail::emitInstrumentationGeneral(StreamID, InstanceID,
CmdTraceEvent,
xpti::trace_task_begin, nullptr);
}
@@ -685,11 +685,10 @@ event handler::finalize() {
// Emit signal only when event is created
if (!DiscardEvent) {
detail::emitInstrumentationGeneral(
- detail::GSYCLStreamID, InstanceID, CmdTraceEvent,
- xpti::trace_signal,
+ StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal,
static_cast(ResultEvent->getHandle()));
}
- detail::emitInstrumentationGeneral(detail::GSYCLStreamID, InstanceID,
+ detail::emitInstrumentationGeneral(StreamID, InstanceID,
CmdTraceEvent,
xpti::trace_task_end, nullptr);
}
diff --git a/sycl/test-e2e/XPTI/Inputs/memory_info_collector.cpp b/sycl/test-e2e/XPTI/Inputs/memory_info_collector.cpp
index a2dd139d112a6..bd7e0d457c160 100644
--- a/sycl/test-e2e/XPTI/Inputs/memory_info_collector.cpp
+++ b/sycl/test-e2e/XPTI/Inputs/memory_info_collector.cpp
@@ -56,7 +56,7 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion,
xptiRegisterCallback(StreamID, static_cast(t),
syclImageCallback);
}
- if (NameView == "sycl") {
+ if (NameView == "sycl" || NameView == "sycl.debug") {
uint8_t StreamID = xptiRegisterStream(StreamName);
for (type t : std::initializer_list{
type::graph_create, type::node_create, type::edge_create,
@@ -105,10 +105,9 @@ XPTI_CALLBACK_API void syclBufferCallback(uint16_t TraceType,
<< BufConstr->user_object_handle << "|0x"
<< BufConstr->host_object_handle << "|" << std::dec
<< BufConstr->element_type << "|" << BufConstr->element_size
- << "|" << BufConstr->dim << "|"
- << "{" << BufConstr->range[0] << "," << BufConstr->range[1] << ","
- << BufConstr->range[2] << "}|"
- << Event->reserved.payload->source_file << ":"
+ << "|" << BufConstr->dim << "|" << "{" << BufConstr->range[0]
+ << "," << BufConstr->range[1] << "," << BufConstr->range[2]
+ << "}|" << Event->reserved.payload->source_file << ":"
<< Event->reserved.payload->line_no << ":"
<< Event->reserved.payload->column_no << "\n";
@@ -167,9 +166,9 @@ XPTI_CALLBACK_API void syclImageCallback(uint16_t TraceType,
std::cout << "un";
std::cout << "sampled image|0x" << std::hex << ImgConstr->user_object_handle
<< "|0x" << ImgConstr->host_object_handle << "|" << std::dec
- << ImgConstr->dim << "|"
- << "{" << ImgConstr->range[0] << "," << ImgConstr->range[1] << ","
- << ImgConstr->range[2] << "}|" << ImgConstr->format << "|";
+ << ImgConstr->dim << "|" << "{" << ImgConstr->range[0] << ","
+ << ImgConstr->range[1] << "," << ImgConstr->range[2] << "}|"
+ << ImgConstr->format << "|";
if (IsSampledImage)
std::cout << *ImgConstr->addressing << "|"
<< *ImgConstr->coordinate_normalization << "|"
@@ -282,8 +281,7 @@ void parseMetadata(xpti::trace_event_data_t *Event) {
Metadata, Name.c_str());
std::cout << " " << Name << " : {" << arg.type << ", " << std::hex
<< "0x" << (uintptr_t)arg.pointer << std::dec << ", "
- << arg.size << ", " << arg.index << "} "
- << "\n";
+ << arg.size << ", " << arg.index << "} " << "\n";
}
} else {
std::cout << "\n";
diff --git a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp
index 86e126714fc07..c147521a2efc2 100644
--- a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp
+++ b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp
@@ -26,7 +26,7 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int MajorVersion,
for (type t : std::initializer_list{type::function_with_args_begin})
xptiRegisterCallback(StreamID, static_cast(t), syclUrCallback);
}
- if (NameView == "sycl") {
+ if (NameView == "sycl" || NameView == "sycl.debug") {
uint8_t StreamID = xptiRegisterStream(StreamName);
for (type t : std::initializer_list{
diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp
index 2fc4f365a813e..8faa2f2678356 100644
--- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp
+++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp
@@ -6,6 +6,7 @@
#include "basic_event_collection.inc"
//
// CHECK-DAG: xptiTraceInit: Stream Name = sycl
+// CHECK-DAG: xptiTraceInit: Stream Name = sycl.debug
// CHECK-DAG: xptiTraceInit: Stream Name = sycl.experimental.buffer
// CHECK-DAG: xptiTraceInit: Stream Name = sycl.experimental.image
// CHECK-DAG: xptiTraceInit: Stream Name = sycl.experimental.mem_alloc
diff --git a/sycl/tools/sycl-prof/collector.cpp b/sycl/tools/sycl-prof/collector.cpp
index 7afca930b9194..475df602c00ca 100644
--- a/sycl/tools/sycl-prof/collector.cpp
+++ b/sycl/tools/sycl-prof/collector.cpp
@@ -83,7 +83,7 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
urBeginEndCallback);
xptiRegisterCallback(StreamID, xpti::trace_function_with_args_end,
urBeginEndCallback);
- } else if (NameView == "sycl") {
+ } else if (NameView == "sycl" || NameView == "sycl.debug") {
uint8_t StreamID = xptiRegisterStream(StreamName);
xptiRegisterCallback(StreamID, xpti::trace_task_begin,
taskBeginEndCallback);
diff --git a/sycl/tools/sycl-trace/collector.cpp b/sycl/tools/sycl-trace/collector.cpp
index 79df84994923a..1b69e46e09410 100644
--- a/sycl/tools/sycl-trace/collector.cpp
+++ b/sycl/tools/sycl-trace/collector.cpp
@@ -180,7 +180,8 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
unsigned int /*minor_version*/,
const char * /*version_str*/,
const char *StreamName) {
- if (std::string_view(StreamName) == "ur.call" &&
+ if ((std::string_view(StreamName) == "ur.call" ||
+ std::string_view(StreamName) == "ur.call.debug") &&
std::getenv("SYCL_TRACE_UR_ENABLE")) {
urPrintersInit();
uint16_t StreamID = xptiRegisterStream(StreamName);
@@ -215,7 +216,8 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
}
#endif
}
- if (std::string_view(StreamName) == "sycl" &&
+ if ((std::string_view(StreamName) == "sycl" ||
+ std::string_view(StreamName) == "sycl.debug") &&
std::getenv("SYCL_TRACE_API_ENABLE")) {
syclPrintersInit();
uint16_t StreamID = xptiRegisterStream(StreamName);
@@ -236,7 +238,8 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
}
XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) {
- if (std::string_view(StreamName) == "ur.call" &&
+ if ((std::string_view(StreamName) == "ur.call" ||
+ std::string_view(StreamName) == "ur.call.debug") &&
std::getenv("SYCL_TRACE_UR_ENABLE"))
urPrintersFinish();
#ifdef SYCL_HAS_LEVEL_ZERO
@@ -254,10 +257,12 @@ XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) {
cudaCollectorLibrary.clear();
}
#endif
- if (std::string_view(StreamName) == "sycl" &&
- std::getenv("SYCL_TRACE_API_ENABLE"))
+ if ((std::string_view(StreamName) == "sycl" ||
+ std::string_view(StreamName) == "sycl.debug") &&
+ std::getenv("SYCL_TRACE_API_ENABLE")) {
syclPrintersFinish();
- if (std::getenv("SYCL_TRACE_VERIFICATION_ENABLE")) {
- vPrintersFinish();
+ if (std::getenv("SYCL_TRACE_VERIFICATION_ENABLE")) {
+ vPrintersFinish();
+ }
}
}
diff --git a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp
index 2c79f76269c11..952c5124d144a 100644
--- a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp
+++ b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp
@@ -153,6 +153,17 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/,
xptiRegisterCallback(StreamID, xpti::trace_queue_destroy, testCallback);
xptiRegisterCallback(StreamID, xpti::trace_task_begin, testCallback);
xptiRegisterCallback(StreamID, xpti::trace_task_end, testCallback);
+ // Register callbacks for the debug stream to enable additional
+ // metadata that may be used for tests.
+ uint8_t DebugStreamID = xptiRegisterStream("sycl.debug");
+ xptiRegisterCallback(DebugStreamID, xpti::trace_diagnostics, testCallback);
+ xptiRegisterCallback(DebugStreamID, xpti::trace_node_create, testCallback);
+ xptiRegisterCallback(DebugStreamID, xpti::trace_task_begin, testCallback);
+ xptiRegisterCallback(DebugStreamID, xpti::trace_task_end, testCallback);
+ xptiRegisterCallback(DebugStreamID, xpti::trace_queue_create, testCallback);
+ xptiRegisterCallback(DebugStreamID, xpti::trace_queue_destroy, testCallback);
+ xptiRegisterCallback(DebugStreamID, xpti::trace_task_begin, testCallback);
+ xptiRegisterCallback(DebugStreamID, xpti::trace_task_end, testCallback);
}
XPTI_CALLBACK_API void xptiTraceFinish(const char * /*StreamName*/) {}
diff --git a/unified-runtime/source/adapters/cuda/tracing.cpp b/unified-runtime/source/adapters/cuda/tracing.cpp
index fb8e7c83d32f1..c1f3b0ecb36de 100644
--- a/unified-runtime/source/adapters/cuda/tracing.cpp
+++ b/unified-runtime/source/adapters/cuda/tracing.cpp
@@ -107,14 +107,20 @@ static void cuptiCallback(void *UserData, CUpti_CallbackDomain,
uint8_t CallStreamID = xptiRegisterStream(CUDA_CALL_STREAM_NAME);
uint8_t DebugStreamID = xptiRegisterStream(CUDA_DEBUG_STREAM_NAME);
- xptiNotifySubscribers(CallStreamID, TraceType, Ctx->CallEvent, nullptr,
- CallCorrelationID, FuncName);
-
- xpti::function_with_args_t Payload{
- FuncID, FuncName, const_cast(CBInfo->functionParams),
- CBInfo->functionReturnValue, CBInfo->context};
- xptiNotifySubscribers(DebugStreamID, TraceTypeArgs, Ctx->DebugEvent,
- nullptr, DebugCorrelationID, &Payload);
+ // Only notify if there are subscribers.
+ if (xptiCheckTraceEnabled(CallStreamID, TraceType)) {
+ xptiNotifySubscribers(CallStreamID, TraceType, Ctx->CallEvent, nullptr,
+ CallCorrelationID, FuncName);
+ }
+
+ // Prepare the payload and notify subscribers if there are subscribers.
+ if (xptiCheckTraceEnabled(DebugStreamID, TraceTypeArgs)) {
+ xpti::function_with_args_t Payload{
+ FuncID, FuncName, const_cast(CBInfo->functionParams),
+ CBInfo->functionReturnValue, CBInfo->context};
+ xptiNotifySubscribers(DebugStreamID, TraceTypeArgs, Ctx->DebugEvent,
+ nullptr, DebugCorrelationID, &Payload);
+ }
}
}
#endif
@@ -193,16 +199,13 @@ void enableCUDATracing(cuda_tracing_context_t_ *Ctx) {
xptiRegisterStream(CUDA_DEBUG_STREAM_NAME);
xptiInitialize(CUDA_DEBUG_STREAM_NAME, GMajVer, GMinVer, GVerStr);
- uint64_t Dummy;
- xpti::payload_t CUDAPayload("CUDA Plugin Layer");
- Ctx->CallEvent =
- xptiMakeEvent("CUDA Plugin Layer", &CUDAPayload,
- xpti::trace_algorithm_event, xpti_at::active, &Dummy);
-
- xpti::payload_t CUDADebugPayload("CUDA Plugin Debug Layer");
+ auto CudaCallEvent =
+ xptiCreateTracepoint("CUDA Plugin Layer", nullptr, 0, 0, nullptr);
+ auto CudaCallDebugEvent =
+ xptiCreateTracepoint("CUDA Plugin Debug Layer", nullptr, 0, 0, nullptr);
+ Ctx->CallEvent = CudaCallEvent ? CudaCallEvent->event_ref() : nullptr;
Ctx->DebugEvent =
- xptiMakeEvent("CUDA Plugin Debug Layer", &CUDADebugPayload,
- xpti::trace_algorithm_event, xpti_at::active, &Dummy);
+ CudaCallDebugEvent ? CudaCallDebugEvent->event_ref() : nullptr;
Ctx->Cupti.Subscribe(&Ctx->Subscriber, cuptiCallback, Ctx);
Ctx->Cupti.EnableDomain(1, Ctx->Subscriber, CUPTI_CB_DOMAIN_DRIVER_API);
diff --git a/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.cpp b/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.cpp
index b8ffa1edba5be..3125c3fcfb0d1 100644
--- a/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.cpp
+++ b/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.cpp
@@ -24,6 +24,7 @@ namespace ur_tracing_layer {
context_t *getContext() { return context_t::get_direct(); }
constexpr auto CALL_STREAM_NAME = "ur.call";
+constexpr auto DEBUG_CALL_STREAM_NAME = "ur.call.debug";
constexpr auto STREAM_VER_MAJOR = UR_MAJOR_VERSION(UR_API_VERSION_CURRENT);
constexpr auto STREAM_VER_MINOR = UR_MINOR_VERSION(UR_API_VERSION_CURRENT);
@@ -40,6 +41,10 @@ static std::shared_ptr xptiContextManagerGet() {
static auto contextManager = std::make_shared();
return contextManager;
}
+
+// The Unified Runtime API calls are meant to be performant and creating an
+// event for each API Call will add significant overheads.
+static xpti_td *GURCallEvent = nullptr;
static thread_local xpti_td *activeEvent;
///////////////////////////////////////////////////////////////////////////////
@@ -47,36 +52,53 @@ context_t::context_t() : logger(logger::create_logger("tracing", true, true)) {
this->xptiContextManager = xptiContextManagerGet();
call_stream_id = xptiRegisterStream(CALL_STREAM_NAME);
+ debug_call_stream_id = xptiRegisterStream(DEBUG_CALL_STREAM_NAME);
std::ostringstream streamv;
streamv << STREAM_VER_MAJOR << "." << STREAM_VER_MINOR;
xptiInitialize(CALL_STREAM_NAME, STREAM_VER_MAJOR, STREAM_VER_MINOR,
streamv.str().data());
+ xptiInitialize(DEBUG_CALL_STREAM_NAME, STREAM_VER_MAJOR, STREAM_VER_MINOR,
+ streamv.str().data());
+ // Create global event for all UR API calls.
+ xpti_tracepoint_t *Event =
+ xptiCreateTracepoint("Unified Runtime call", nullptr, 0, 0, (void *)this);
+ // For function_begin/function_end class of notification, the parent and the
+ // event object can be NULL based on the specification.
+ GURCallEvent = Event ? Event->event_ref() : nullptr;
}
void context_t::notify(uint16_t trace_type, uint32_t id, const char *name,
void *args, ur_result_t *resultp, uint64_t instance) {
xpti::function_with_args_t payload{id, name, args, resultp, nullptr};
- xptiNotifySubscribers(call_stream_id, trace_type, nullptr, activeEvent,
- instance, &payload);
+ if (xptiCheckTraceEnabled(debug_call_stream_id)) {
+ xptiNotifySubscribers(debug_call_stream_id, trace_type, nullptr,
+ activeEvent, instance, &payload);
+ } else {
+ // Use global event for all UR API calls
+ xptiNotifySubscribers(call_stream_id, trace_type, nullptr, activeEvent,
+ instance, &payload);
+ }
}
uint64_t context_t::notify_begin(uint32_t id, const char *name, void *args) {
- // we use UINT64_MAX as a special value that means "tracing disabled",
- // so that we don't have to repeat this check in notify_end.
- if (!xptiCheckTraceEnabled(call_stream_id)) {
+ if (xptiCheckTraceEnabled(debug_call_stream_id)) {
+ // Create a new tracepoint with code location info for each UR API call.
+ // This adds significant overhead to the tracing toolchain, so do this only
+ // if there are debug stream subscribers.
+ if (auto loc = codelocData.get_codeloc()) {
+ xpti_tracepoint_t *Event = xptiCreateTracepoint(
+ loc->functionName, loc->sourceFile, loc->lineNumber,
+ loc->columnNumber, (void *)this);
+ activeEvent = Event ? Event->event_ref() : nullptr;
+ }
+ } else if (xptiCheckTraceEnabled(call_stream_id)) {
+ // Otherwise use global event for all UR API calls.
+ activeEvent = GURCallEvent;
+ } else {
+ // We use UINT64_MAX as a special value that means "tracing disabled",
+ // so that we don't have to repeat this check in notify_end.
return UINT64_MAX;
}
-
- if (auto loc = codelocData.get_codeloc()) {
- xpti::payload_t payload =
- xpti::payload_t(loc->functionName, loc->sourceFile, loc->lineNumber,
- loc->columnNumber, nullptr);
- uint64_t InstanceNumber{};
- activeEvent =
- xptiMakeEvent("Unified Runtime call", &payload, xpti::trace_graph_event,
- xpti_at::active, &InstanceNumber);
- }
-
uint64_t instance = xptiGetUniqueId();
notify((uint16_t)xpti::trace_point_type_t::function_with_args_begin, id, name,
args, nullptr, instance);
diff --git a/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.hpp b/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.hpp
index f1fcd01cab3ae..35268b03b64fd 100644
--- a/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.hpp
+++ b/unified-runtime/source/loader/layers/tracing/ur_tracing_layer.hpp
@@ -48,6 +48,7 @@ class __urdlllocal context_t : public proxy_layer_context_t,
void notify(uint16_t trace_type, uint32_t id, const char *name, void *args,
ur_result_t *resultp, uint64_t instance);
uint8_t call_stream_id;
+ uint8_t debug_call_stream_id;
inline static const std::string name = "UR_LAYER_TRACING";
diff --git a/xpti/include/xpti/xpti_trace_framework.h b/xpti/include/xpti/xpti_trace_framework.h
index 2ac5494d79960..99ee1a5fb2f16 100644
--- a/xpti/include/xpti/xpti_trace_framework.h
+++ b/xpti/include/xpti/xpti_trace_framework.h
@@ -781,9 +781,7 @@ xptiUnregisterCallback(xpti::stream_id_t stream_id, uint16_t trace_type,
/// @brief Notifies all registered subscribers that an event has occurred
/// @details Subscribers receive notifications to the trace point types they
-/// register a callback with. This function allows subscribers to unregister
-/// any previously registered callback functions with this function so they can
-/// stop receiving notifications.
+/// register a callback with.
///
/// @param stream_id The stream for which the registration must be disabled
/// @param trace_type The trace point type for which the notification is being