Skip to content

Commit 2ea9563

Browse files
committed
[DEBUG] Integrate PTI callback interface and build it from sources
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 5e96f82 commit 2ea9563

File tree

13 files changed

+477
-46
lines changed

13 files changed

+477
-46
lines changed

.github/workflows/triton-benchmarks.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,24 @@ jobs:
116116
cd benchmarks
117117
pip install .
118118
119+
- name: Build PTI from source
120+
id: build-pti
121+
run: |
122+
git clone https://github.com/intel/pti-gpu.git
123+
cd pti-gpu
124+
git checkout 15a201d25e5659692613b98ee33513263b689101
125+
cd sdk
126+
cmake --preset linux-icpx-release
127+
BUILD_TESTING=1 PTI_BUILD_SAMPLES=1 cmake --build --preset linux-icpx-release
128+
129+
PTI_LIBS_DIR="$(pwd)/build-linux-icpx-release/lib/"
130+
ls $PTI_LIBS_DIR
131+
echo "PTI_LIBS_DIR=$PTI_LIBS_DIR" >> $GITHUB_ENV
132+
119133
- name: Run Triton Softmax kernel benchmark
120134
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'fused_softmax.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
121135
run: |
136+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
122137
cd benchmarks/triton_kernels_benchmark
123138
python fused_softmax.py --reports $REPORTS --n_runs $N_RUNS
124139
source ../../scripts/capture-hw-details.sh

python/tutorials/09-persistent-matmul.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -673,7 +673,7 @@ def bench_fn(label, reps, warmup_reps, fn, *args):
673673
for _ in range(warmup_reps):
674674
fn(*args)
675675
#FIXME: Enable for XPU once proton support works.
676-
if is_cuda():
676+
if True or is_cuda():
677677
with proton_context():
678678
for _ in range(reps):
679679
fn(*args)
@@ -783,11 +783,11 @@ def show_profile(precision, profile_name):
783783

784784
validate(32, 32, 32, dtype)
785785
validate(8192, 8192, args.K_range[0], dtype)
786-
if is_cuda():
786+
if True or is_cuda():
787787
proton.start("matmul", hook="triton")
788788
proton.deactivate()
789789
for K in range(args.K_range[0], args.K_range[1] + 1, args.K_step):
790790
bench(K, dtype)
791-
if is_cuda():
791+
if True or is_cuda():
792792
proton.finalize()
793793
show_profile(args.prec, "matmul")

third_party/intel/backend/proton/include/pti/pti.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ typedef enum {
3131
//!< PTI_VIEW_EXTERNAL_CORRELATION
3232
PTI_ERROR_BAD_TIMESTAMP = 6, //!< error in timestamp conversion, might be related with the user
3333
//!< provided TimestampCallback
34-
PTI_ERROR_BAD_API_ID = 7, //!< invalid api_id when enable/disable runtime/driver specific api_id
34+
PTI_ERROR_BAD_API_ID = 7, //!< invalid api_id when enable/disable runtime/driver specific api_id
35+
PTI_ERROR_NO_GPU_VIEWS_ENABLED = 8, //!< at least one GPU view must be enabled for kernel tracing
36+
3537
PTI_ERROR_DRIVER = 50, //!< unknown driver error
3638
PTI_ERROR_TRACING_NOT_INITIALIZED = 51, //!< installed driver requires tracing enabling with
3739
//!< setting environment variable ZE_ENABLE_TRACING_LAYER
@@ -57,6 +59,25 @@ typedef enum {
5759
*/
5860
PTI_EXPORT const char* ptiResultTypeToString(pti_result result_value);
5961

62+
63+
/**
64+
* @brief Abstraction for backend-specific objects.
65+
*
66+
* Level Zero is currently the only supported backend. However, these types will attempt to serve other backends.
67+
* If another backend becomes supported, these same types will serve it.
68+
*/
69+
70+
typedef void* pti_device_handle_t; //!< Device handle
71+
72+
typedef void* pti_backend_ctx_t; //!< Backend context handle
73+
74+
typedef void* pti_backend_queue_t; //!< Backend queue handle
75+
76+
typedef void* pti_backend_evt_t; //!< Backend event handle
77+
78+
typedef void* pti_backend_command_list_t; //!< Backend command list handle
79+
80+
6081
#if defined(__cplusplus)
6182
}
6283
#endif
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
//==============================================================
2+
// Copyright (C) Intel Corporation
3+
//
4+
// SPDX-License-Identifier: MIT
5+
// =============================================================
6+
7+
#ifndef PTI_CALLBACK_H_
8+
#define PTI_CALLBACK_H_
9+
10+
#include <stdint.h>
11+
12+
#include "pti/pti.h"
13+
#include "pti/pti_view.h"
14+
15+
/**
16+
* This file contains APIs that are so far experimental in PTI.
17+
* APIs and data structures in this file are work-in-progress and subject to change!
18+
* All content in this file concerns the Callback API.
19+
*
20+
* The Callback API is useful for many purposes,
21+
* including the implementation of `MetricsScope` functionality that needs to subscribe to
22+
* domains such as kernel append to a command list, and potentially other domains.
23+
* The `MetricsScope` API is under development and is the first (internal) user of the Callback API.
24+
*/
25+
26+
27+
/* clang-format off */
28+
#if defined(__cplusplus)
29+
extern "C" {
30+
#endif
31+
32+
typedef struct _pti_callback_subscriber* pti_callback_subscriber_handle;
33+
34+
typedef enum _pti_callback_domain {
35+
PTI_CB_DOMAIN_INVALID = 0,
36+
PTI_CB_DOMAIN_DRIVER_CONTEXT_CREATED = 1, //!< Not implemented yet
37+
//!< attempt to enable it will return PTI_ERROR_NOT_IMPLEMENTED
38+
39+
PTI_CB_DOMAIN_DRIVER_MODULE_LOADED = 2, //!< Not implemented yet
40+
//!< attempt to enable it will return PTI_ERROR_NOT_IMPLEMENTED
41+
42+
PTI_CB_DOMAIN_DRIVER_MODULE_UNLOADED = 3, //!< Not implemented yet
43+
//!< attempt to enable it will return PTI_ERROR_NOT_IMPLEMENTED
44+
45+
PTI_CB_DOMAIN_DRIVER_GPU_OPERATION_APPENDED = 4, //!< Synchronous callback
46+
//!< This also serves as PTI_CB_DOMAIN_DRIVER_GPU_OPERATION_DISPATCHED
47+
//!< when appended to Immediate Command List,
48+
//!< which means no separate PTI_CB_DOMAIN_DRIVER_GPU_OPERATION_DISPATCHED callback is issued
49+
50+
PTI_CB_DOMAIN_DRIVER_GPU_OPERATION_DISPATCHED = 5, //!< Not implemented yet
51+
//!< attempt to enable it will return PTI_ERROR_NOT_IMPLEMENTED
52+
53+
PTI_CB_DOMAIN_DRIVER_GPU_OPERATION_COMPLETED = 6, //!< Asynchronous callback, always has only EXIT phase of some API,
54+
//!< where completed operations are collected and reported
55+
56+
PTI_CB_DOMAIN_DRIVER_HOST_SYNCHRONIZATION = 7, //!< Not implemented yet
57+
//!< attempt to enable it will return PTI_ERROR_NOT_IMPLEMENTED
58+
59+
PTI_CB_DOMAIN_DRIVER_API = 1023, //!< Not implemented yet,
60+
//!< attempt to enable it will return PTI_ERROR_NOT_IMPLEMENTED
61+
//!< Callback created for all Driver APIs
62+
// below domains to inform user about PTI internal events
63+
PTI_CB_DOMAIN_INTERNAL_THREADS = 1024, //!< Not implemented yet
64+
PTI_CB_DOMAIN_INTERNAL_EVENT = 1025, //!< Not implemented yet
65+
66+
PTI_CB_DOMAIN_MAX = 0x7fffffff
67+
} pti_callback_domain;
68+
69+
typedef enum _pti_callback_phase {
70+
PTI_CB_PHASE_INVALID = 0,
71+
PTI_CB_PHASE_API_ENTER = 1,
72+
PTI_CB_PHASE_API_EXIT = 2,
73+
PTI_CB_PHASE_INTERNAL_THREAD_START = 3,
74+
PTI_CB_PHASE_INTERNAL_THREAD_END = 4,
75+
PTI_CB_PHASE_INTERNAL_EVENT = 5,
76+
77+
PTI_CB_PHASE_MAX = 0x7fffffff
78+
} pti_callback_phase;
79+
80+
typedef enum _pti_backend_command_list_type {
81+
PTI_BACKEND_COMMAND_LIST_TYPE_UNKNOWN = (1<<0),
82+
PTI_BACKEND_COMMAND_LIST_TYPE_IMMEDIATE = (1<<1),
83+
PTI_BACKEND_COMMAND_LIST_TYPE_MUTABLE = (1<<2),
84+
85+
PTI_BACKEND_COMMAND_LIST_TYPE_MAX = 0x7fffffff
86+
} pti_backend_command_list_type;
87+
88+
/**
89+
* A user can subscribe to notifications about non-standard situations from PTI
90+
* when it collects or processes the data
91+
*/
92+
typedef enum _pti_internal_event_type {
93+
PTI_INTERNAL_EVENT_TYPE_INFO = 0,
94+
PTI_INTERNAL_EVENT_TYPE_WARNING = 1, // data inconsistencies in one or a few records, or other issues;
95+
// collection is safe to continue
96+
PTI_INTERNAL_EVENT_TYPE_CRITICAL = 2, // critical error after which further collected data are invalid
97+
98+
PTI_INTERNAL_EVENT_TYPE_MAX = 0x7fffffff
99+
} pti_internal_event_type;
100+
101+
typedef enum _pti_gpu_operation_kind {
102+
PTI_GPU_OPERATION_KIND_INVALID = 0,
103+
PTI_GPU_OPERATION_KIND_KERNEL = 1,
104+
PTI_GPU_OPERATION_KIND_MEMORY = 2,
105+
PTI_GPU_OPERATION_KIND_OTHER = 3,
106+
107+
PTI_GPU_OPERATION_KIND_MAX = 0x7fffffff
108+
} pti_gpu_operation_kind;
109+
110+
typedef struct _pti_gpu_op_details {
111+
pti_gpu_operation_kind _operation_kind; //!< Kind of the operation: kernel, mem op
112+
uint64_t _operation_id; //!< GPU kernel or memory operation instance ID,
113+
//!< unique throughout the process
114+
uint64_t _kernel_handle; //!< a handle uniquely identifying kernel object as
115+
//!< contained in the module at the specific offset
116+
//!< it will be zero in case of not implemented yet or
117+
//!< for memory operations
118+
const char* _name; //!< symbolic name of a kernel or memcpy operation
119+
} pti_gpu_op_details;
120+
121+
typedef struct _pti_callback_gpu_op_data {
122+
pti_callback_domain _domain; //!< domain of the callback
123+
pti_backend_command_list_type _cmd_list_properties; //!< immediate, mutable,..
124+
pti_backend_command_list_t _cmd_list_handle; //!< Device back-end command list handle,
125+
//!< could be nullptr if unknown or
126+
//!< when several operations with different command lists
127+
//!< reported together
128+
pti_backend_queue_t _queue_handle; //!< Device back-end queue handle,
129+
//!< could be nullptr if unknown or
130+
//!< when several operations with different command lists
131+
//!< reported together
132+
pti_device_handle_t _device_handle; //!< Device handle
133+
pti_callback_phase _phase; //!< PTI_CB_PHASE_API_ENTER/EXIT
134+
uint32_t _return_code; //!< will be valid only for L0 API EXIT, for others will be zero
135+
uint32_t _correlation_id; //!< ID that corresponds to the same call reported by View API records
136+
uint32_t _operation_count; //!< number of operations appended or dispatched to the GPU
137+
pti_gpu_op_details* _operation_details; //!< pointer to details of operation(s) appended, dispatched or completed
138+
} pti_callback_gpu_op_data;
139+
140+
typedef struct _pti_internal_callback_data {
141+
pti_callback_domain _domain; //!< domain of the callback
142+
pti_callback_phase _phase; //!< THREAD START/END or INTERNAL EVENT
143+
uint32_t _detail; //!< depending on the domain should be cast/interpreted
144+
//!< as a purpose of an internal PTI thread or
145+
//!< pti_internal_event_type
146+
const char* _message; //!< explains details
147+
} pti_internal_callback_data;
148+
149+
typedef void (*pti_callback_function)(
150+
pti_callback_domain domain,
151+
pti_api_group_id driver_api_group_id, //!< driver API group ID, keep it to distinguish between L0 and OpenCL
152+
//!< although the current implementation is only for L0
153+
uint32_t driver_api_id,
154+
pti_backend_ctx_t backend_context, //!< Driver (L0) level context handle
155+
void* cb_data, //!< depending on the domain, it should be type-casted to the pointer
156+
//!< to either pti_callback_gpu_op_data, pti_internal_callback_data,
157+
//!< or to other types to be defined
158+
void* global_user_data, //!< Any global data defined by user returned
159+
//!< to every callback from the same subscriber
160+
void** instance_user_data); //!< Data that could be passed between ENTER and EXIT
161+
//!< phases of one API call
162+
163+
/**
164+
* Callback API functions
165+
* None of the PTI API functions should be called from within a Callback function.
166+
* Exceptions are helper functions that return character representations of enums.
167+
*/
168+
169+
/**
170+
* @brief Initialize Callback subscriber
171+
*
172+
* @param subscriber - subscriber handle
173+
* @param callback - pointer to the callback function
174+
* @param user_data - user data to be passed to the callback function
175+
* @return pti_result
176+
*/
177+
pti_result PTI_EXPORT
178+
ptiCallbackSubscribe(pti_callback_subscriber_handle* subscriber,
179+
pti_callback_function callback,
180+
void* user_data);
181+
182+
/**
183+
* @brief Unsubscribe Callback subscriber. This unsubscribes from all domains, disables the callback,
184+
* cleans up all resources related to the subscriber handle, and invalidates the handle.
185+
*/
186+
pti_result PTI_EXPORT
187+
ptiCallbackUnsubscribe(pti_callback_subscriber_handle subscriber);
188+
189+
/**
190+
* @brief Enables callbacks on specific domain
191+
*
192+
* @param subscriber - subscriber handle
193+
* @param domain - domain to enable
194+
* @param enter_cb - indicates if callback is called on enter/start: 0-no, 1-yes; used only for domains with 2 phases
195+
* @param exit_cb - indicates if callback is called on exit/end: 0-no, 1-yes; used only for domains with 2 phases
196+
* @return pti_result
197+
*/
198+
pti_result PTI_EXPORT
199+
ptiCallbackEnableDomain(pti_callback_subscriber_handle subscriber,
200+
pti_callback_domain domain,
201+
uint32_t enter_cb,
202+
uint32_t exit_cb);
203+
204+
/**
205+
* @brief Disables callbacks for specific domain
206+
*/
207+
pti_result PTI_EXPORT
208+
ptiCallbackDisableDomain(pti_callback_subscriber_handle subscriber,
209+
pti_callback_domain domain);
210+
211+
/**
212+
* @brief Disables the callback of the subscriber for all domains
213+
*/
214+
pti_result PTI_EXPORT
215+
ptiCallbackDisableAllDomains(pti_callback_subscriber_handle subscriber);
216+
217+
/**
218+
* @brief Helper function to return stringified enum members for pti_callback_domain
219+
*
220+
* @return const char*
221+
*/
222+
PTI_EXPORT const char* ptiCallbackDomainTypeToString(pti_callback_domain domain);
223+
224+
/**
225+
* @brief Helper function to return stringified enum members for pti_callback_phase
226+
*
227+
* @return const char*
228+
*/
229+
PTI_EXPORT const char* ptiCallbackPhaseTypeToString(pti_callback_phase phase);
230+
231+
#if defined(__cplusplus)
232+
}
233+
#endif
234+
#endif // PTI_CALLBACK_H_

third_party/intel/backend/proton/include/pti/pti_driver_levelzero_api_ids.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
// ========= This file is autogenerated - do not modify ========
1111
// ========= Api file version used for generation: =============
1212
// ApiFile: ze_api.h
13-
// ApiVersion: * @version v1.12-r1.12.15
13+
// ApiVersion: * @version v1.13-r1.13.1
1414

1515
// https://github.com/oneapi-src/level-zero.git
16-
// commit: d7a44e0303722e754e711227e0334aae3fa52f9d - v1.20.2
16+
// commit: ff8c99d4abda00fba6d92548a9cb2f721764d9d0 - v1.24.2
1717

1818

1919
typedef enum _pti_api_id_driver_levelzero {
@@ -212,6 +212,17 @@ typedef enum _pti_api_id_driver_levelzero {
212212
zeFabricVertexGetDeviceExp_id=192,
213213
zelTracerSetEnabled_id=193,
214214
zelTracerCreate_id=194,
215+
zeRTASBuilderCreateExt_id=195,
216+
zeRTASBuilderGetBuildPropertiesExt_id=196,
217+
zeRTASBuilderBuildExt_id=197,
218+
zeRTASBuilderCommandListAppendCopyExt_id=198,
219+
zeRTASBuilderDestroyExt_id=199,
220+
zeRTASParallelOperationCreateExt_id=200,
221+
zeRTASParallelOperationGetPropertiesExt_id=201,
222+
zeRTASParallelOperationJoinExt_id=202,
223+
zeRTASParallelOperationDestroyExt_id=203,
224+
zeDriverRTASFormatCompatibilityCheckExt_id=204,
225+
zeDeviceGetVectorWidthPropertiesExt_id=205,
215226
} pti_api_id_driver_levelzero;
216227

217228
#endif

third_party/intel/backend/proton/include/pti/pti_metrics.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ extern "C" {
2020
DEVICE
2121
*****************************************************************************/
2222

23-
typedef void* pti_device_handle_t; //!< Abstraction of device within PTI
24-
2523
typedef struct _pti_pci_properties_t {
2624
uint8_t _domain;
2725
uint8_t _bus;

0 commit comments

Comments
 (0)