|
| 1 | +<% |
| 2 | +import re |
| 3 | +from templates import helper as th |
| 4 | +%><% |
| 5 | + OneApi=tags['$OneApi'] |
| 6 | + x=tags['$x'] |
| 7 | + X=x.upper() |
| 8 | +%> |
| 9 | +:orphan: |
| 10 | + |
| 11 | +.. _ZE_extension_driver_ddi_handles: |
| 12 | + |
| 13 | +======================================================== |
| 14 | + Driver Direct Device Interface (DDI) Handles Extension |
| 15 | +======================================================== |
| 16 | + |
| 17 | +API |
| 18 | +---- |
| 19 | + |
| 20 | +* Enumerations |
| 21 | + * ${x}_driver_ddi_handles_ext_version_t |
| 22 | + |
| 23 | +* Structures |
| 24 | + * ${x}_driver_ddi_handle_ext_flags_t |
| 25 | + * ${x}_driver_ddi_handles_ext_properties_t |
| 26 | + * ze_handle_t |
| 27 | + * ze_dditable_driver_t |
| 28 | + * zet_dditable_driver_t |
| 29 | + * zes_dditable_driver_t |
| 30 | + |
| 31 | +See `ze*_ddi.h` for definitions of `ze_dditable_driver_t`, `zet_dditable_driver_t`, `zes_dditable_driver_t`. |
| 32 | +`ze_handle_t` is defined in `ze_ddi_common.h`; include that header to use it. |
| 33 | + |
| 34 | +Handle Structures: |
| 35 | +~~~~~~~~~~~~~~~~~~ |
| 36 | + |
| 37 | + /// @brief Handle with pointer to Dispatch Container allocated by the driver at the beginning of every L0 Core handle |
| 38 | + typedef struct _ze_handle_t |
| 39 | + { |
| 40 | + ze_dditable_driver_t *pCore; // [in] pointer to _ze_dditable_t_ object related to this handle |
| 41 | + zet_dditable_driver_t *pTools; // [in] pointer to _zet_dditable_t_ object related to this handle |
| 42 | + zes_dditable_driver_t *pSysman; // [in] pointer to _zes_dditable_t_ object related to this handle |
| 43 | + } ze_handle_t; |
| 44 | +
|
| 45 | +DDI Handles Extension: |
| 46 | +~~~~~~~~~~~~~~~~~~~~~~ |
| 47 | + |
| 48 | +Nowadays there are many possible scenarios of multiple Level Zero runtimes in the system (GPU + NPU, dGPU + iGPU on Windows, mainstream GPU + legacy GPU on Linux). |
| 49 | +The loader's current handling of multi-runtime scenarios has several issues: |
| 50 | + - performance degradation |
| 51 | + - Level Zero API handles are stored in per-type maps; accessing these maps is costly, especially in hot paths |
| 52 | + - inserting and removing map entries requires an additional mutex to ensure thread-safety |
| 53 | + - there is a significant performance difference between running an application when only one runtime is active and when many runtimes are active, even if only one is really used by the application. |
| 54 | + While initialization overhead is acceptable, subsequent API calls should not suffer performance degradation. E.g. when running the `SetKernelArgSvmPointer <https://github.com/intel/compute-benchmarks/blob/master/source/benchmarks/api_overhead_benchmark/implementations/l0/set_kernel_arg_svm_pointer_l0.cpp>`_ test from `api_overhead_benchmark <https://github.com/intel/compute-benchmarks/tree/master/source/benchmarks/api_overhead_benchmark>`_, latency degrades from 0.817us to 2.724us solely because of the presence of a second runtime on the system. |
| 55 | + - when using driver extensions, application bypasses Level Zero loader and calls runtime functions directly |
| 56 | + - handles that the application gets from the loader are wrapped by the loader's handle type and can't be passed to the runtime directly, therefore translation is required; |
| 57 | + translation could be applied on two sides - app side / runtime side. How to detect double translation? |
| 58 | + - an extension may create a ze_event_t object that can then be used in the core API - the loader cannot recognize it |
| 59 | + |
| 60 | +The solution is to define a base layout for every handle type directly in the L0 spec, ensuring that all L0 API handles contain a header with pointers to the appropriate driver DDI tables. |
| 61 | +Backward compatibility is preserved, as the new loader will still support runtimes that do not support this extension. |
| 62 | +The loader should verify support based on the extension properties queried from zeDriverGetExtensionProperties. |
| 63 | + |
| 64 | +Example Implementation of L0 Loader functions: |
| 65 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 66 | + |
| 67 | +.. parsed-literal:: |
| 68 | + ZE_APIEXPORT ze_result_t ZE_APICALL zeContextCreate( |
| 69 | + ze_driver_handle_t hDriver, |
| 70 | + const ze_context_desc_t *desc, |
| 71 | + ze_context_handle_t *phContext) { |
| 72 | + return hDriver->pCore->pContext->pfnCreate(hDriver, desc, phContext); |
| 73 | + } |
0 commit comments