Skip to content

Commit 17336b7

Browse files
authored
Enhancement and bug fixes for 1.9.3 release
Misc infrastructure updates, update sampler support for bindless images, update new format support for images
1 parent 88819b5 commit 17336b7

File tree

15 files changed

+422
-144
lines changed

15 files changed

+422
-144
lines changed

scripts/Doxyfile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,7 +1226,7 @@ HTML_COLORSTYLE_GAMMA = 80
12261226
# The default value is: NO.
12271227
# This tag requires that the tag GENERATE_HTML is set to YES.
12281228

1229-
HTML_TIMESTAMP = YES
1229+
# HTML_TIMESTAMP = YES
12301230

12311231
# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
12321232
# documentation will contain a main index with vertical navigation menus that
@@ -1662,7 +1662,7 @@ EXTRA_SEARCH_MAPPINGS =
16621662
# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
16631663
# The default value is: YES.
16641664

1665-
GENERATE_LATEX = YES
1665+
GENERATE_LATEX = NO
16661666

16671667
# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
16681668
# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
@@ -1854,7 +1854,7 @@ LATEX_BIB_STYLE = plain
18541854
# The default value is: NO.
18551855
# This tag requires that the tag GENERATE_LATEX is set to YES.
18561856

1857-
LATEX_TIMESTAMP = YES
1857+
# LATEX_TIMESTAMP = YES
18581858

18591859
# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute)
18601860
# path from which the emoji images will be read. If a relative path is entered,
@@ -2142,7 +2142,8 @@ INCLUDE_FILE_PATTERNS =
21422142
# recursively expanded use the := operator instead of the = operator.
21432143
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
21442144

2145-
PREDEFINED = __cplusplus
2145+
PREDEFINED = __cplusplus \
2146+
"module=modul3"
21462147

21472148
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
21482149
# tag can be used to specify a list of macro names that should be expanded. The

scripts/core/EXT_EXP_BindlessImages.rst

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,17 @@ In this extension, we propose the following additions:
4747
* Provide a new image descriptor and flags for Bindless images.
4848
* Support for creation of images on linearly allocated memory backed by USM.
4949
* Extension API to create an image handle from pitched memory
50+
* Create Bindless sampled images
5051

5152
A "Bindless image" can be created by passing ${x}_image_bindless_exp_desc_t to pNext member of
5253
${x}_image_desc_t and setting the flags value to ${X}_IMAGE_BINDLESS_EXP_FLAG_BINDLESS.
5354

55+
A "Bindless sampled image" can be created by passing ${x}_image_bindless_exp_desc_t to pNext member of
56+
${x}_image_desc_t and setting the flags to a combination of ${X}_IMAGE_BINDLESS_EXP_FLAG_BINDLESS and ${X}_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE.
57+
When an image view is created from a bindless sampled image, the sampling modes can be redefined by passing a sampler descriptor in the pNext field of the ${x}_image_bindless_exp_desc_t struct.
58+
An image view created from a bindless sampled image without setting ${X}_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE is an unsampled image.
59+
A sampled image view can be created from a bindless unsampled image by setting ${X}_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE and passing a sampler descriptor in the pNext field of the ${x}_image_bindless_exp_desc_t struct.
60+
5461
This extension is complementary to and may be used in conjunction with the `ZE_extension_image_view <https://spec.oneapi.io/level-zero/latest/core/EXT_ImageView.html#image-view-extension>`_ extension.
5562

5663
Programming example with Bindless images
@@ -178,4 +185,101 @@ Programming example with pitched memory usage
178185
179186
// Once all operations on the image are complete, we need to destroy the image handle and free the memory
180187
${x}ImageDestroy(hImage);
181-
${x}MemFree(hContext, pitchedPtr);
188+
${x}MemFree(hContext, pitchedPtr);
189+
190+
Programming example with Bindless sampled images
191+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
192+
193+
.. parsed-literal::
194+
195+
// 2D image dimensions
196+
size_t imageWidth = 1024;
197+
size_t imageHeight = 1024;
198+
199+
// Single-precision float image format with one channel
200+
${x}_image_format_t imageFormat = {
201+
ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_FLOAT,
202+
ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_X,
203+
ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_X
204+
};
205+
206+
// Define sampler descriptor
207+
${x}_sampler_desc_t samplerDesc = {
208+
ZE_STRUCTURE_TYPE_SAMPLER_DESC,
209+
nullptr,
210+
ZE_SAMPLER_ADDRESS_MODE_CLAMP,
211+
ZE_SAMPLER_FILTER_MODE_LINEAR,
212+
true
213+
};
214+
215+
// Create an image descriptor for bindless image
216+
${x}_image_desc_t imageDesc = {
217+
ZE_STRUCTURE_TYPE_IMAGE_DESC,
218+
nullptr,
219+
0,
220+
ZE_IMAGE_TYPE_2D,
221+
imageFormat,
222+
imageWidth, imageHeight, 0, 0, 0
223+
};
224+
225+
${x}_image_bindless_exp_desc_t bindlessImageDesc = {ZE_STRUCTURE_TYPE_BINDLESS_IMAGE_EXP_DESC};
226+
bindlessImageDesc.flags = ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS | ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE;
227+
imageDesc.pNext = &bindlessImageDesc;
228+
229+
bindlessImageDesc.pNext = &samplerDesc;
230+
231+
// Create bindless sampled image
232+
// pass ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS and ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE to zeImageCreate()
233+
${x}_image_handle_t hImage;
234+
${x}ImageCreate(hContext, hDevice, &imageDesc, &hImage);
235+
236+
// Create an image view from bindless sampled image
237+
// define sampler descriptor for view
238+
${x}_sampler_desc_t samplerDescForView = {
239+
ZE_STRUCTURE_TYPE_SAMPLER_DESC,
240+
nullptr,
241+
ZE_SAMPLER_ADDRESS_MODE_CLAMP,
242+
ZE_SAMPLER_FILTER_MODE_NEAREST,
243+
true
244+
};
245+
246+
${x}_image_format_t imageViewFormat = {
247+
ZE_IMAGE_FORMAT_LAYOUT_32, ZE_IMAGE_FORMAT_TYPE_UINT,
248+
ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_X,
249+
ZE_IMAGE_FORMAT_SWIZZLE_R, ZE_IMAGE_FORMAT_SWIZZLE_X
250+
};
251+
252+
// image descriptor for bindless image view
253+
${x}_image_desc_t imageViewDesc = {
254+
ZE_STRUCTURE_TYPE_IMAGE_DESC,
255+
nullptr,
256+
0,
257+
ZE_IMAGE_TYPE_2D,
258+
imageViewFormat,
259+
128, 128, 0, 0, 0
260+
};
261+
imageViewDesc.pNext = &bindlessImageDesc;
262+
bindlessImageDesc.pNext = &samplerDescForView;
263+
${x}_image_handle_t hImageView;
264+
265+
${x}ImageViewCreateExt(hContext, hDevice, &imageViewDesc, hImage, &hImageView);
266+
267+
// If ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE is not set, an unsampled image view is created
268+
${x}_image_handle_t hUnsampledImageView;
269+
bindlessImageDesc.flags = ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS;
270+
bindlessImageDesc.pNext = nullptr;
271+
${x}ImageViewCreateExt(hContext, hDevice, &imageViewDesc, hImage, &hUnsampledImageView);
272+
273+
// Create an image view from bindless unsampled image
274+
${x}_image_handle_t hUnsampledImage;
275+
${x}_image_handle_t hSampledImageView;
276+
bindlessImageDesc.flags = ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS;
277+
bindlessImageDesc.pNext = nullptr;
278+
imageDesc.pNext = &bindlessImageDesc;
279+
280+
// create unsampled image
281+
${x}ImageCreate(hContext, hDevice, &imageDesc, &hUnsampledImage);
282+
283+
bindlessImageDesc.flags = ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS | ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE;
284+
bindlessImageDesc.pNext = &samplerDescForView;
285+
${x}ImageViewCreateExt(hContext, hDevice, &imageDesc, hUnsampledImage, &hSampledImageView);

scripts/core/EXT_Exp_ImageView.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ from templates import helper as th
66
x=tags['$x']
77
X=x.upper()
88
%>
9+
10+
<%!
11+
from parse_specs import _version_compare_gequal
12+
%>
13+
914
:orphan:
1015

1116
.. _ZE_experimental_image_view:
@@ -14,7 +19,7 @@ from templates import helper as th
1419
Image View Extension
1520
=========================
1621

17-
%if ver >= 1.5:
22+
%if _version_compare_gequal(ver, "1.5"):
1823
This experimental extension is deprecated and replaced by the :ref:`${th.subt(namespace, tags, X)}_extension_image_view <${th.subt(namespace, tags, X)}_extension_image_view>` standard extension.
1924
%endif
2025

scripts/core/EXT_Exp_ImageViewPlanar.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ from templates import helper as th
66
x=tags['$x']
77
X=x.upper()
88
%>
9+
10+
<%!
11+
from parse_specs import _version_compare_gequal
12+
%>
13+
914
:orphan:
1015

1116
.. _ZE_experimental_image_view_planar:
@@ -14,7 +19,7 @@ from templates import helper as th
1419
Image View Planar Extension
1520
=============================
1621

17-
%if ver >= 1.5:
22+
%if _version_compare_gequal(ver, "1.5"):
1823
This experimental extension is deprecated and replaced by the :ref:`${th.subt(namespace, tags, X)}_extension_image_view_planar <${th.subt(namespace, tags, X)}_extension_image_view_planar>` standard extension.
1924
%endif
2025

scripts/core/PROG.rst

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44
x=tags['$x']
55
X=x.upper()
66
%>
7+
8+
<%!
9+
from parse_specs import _version_compare_less, _version_compare_gequal
10+
%>
11+
712
.. _core-programming-guide:
813

914
========================
@@ -47,7 +52,7 @@ The following diagram illustrates the relationship between the driver, device an
4752

4853
.. image:: ../images/core_device.png
4954

50-
%if ver >= 1.7:
55+
%if _version_compare_gequal(ver, "1.7"):
5156
Level Zero device model hierarchy is composed of **Root Devices** and **Sub-Devices**: A root-device may contain two or more sub-devices and a sub-device shall belong to a single root-device.
5257
A root-device may not contain a single sub-device, as that would be the same root-device. A root device may also be a device with no sub-devices.
5358

@@ -620,10 +625,10 @@ External memory handles may be imported from other APIs, or exported for use in
620625
Importing and exporting external memory is an optional feature.
621626
Devices may describe the types of external memory handles they support using ${x}DeviceGetExternalMemoryProperties.
622627

623-
%if ver >= 1.5:
628+
%if _version_compare_gequal(ver, "1.5"):
624629
Importing and exporting external memory is supported for device and host memory allocations and images.
625630
%endif
626-
%if ver < 1.5:
631+
%if _version_compare_less(ver, "1.5"):
627632
Importing and exporting external memory is supported for device memory allocations and images.
628633
%endif
629634

@@ -1104,10 +1109,10 @@ A kernel timestamp event is a special type of event that records device timestam
11041109
.. parsed-literal::
11051110
11061111
// Get timestamp frequency
1107-
%if ver >= 1.1:
1112+
%if _version_compare_gequal(ver, "1.1"):
11081113
const double timestampFreq = NS_IN_SEC / device_properties.timerResolution;
11091114
%endif
1110-
%if ver < 1.1:
1115+
%if _version_compare_less(ver, "1.1"):
11111116
const uint64_t timestampFreq = device_properties.timerResolution;
11121117
%endif
11131118
const uint64_t timestampMaxValue = ~(-1L << device_properties.kernelTimestampValidBits);
@@ -1712,7 +1717,7 @@ Environment Variables
17121717

17131718
The following table documents the supported knobs for overriding default functional behavior.
17141719

1715-
%if ver < 1.7:
1720+
%if _version_compare_less(ver, "1.7"):
17161721

17171722
+-----------------+-------------------------------------+------------+-----------------------------------------------------------------------------------+
17181723
| Category | Name | Values | Description |
@@ -1726,7 +1731,7 @@ The following table documents the supported knobs for overriding default functio
17261731

17271732
%endif
17281733

1729-
%if ver >= 1.7:
1734+
%if _version_compare_gequal(ver, "1.7"):
17301735

17311736
+-----------------+-------------------------------------+-----------------------------------+-----------------------------------------------------------------------------------+
17321737
| Category | Name | Values | Description |
@@ -1766,7 +1771,7 @@ The values are specific to system configuration; e.g., the number of devices and
17661771
The values are specific to the order in which devices are reported by the driver; i.e., the first device maps to ordinal 0, the second device to ordinal 1, and so forth.
17671772
If the affinity mask is not set, then all devices and sub-devices are reported; as is the default behavior.
17681773

1769-
%if ver >= 1.7:
1774+
%if _version_compare_gequal(ver, "1.7"):
17701775
The affinity mask masks the devices as defined by the value set in the ${X}_FLAT_DEVICE_HIERARCHY environment variable, i.e., a Level Zero driver shall read
17711776
first ${X}_FLAT_DEVICE_HIERARCHY to determine the device handles to be used by the application and then interpret the values passed in ${X}_AFFINITY_MASK
17721777
based on the device model selected.
@@ -1776,7 +1781,7 @@ The order of the devices reported by the ${x}DeviceGet is implementation-specifi
17761781

17771782
The order of the devices reported by the ${x}DeviceGet can be forced to be consistent by setting the ${X}_ENABLE_PCI_ID_DEVICE_ORDER environment variable.
17781783

1779-
%if ver < 1.7:
1784+
%if _version_compare_less(ver, "1.7"):
17801785
The following examples demonstrate proper usage for a system configuration of two devices, each with four sub-devices:
17811786

17821787
- `0, 1`: all devices and sub-devices are reported (same as default)
@@ -1788,7 +1793,7 @@ The following examples demonstrate proper usage for a system configuration of tw
17881793

17891794
%endif
17901795

1791-
%if ver >= 1.7:
1796+
%if _version_compare_gequal(ver, "1.7"):
17921797
The following examples demonstrate proper usage for a system configuration composed of two physical devices, each of which can be further
17931798
sub-divided into four smaller devices. For the purpose of these examples, we will refer to the two physical devices as `parent devices`
17941799
and to the smaller sub-devices as `tiles`.
@@ -2125,10 +2130,10 @@ such as multiple levels of indirection, there are two methods available:
21252130

21262131
+ If the driver is unable to make all allocations resident, then the call to ${x}CommandQueueExecuteCommandLists will return ${X}_RESULT_ERROR_OUT_OF_DEVICE_MEMORY
21272132

2128-
%if ver >= 1.6:
2133+
%if _version_compare_gequal(ver, "1.6"):
21292134
2. Explicit ${x}ContextMakeMemoryResident APIs are included for the application to dynamically change residency as needed.
21302135
%endif
2131-
%if ver < 1.6:
2136+
%if _version_compare_less(ver, "1.6"):
21322137
2. Explicit ${x}ContextMakeMemoryResident APIs are included for the application to dynamically change residency as needed. (Windows-only)
21332138
%endif
21342139

@@ -2283,18 +2288,18 @@ The following code examples demonstrate how to use the memory IPC APIs:
22832288
${x}MemCloseIpcHandle(hContext, dptr);
22842289
22852290
2286-
%if ver >= 1.6:
2291+
%if _version_compare_gequal(ver, "1.6"):
22872292
5. Finally, return the IPC handle to the driver with ${x}MemPutIpcHandle and
22882293
free the device pointer in the sending process. If ${x}MemPutIpcHandle is not called,
22892294
any actions performed by that call are eventually done by ${x}MemFree.
22902295
%endif
2291-
%if ver < 1.6:
2296+
%if _version_compare_less(ver, "1.6"):
22922297
5. Finally, free the device pointer in the sending process:
22932298
%endif
22942299

22952300
.. parsed-literal::
22962301
2297-
%if ver >= 1.6:
2302+
%if _version_compare_gequal(ver, "1.6"):
22982303
${x}MemPutIpcHandle(hContext, hIpc);
22992304
%endif
23002305
${x}MemFree(hContext, dptr);
@@ -2384,19 +2389,19 @@ Note, there is no guaranteed address equivalence for the values of ``hEvent`` in
23842389
${x}EventDestroy(hEvent);
23852390
${x}EventPoolCloseIpcHandle(&hEventPool);
23862391
2387-
%if ver >= 1.6:
2392+
%if _version_compare_gequal(ver, "1.6"):
23882393
5. Finally, return the IPC handle to the driver with ${x}EventPoolPutIpcHandle and
23892394
free the event pool in the sending process. If ${x}EventPoolPutIpcHandle is not called,
23902395
any actions performed by that call are eventually done by ${x}EventPoolDestroy.
23912396
%endif
2392-
%if ver < 1.6:
2397+
%if _version_compare_less(ver, "1.6"):
23932398
5. Finally, free the event pool handle in the sending process:
23942399
%endif
23952400

23962401
.. parsed-literal::
23972402
23982403
${x}EventDestroy(hEvent);
2399-
%if ver >= 1.6:
2404+
%if _version_compare_gequal(ver, "1.6"):
24002405
${x}EventPoolPutIpcHandle(hContext, hIpcEventPool);
24012406
%endif
24022407
${x}EventPoolDestroy(hEventPool);

scripts/core/SPIRV.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ from templates import helper as th
77
x=tags['$x']
88
X=x.upper()
99
%>
10+
<%!
11+
from parse_specs import _version_compare_gequal
12+
%>
13+
1014
==========================
1115
SPIR-V Programming Guide
1216
==========================
@@ -412,7 +416,7 @@ The following restrictions apply to the
412416
words, the write must begin at a 32-bit boundary. There is no
413417
restriction on the y-component of the coordinate.
414418

415-
%if ver >= 1.1:
419+
%if _version_compare_gequal(ver, "1.1"):
416420
Floating-Point Atomics
417421
----------------------
418422

@@ -459,7 +463,7 @@ Additionally:
459463

460464
%endif
461465

462-
%if ver >= 1.2:
466+
%if _version_compare_gequal(ver, "1.2"):
463467
Extended Subgroups
464468
------------------
465469

@@ -651,7 +655,7 @@ optional *ClusterSize* operand.
651655

652656
%endif
653657

654-
%if ver >= 1.2:
658+
%if _version_compare_gequal(ver, "1.2"):
655659
Linkonce ODR
656660
------------
657661

@@ -664,7 +668,7 @@ include the **LinkOnceODR** linkage type.
664668

665669
%endif
666670

667-
%if ver >= 1.5:
671+
%if _version_compare_gequal(ver, "1.5"):
668672
Bfloat16 Conversions
669673
--------------------
670674

0 commit comments

Comments
 (0)