|
| 1 | += sycl_ext_oneapi_range_type |
| 2 | + |
| 3 | +:source-highlighter: coderay |
| 4 | +:coderay-linenums-mode: table |
| 5 | + |
| 6 | +// This section needs to be after the document title. |
| 7 | +:doctype: book |
| 8 | +:toc2: |
| 9 | +:toc: left |
| 10 | +:encoding: utf-8 |
| 11 | +:lang: en |
| 12 | +:dpcpp: pass:[DPC++] |
| 13 | +:endnote: —{nbsp}end{nbsp}note |
| 14 | + |
| 15 | +// Set the default source code type in this document to C++, |
| 16 | +// for syntax highlighting purposes. This is needed because |
| 17 | +// docbook uses c++ and html5 uses cpp. |
| 18 | +:language: {basebackend@docbook:c++:cpp} |
| 19 | + |
| 20 | + |
| 21 | +== Notice |
| 22 | + |
| 23 | +[%hardbreaks] |
| 24 | +Copyright (C) 2024 Intel Corporation. All rights reserved. |
| 25 | + |
| 26 | +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks |
| 27 | +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by |
| 28 | +permission by Khronos. |
| 29 | + |
| 30 | + |
| 31 | +== Contact |
| 32 | + |
| 33 | +To report problems with this extension, please open a new issue at: |
| 34 | + |
| 35 | +https://github.com/intel/llvm/issues |
| 36 | + |
| 37 | + |
| 38 | +== Dependencies |
| 39 | + |
| 40 | +This extension is written against the SYCL 2020 revision 9 specification. All |
| 41 | +references below to the "core SYCL specification" or to section numbers in the |
| 42 | +SYCL specification refer to that revision. |
| 43 | + |
| 44 | +This extension also depends on the following other SYCL extensions: |
| 45 | + |
| 46 | +* link:../experimental/sycl_ext_oneapi_kernel_properties.asciidoc[ |
| 47 | + sycl_ext_oneapi_kernel_properties] |
| 48 | + |
| 49 | + |
| 50 | +== Status |
| 51 | + |
| 52 | +This is a proposed extension specification, intended to gather community |
| 53 | +feedback. Interfaces defined in this specification may not be implemented yet |
| 54 | +or may be in a preliminary state. The specification itself may also change in |
| 55 | +incompatible ways before it is finalized. *Shipping software products should |
| 56 | +not rely on APIs defined in this specification.* |
| 57 | + |
| 58 | + |
| 59 | +== Overview |
| 60 | + |
| 61 | +The maximum number of work-items that can be launched in a single kernel |
| 62 | +depends on multiple factors. |
| 63 | +SYCL 2020 says that the total number of work-items must be representable as a |
| 64 | +`size_t`, but several implementations (including {dpcpp}) provide optimization |
| 65 | +options to assert that kernels will not require the full range of a `size_t`. |
| 66 | + |
| 67 | +This extension proposes a new kernel property that allows developers to declare |
| 68 | +the range requirements of individual kernels. This provides finer-grained |
| 69 | +control than existing compiler options, as well as improved error behavior. |
| 70 | + |
| 71 | +The property described in this extension is an advanced feature that most |
| 72 | +applications should not need to use. |
| 73 | +In most cases, applications get the best performance without using this |
| 74 | +property. |
| 75 | + |
| 76 | + |
| 77 | +== Specification |
| 78 | + |
| 79 | +=== Feature test macro |
| 80 | + |
| 81 | +This extension provides a feature-test macro as described in the core SYCL |
| 82 | +specification. An implementation supporting this extension must predefine the |
| 83 | +macro `SYCL_EXT_ONEAPI_RANGE_TYPE` to one of the values defined in the table |
| 84 | +below. Applications can test for the existence of this macro to determine if |
| 85 | +the implementation supports this feature, or applications can test the macro's |
| 86 | +value to determine which of the extension's features the implementation |
| 87 | +supports. |
| 88 | + |
| 89 | +[%header,cols="1,5"] |
| 90 | +|=== |
| 91 | +|Value |
| 92 | +|Description |
| 93 | + |
| 94 | +|1 |
| 95 | +|The APIs of this experimental extension are not versioned, so the |
| 96 | + feature-test macro always has this value. |
| 97 | +|=== |
| 98 | + |
| 99 | +=== New kernel property |
| 100 | + |
| 101 | +```c++ |
| 102 | +namespace sycl::ext::oneapi::experimental { |
| 103 | + |
| 104 | +struct range_type_key { |
| 105 | + template <typename T> |
| 106 | + using value_t = property_value<range_type_key, T>; |
| 107 | +}; |
| 108 | + |
| 109 | +template <typename T> |
| 110 | +inline constexpr range_type_key::value_t<T> range_type; |
| 111 | + |
| 112 | +} // namespace sycl::ext::oneapi::experimental |
| 113 | +``` |
| 114 | + |
| 115 | +|=== |
| 116 | +|Property|Description |
| 117 | + |
| 118 | +|`range_type` |
| 119 | +|The `range_type` property adds the requirement that the kernel must be |
| 120 | +compatible with kernel launches where the linear index of a work-item lies |
| 121 | +in the range [0, `std::numeric_limits<T>::max()`). |
| 122 | + |
| 123 | +If the implementation cannot satisfy this requirement, the implementation |
| 124 | +must throw an `exception` with the `errc::kernel_not_supported` error code, |
| 125 | +regardless of launch configuration. |
| 126 | +If the implementation can satisfy this requirement, but the kernel is |
| 127 | +launched with an incompatible configuration, the implementation must throw |
| 128 | +an `exception` with the `errc::nd_range` error code. |
| 129 | + |
| 130 | +`T` must be an integral type. |
| 131 | + |
| 132 | +|=== |
| 133 | + |
| 134 | +== Usage example |
| 135 | + |
| 136 | +```c++ |
| 137 | +namespace syclex = sycl::ext::oneapi::experimental; |
| 138 | + |
| 139 | +struct SmallKernel |
| 140 | +{ |
| 141 | + // Declare that this kernel supports linear work-item indices in [0, 2^31-1) (assuming a 32-bit int). |
| 142 | + auto get(syclex::properties_tag) const { |
| 143 | + return syclex::properties{syclex::range_type<int>}; |
| 144 | + } |
| 145 | +}; |
| 146 | + |
| 147 | +struct LargeKernel |
| 148 | +{ |
| 149 | + // Declare that this kernel supports linear work-item indices in [0, 2^64-1) (assuming a 64-bit size_t). |
| 150 | + auto get(syclex::properties_tag) const { |
| 151 | + return syclex::properties{syclex::range_type<size_t>}; |
| 152 | + } |
| 153 | +}; |
| 154 | + |
| 155 | +... |
| 156 | + |
| 157 | +// Throws an exception with errc::nd_range error code. |
| 158 | +// (because 2147483648 > 2147483647) |
| 159 | +q.parallel_for(2147483648, SmallKernel()); |
| 160 | + |
| 161 | +// May throw an exception with errc::kernel_not_supported error code. |
| 162 | +// (if implementation/device doesn't support 64-bit ranges) |
| 163 | +q.parallel_for(2147483648, LargeKernel()); |
| 164 | +``` |
| 165 | + |
| 166 | +== Issues |
| 167 | + |
| 168 | +None. |
0 commit comments