1010:encoding: utf-8
1111:lang: en
1212:dpcpp: pass:[DPC++]
13+ :endnote: —{nbsp}end{nbsp}note
1314
1415// Set the default source code type in this document to C++,
1516// for syntax highlighting purposes. This is needed because
@@ -106,61 +107,74 @@ If the `sycl::nd_range` parameter used to launch a kernel is incompatible with
106107the results of a kernel's launch queries, an implementation must throw a
107108synchronous exception with the `errc::nd_range` error code.
108109
109- [NOTE]
110- ====
111- The values returned by `ext_oneapi_get_info` account for all properties
112- attached to a kernel (via the mechanisms defined in the
110+ [_Note_: The values returned by `ext_oneapi_get_info` account for all
111+ properties attached to a kernel (via the mechanisms defined in the
113112sycl_ext_oneapi_kernel_properties extension), as well as the usage of features
114- like group algorithms and work-group local memory. Developers should assume
115- that the values will differ across kernels.
116- ====
113+ like group algorithms and work-group local memory.
114+ Developers should assume that the values will differ across
115+ kernels._{endnote}_]
117116
118117[source,c++]
119118----
120119namespace sycl {
121120
122121class kernel {
123122 public:
124- template <typename Param, typename... T>
125- /*return-type*/ ext_oneapi_get_info(T... args) const;
126- };
127123
128- }
129- ----
124+ // Only available if Param is max_work_item_sizes<1>
125+ template <typename Param>
126+ id<1> ext_oneapi_get_info(sycl::queue q) const;
130127
131- [source,c++]
132- ----
133- template <typename Param, typename... T>
134- /*return-type*/ ext_oneapi_get_info(T... args) const;
135- ----
136- _Constraints_: Available only when the types `+T...+` described by the parameter
137- pack match the types defined in the table below.
128+ // Only available if Param is max_work_item_sizes<2>
129+ template <typename Param>
130+ id<2> ext_oneapi_get_info(sycl::queue q) const;
138131
139- _Preconditions_: `Param` must be one of the `info::kernel` descriptors defined
140- in this extension.
132+ // Only available if Param is max_work_item_sizes<3>
133+ template <typename Param>
134+ id<3> ext_oneapi_get_info(sycl::queue q) const;
141135
142- _Returns_: Information about the kernel that applies when the kernel is
143- submitted with the configuration described by the parameter pack `+T...+`.
144- The return type is defined in the table below.
136+ // Only available if Param is max_work_group_size
137+ template <typename Param>
138+ size_t ext_oneapi_get_info(sycl::queue q) const;
145139
146- This extension adds several new queries to this interface, many of which
147- already have equivalents in the `kernel_device_specific` or `device`
148- namespaces.
140+ // Only available if Param is max_num_work_groups
141+ template <typename Param>
142+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r, size_t bytes = 0) const;
149143
150- NOTE: These queries are queue- and not device-specific because it is
151- anticipated that implementations will introduce finer-grained queue
152- controls that impact the scheduling of kernels.
144+ // Only available if Param is max_num_work_groups
145+ template <typename Param>
146+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r, size_t bytes = 0) const;
153147
154- NOTE: Allowing devices to return a value of 1 for these queries maximizes the
155- chances that code written to use certain extension remains portable. However,
156- the performance of kernels using only one work-group, sub-group or work-item
157- may be limited on some (highly parallel) devices. If certain properties (e.g.
158- forward progress guarantees, cross-work-group synchronization) are being used
159- as part of a performance optimization, developers should check that the values
160- returned by these queries is not 1.
148+ // Only available if Param is max_num_work_groups
149+ template <typename Param>
150+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r, size_t bytes = 0) const;
151+
152+ // Only available if Param is max_sub_group_size
153+ template <typename Param>
154+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
155+
156+ // Only available if Param is max_sub_group_size
157+ template <typename Param>
158+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
159+
160+ // Only available if Param is max_sub_group_size
161+ template <typename Param>
162+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
163+
164+ // Only available if Param is num_sub_groups
165+ template <typename Param>
166+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
167+
168+ // Only available if Param is num_sub_groups
169+ template <typename Param>
170+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
171+
172+ // Only available if Param is num_sub_groups
173+ template <typename Param>
174+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
175+
176+ };
161177
162- [source, c++]
163- ----
164178namespace ext::oneapi::experimental::info::kernel {
165179
166180template <uint32_t Dimensions>
@@ -169,91 +183,156 @@ struct max_work_item_sizes;
169183struct max_work_group_size;
170184struct max_num_work_groups;
171185
172- }
186+ struct max_sub_group_size;
187+ struct num_sub_groups;
188+
189+ } // namespace ext::oneapi::experimental::info::kernel
190+
191+ } // namespace sycl
173192----
174193
175- [%header,cols="1,5,5,5"]
176- |===
177- |Kernel Descriptor
178- |Argument Types
179- |Return Type
180- |Description
194+ ==== Querying valid launch configurations
195+
196+ This extension adds several new queries for reasoning about the set of valid
197+ launch configurations for a given kernel, many of which already have
198+ equivalents in the `kernel_device_specific` or `device` namespaces.
199+
200+ [_Note_: These queries are queue- and not device-specific because it is
201+ anticipated that implementations will introduce finer-grained queue
202+ controls that impact the scheduling of kernels._{endnote}_]
203+
204+ [_Note_: Allowing devices to return a value of 1 for these queries maximizes
205+ the chances that code written to use certain extension remains portable.
206+ However, the performance of kernels using only one work-group, sub-group or
207+ work-item may be limited on some (highly parallel) devices.
208+ If certain properties (e.g. forward progress guarantees, cross-work-group
209+ synchronization) are being used as part of a performance optimization,
210+ developers should check that the values returned by these queries is not
211+ 1._{endnote}_]
212+
213+ '''
214+
215+ [source,c++]
216+ ----
217+ template <typename Param>
218+ id<1> ext_oneapi_get_info(sycl::queue q) const; // (1)
219+
220+ template <typename Param>
221+ id<2> ext_oneapi_get_info(sycl::queue q) const; // (2)
222+
223+ template <typename Param>
224+ id<3> ext_oneapi_get_info(sycl::queue q) const; // (3)
225+ ----
226+ _Constraints (1)_: `Param` is `max_work_item_sizes<1>`.
227+
228+ _Constraints (2)_: `Param` is `max_work_item_sizes<2>`.
229+
230+ _Constraints (3)_: `Param` is `max_work_item_sizes<3>`.
231+
232+ _Returns_: The maximum number of work-items that are permitted in each
233+ dimension of a work-group, when the kernel is submitted to the specified queue,
234+ accounting for any kernel properties or features.
235+ If the kernel can be submitted to the specified queue without an error, the
236+ minimum value returned by this query is 1, otherwise it is 0.
181237
182- |`template <uint32_t Dimensions>
183- max_work_item_sizes`
184- |`sycl::queue`
185- |`id<Dimensions>`
186- |Returns the maximum number of work-items that are permitted in each dimension
187- of a work-group, when the kernel is submitted to the specified queue,
188- accounting for any kernel properties or features. If the kernel can be
189- submitted to the specified queue without an error, the minimum value returned
190- by this query is 1, otherwise it is 0.
191-
192- |`max_work_group_size`
193- |`sycl::queue`
194- |`size_t`
195- |Returns the maximum number of work-items that are permitted in a work-group,
238+ '''
239+
240+ [source,c++]
241+ ----
242+ template <typename Param>
243+ size_t ext_oneapi_get_info(sycl::queue q) const;
244+ ----
245+ _Constraints_: `Param` is `max_work_group_size`.
246+
247+ _Returns_: The maximum number of work-items that are permitted in a work-group,
196248when the kernel is submitted to the specified queue, accounting for any
197- kernel properties or features. If the kernel can be submitted to the specified
198- queue without an error, the minimum value returned by this query is 1,
199- otherwise it is 0.
200-
201- |`max_num_work_groups`
202- |`sycl::queue`, `sycl::range`, `size_t`
203- |`size_t`
204- |Returns the maximum number of work-groups, when the kernel is submitted to the
205- specified queue with the specified work-group size and the specified amount of
206- dynamic work-group local memory (in bytes), accounting for any kernel
207- properties or features. If the specified work-group size is 0, which is
208- invalid, then the implementation will throw a synchronous exception with the
209- `errc::invalid` error code. If the kernel can be submitted to the specified
210- queue without an error, the minimum value returned by this query is 1,
211- otherwise it is 0.
249+ kernel properties or features.
250+ If the kernel can be submitted to the specified queue without an error, the
251+ minimum value returned by this query is 1, otherwise it is 0.
212252
213- |===
253+ '''
214254
215- A separate set of launch queries can be used to reason about how an
216- implementation will launch a kernel on the specified queue. The values of these
217- queries should also be checked if a kernel is expected to be launched in a
218- specific way (e.g., if the kernel requires two sub-groups for correctness).
255+ [source,c++]
256+ ----
257+ template <typename Param>
258+ size_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r, size_t bytes = 0) const;
219259
220- [source, c++]
260+ template <typename Param>
261+ size_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r, size_t bytes = 0) const;
262+
263+ template <typename Param>
264+ size_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r, size_t bytes = 0) const;
221265----
222- namespace ext::oneapi::experimental::info::kernel {
266+ _Constraints_: `Param` is `max_num_work_groups`.
223267
224- struct max_sub_group_size;
225- struct num_sub_groups;
268+ _Returns_: The maximum number of work-groups, when the kernel is submitted to
269+ the specified queue with the specified work-group size and the specified amount
270+ of dynamic work-group local memory (in bytes), accounting for any kernel
271+ properties or features.
272+ If the kernel can be submitted to the specified queue without an
273+ error, the minimum value returned by this query is 1, otherwise it is 0.
274+
275+ _Throws_: A synchronous `exception` with the error code `errc::invalid` if the
276+ work-group size `r` is 0.
277+
278+
279+ ==== Querying launch behavior
280+
281+ A separate set of launch queries can be used to reason about how an
282+ implementation will launch a kernel on the specified queue.
283+ The values of these queries should also be checked if a kernel is expected to
284+ be launched in a specific way (e.g., if the kernel requires two sub-groups for
285+ correctness).
286+
287+ '''
226288
227- }
289+ [source,c++]
228290----
291+ template <typename Param>
292+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
229293
230- [%header,cols="1,5,5,5"]
231- |===
232- |Kernel Descriptor
233- |Argument Types
234- |Return Type
235- |Description
294+ template <typename Param>
295+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
236296
237- |`max_sub_group_size`
238- |`sycl::queue`, `sycl::range`
239- |`uint32_t`
240- |Returns the maximum sub-group size, when the kernel is submitted to the
297+ template <typename Param>
298+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
299+ ----
300+ _Constraints_: `Param` is `max_sub_group_size`.
301+
302+ _Returns_: The maximum sub-group size, when the kernel is submitted to the
241303specified queue with the specified work-group size, accounting for any kernel
242- properties or features. The return value of this query must match the value
243- returned by `sub_group::get_max_local_range()` inside the kernel. If the kernel
244- can be submitted to the specified queue without an error, the minimum value
245- returned by this query is 1, otherwise it is 0.
246-
247- |`num_sub_groups`
248- |`sycl::queue`, `sycl::range`
249- |`uint32_t`
250- |Returns the number of sub-groups per work-group, when the kernel is submitted
251- to the specified queue with the specified work-group size, accounting for any
252- kernel properties or features. If the kernel can be submitted to the specified
253- queue without an error, the minimum value returned by this query is 1,
254- otherwise it is 0.
304+ properties or features.
305+ The return value of this query must match the value returned by
306+ `sub_group::get_max_local_range()` inside the kernel.
307+ If the kernel can be submitted to the specified queue without an error, the
308+ minimum value returned by this query is 1, otherwise it is 0.
255309
256- |===
310+ _Throws_: A synchronous `exception` with the error code `errc::invalid` if the
311+ work-group size `r` is 0.
312+
313+ '''
314+
315+ [source,c++]
316+ ----
317+ template <typename Param>
318+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<1> r) const;
319+
320+ template <typename Param>
321+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<2> r) const;
322+
323+ template <typename Param>
324+ uint32_t ext_oneapi_get_info(sycl::queue q, sycl::range<3> r) const;
325+ ----
326+ _Constraints_: `Param` is `num_sub_groups`.
327+
328+ _Returns_: The number of sub-groups per work-group, when the kernel is
329+ submitted to the specified queue with the specified work-group size, accounting
330+ for any kernel properties or features.
331+ If the kernel can be submitted to the specified queue without an error, the
332+ minimum value returned by this query is 1, otherwise it is 0.
333+
334+ _Throws_: A synchronous `exception` with the error code `errc::invalid` if the
335+ work-group size `r` is 0.
257336
258337== Issues
259338
0 commit comments