Skip to content

Commit 32d2d37

Browse files
author
Victor Lomuller
committed
split extension into static and dynamic case, add more tests
1 parent 8110c20 commit 32d2d37

19 files changed

+512
-181
lines changed

clang/lib/Sema/Sema.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1861,7 +1861,9 @@ class DeferredDiagnosticsEmitter
18611861
if (!S.SYCL().checkAllowedSYCLInitializer(VD) &&
18621862
!S.SYCL()
18631863
.isTypeDecoratedWithDeclAttribute<
1864-
SYCLGlobalVariableAllowedAttr>(VD->getType())) {
1864+
SYCLGlobalVariableAllowedAttr>(VD->getType()) &&
1865+
!S.SYCL().isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
1866+
VD->getType())) {
18651867
S.Diag(Loc, diag::err_sycl_restrict)
18661868
<< SemaSYCL::KernelConstStaticVariable;
18671869
return;

clang/lib/Sema/SemaDecl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7851,6 +7851,8 @@ NamedDecl *Sema::ActOnVariableDeclarator(
78517851
// attribute.
78527852
if (SCSpec == DeclSpec::SCS_static && !R.isConstant(Context) &&
78537853
!SYCL().isTypeDecoratedWithDeclAttribute<SYCLGlobalVariableAllowedAttr>(
7854+
NewVD->getType()) &&
7855+
!SYCL().isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
78547856
NewVD->getType()))
78557857
SYCL().DiagIfDeviceCode(D.getIdentifierLoc(), diag::err_sycl_restrict)
78567858
<< SemaSYCL::KernelNonConstStaticDataVariable;

clang/lib/Sema/SemaExpr.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,15 +233,19 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
233233
VD->getStorageClass() == SC_Static &&
234234
!VD->hasAttr<SYCLGlobalVarAttr>() &&
235235
!SemaSYCL::isTypeDecoratedWithDeclAttribute<
236-
SYCLGlobalVariableAllowedAttr>(VD->getType()))
236+
SYCLGlobalVariableAllowedAttr>(VD->getType()) &&
237+
!SemaSYCL::isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
238+
VD->getType()))
237239
SYCL().DiagIfDeviceCode(*Locs.begin(), diag::err_sycl_restrict)
238240
<< SemaSYCL::KernelNonConstStaticDataVariable;
239241
// Non-const globals are not allowed in SYCL except for ESIMD or with the
240242
// SYCLGlobalVar or SYCLGlobalVariableAllowed attribute.
241243
else if (IsRuntimeEvaluated && !IsEsimdPrivateGlobal && !IsConst &&
242244
VD->hasGlobalStorage() && !VD->hasAttr<SYCLGlobalVarAttr>() &&
243245
!SemaSYCL::isTypeDecoratedWithDeclAttribute<
244-
SYCLGlobalVariableAllowedAttr>(VD->getType()))
246+
SYCLGlobalVariableAllowedAttr>(VD->getType()) &&
247+
!SemaSYCL::isTypeDecoratedWithDeclAttribute<SYCLScopeAttr>(
248+
VD->getType()))
245249
SYCL().DiagIfDeviceCode(*Locs.begin(), diag::err_sycl_restrict)
246250
<< SemaSYCL::KernelGlobalVariable;
247251
// ESIMD globals cannot be used in a SYCL context.

clang/test/SemaSYCL/sycl_wg_scope.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,19 @@ template <typename T> class B15 {
7575
// expected-error@#B15-field {{type with a SYCL work group scope attribute cannot be used with a non-static data members}}
7676
// expected-note@+1 {{in instantiation of template class 'B15<G1>' requested here}}
7777
B15<G1> b15;
78+
79+
G1 g16;
80+
static G1 g17;
81+
82+
struct Wrap {
83+
static G1 g18;
84+
};
85+
86+
__attribute__((sycl_device)) void ref_func() {
87+
G1 g19;
88+
static G1 g20;
89+
90+
(void)g16;
91+
(void)g17;
92+
(void)Wrap::g18;
93+
}
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
= sycl_ext_oneapi_work_group_scratch_memory
2+
3+
:source-highlighter: coderay
4+
:coderay-linenums-mode: table
5+
6+
// This section needs to be after the document title.
7+
:doctype: book
8+
:toc2:
9+
:toc: left
10+
:encoding: utf-8
11+
:lang: en
12+
:dpcpp: pass:[DPC++]
13+
14+
// Set the default source code type in this document to C++,
15+
// for syntax highlighting purposes. This is needed because
16+
// docbook uses c++ and html5 uses cpp.
17+
:language: {basebackend@docbook:c++:cpp}
18+
19+
20+
== Notice
21+
22+
[%hardbreaks]
23+
Copyright (C) 2024 Intel Corporation. All rights reserved.
24+
25+
Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks
26+
of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by
27+
permission by Khronos.
28+
29+
30+
== Contact
31+
32+
To report problems with this extension, please open a new issue at:
33+
34+
https://github.com/intel/llvm/issues
35+
36+
37+
== Dependencies
38+
39+
This extension is written against the SYCL 2020 revision 8 specification. All
40+
references below to the "core SYCL specification" or to section numbers in the
41+
SYCL specification refer to that revision.
42+
43+
The following extensions are required:
44+
45+
- link:../experimental/sycl_ext_oneapi_properties.asciidoc[sycl_ext_oneapi_properties]
46+
47+
- link:../experimental/sycl_ext_oneapi_kernel_properties.asciidoc[sycl_ext_oneapi_kernel_properties]
48+
49+
50+
== Status
51+
52+
This is an experimental extension specification, intended to gather community
53+
feedback. Interfaces defined in this specification may not be implemented yet
54+
or may be in a preliminary state. The specification itself may also change in
55+
incompatible ways before it is finalized. *Shipping software products should
56+
not rely on APIs defined in this specification.*
57+
58+
59+
== Overview
60+
61+
This extension adds a way to allocate device local memory, without explicitly passing a
62+
kernel argument: `get_work_group_scratch_memory`. It provides access to a dynamically sized
63+
buffer without passing it as an argument to the kernel.
64+
Device local memory is memory that is shared by all work-items in a work-group.
65+
The behavior is similar to the usage of unbounded array with the CUDA `+__shared__+` keyword.
66+
67+
68+
== Specification
69+
70+
=== Feature test macro
71+
72+
This extension provides a feature-test macro as described in the core SYCL
73+
specification. An implementation supporting this extension must predefine the
74+
macro `SYCL_EXT_ONEAPI_WORK_GROUP_DYNAMIC` to one of the values defined in the
75+
table below. Applications can test for the existence of this macro to
76+
determine if the implementation supports this feature, or applications can test
77+
the macro's value to determine which of the extension's features the
78+
implementation supports.
79+
80+
[%header,cols="1,5"]
81+
|===
82+
|Value
83+
|Description
84+
85+
|1
86+
|The APIs of this experimental extension are not versioned, so the
87+
feature-test macro always has this value.
88+
|===
89+
90+
91+
=== `get_work_group_scratch_memory` function
92+
93+
The `get_work_group_scratch_memory` function provides access
94+
to a dynamically allocated buffer in the device local memory.
95+
96+
[source,c++]
97+
----
98+
void* get_work_group_scratch_memory()
99+
----
100+
_Constraints_: `T` must be trivially constructible and trivially destructible.
101+
102+
_Returns_: A pointer to a dynamically allocated buffer
103+
in the device local memory.
104+
105+
The size of the allocation is unknown at compile-time,
106+
and must be communicated to the SYCL implementation via the
107+
`work_group_scratch_size` property. Every call to
108+
`get_work_group_scratch_memory` returns the same allocation
109+
in device local memory.
110+
111+
=== Kernel properties
112+
113+
The `work_group_scratch_size` property must be passed to a kernel to determine
114+
the run-time size of the device local memory allocation associated with
115+
all `get_work_group_scratch_memory` calls.
116+
117+
[source,c++]
118+
----
119+
namespace sycl::ext::oneapi::experimental {
120+
121+
struct work_group_scratch_size {
122+
constexpr work_group_scratch_size(size_t bytes) : value(bytes) {}
123+
size_t value;
124+
}; // work_group_scratch_size
125+
126+
using work_group_scratch_size_key = work_group_scratch_size;
127+
128+
template <> struct is_property_key<work_group_scratch_size_key> : std::true_type {};
129+
130+
} // namespace sycl::ext::oneapi::experimental
131+
----
132+
133+
|===
134+
|Property|Description
135+
136+
|`work_group_scratch_size`
137+
|The `work_group_scratch_size` property describes the amount of dynamic
138+
device local memory required by the kernel in bytes.
139+
140+
|===
141+
142+
=== Total allocation check
143+
144+
If the total amount of device local memory requested (i.e., the sum of
145+
all memory requested by `local_accessor`, `group_local_memory`,
146+
`group_local_memory_for_overwrite`, `work_group_static` and `work_group_scratch_size`) exceeds a device's
147+
local memory capacity (as reported by `local_mem_size`) then the implementation
148+
must throw a synchronous `exception` with the `errc::memory_allocation` error
149+
code from the kernel invocation command (e.g. `parallel_for`).
150+
151+
==== Usage examples
152+
153+
===== Allocations with size unknown at compile-time
154+
155+
[source,c++]
156+
----
157+
using namespace syclex = sycl::ext::oneapi::experimental;
158+
159+
160+
...
161+
162+
q.parallel_for(sycl::nd_range<1>{N, M},
163+
syclex::properties{syclex::work_group_scratch_size(M * sizeof(int))},
164+
[=](sycl::nd_item<1> it) {
165+
auto ptr = get_work_group_scratch_memory();
166+
auto ptr2 = get_work_group_scratch_memory();
167+
});
168+
----
169+
170+
171+
== Implementation notes
172+
173+
This non-normative section provides information about one possible
174+
implementation of this extension. It is not part of the specification of the
175+
extension's API.
176+
177+
For `get_work_group_scratch_memory`,
178+
the implementation may need to generate some additional code to
179+
appropriately initialize the pointer(s) returned by the call.
180+
Alternatively, it may be possible to initialize the pointer to the beginning
181+
of the device's local memory region (if that value is known). Either way, the
182+
implementation must account for the existence of one or more `local_accessor`
183+
objects (which themselves may allocate a dynamic amount of device local
184+
memory).
185+
186+
187+
== Issues
188+

0 commit comments

Comments
 (0)