Skip to content

Commit 9409de9

Browse files
author
Victor Lomuller
committed
add test and improve diags
1 parent 416f7ff commit 9409de9

File tree

11 files changed

+176
-54
lines changed

11 files changed

+176
-54
lines changed

clang/include/clang/Basic/AttrDocs.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4096,8 +4096,9 @@ def SYCLWGScopeDocs : Documentation {
40964096
let Heading = "__sycl_detail__::wg_scope";
40974097
let Content = [{
40984098
This attribute can only be applied to records with a trivial default constructor and destructor.
4099+
Types with this attribute cannot be used for non-static data members.
40994100
It indicates that any block and namespace scope variable of a type holding this attribute
4100-
will be allocated in the local memory. For variables allocated in block scope, they behave
4101+
will be allocated in local memory. For variables allocated in block scope, they behave
41014102
as implicitly declared as static.
41024103
}];
41034104
}

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12466,6 +12466,8 @@ def warn_sycl_kernel_too_big_args : Warning<
1246612466
def err_sycl_wg_scope : Error<
1246712467
"SYCL work group scope only applies to class with a trivial "
1246812468
"%select{default constructor|destructor}0">;
12469+
def err_sycl_field_with_wg_scope : Error<
12470+
"type with a SYCL work group scope attribute cannot be used with a non-static data members">;
1246912471
def err_sycl_virtual_types : Error<
1247012472
"no class with a vtable can be used in a SYCL kernel or any code included in the kernel">;
1247112473
def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">;

clang/include/clang/Sema/SemaSYCL.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ class SemaSYCL : public SemaBase {
266266

267267
void CheckSYCLKernelCall(FunctionDecl *CallerFunc,
268268
ArrayRef<const Expr *> Args);
269-
void CheckSYCLScope(CXXRecordDecl *Decl);
269+
void CheckSYCLScopeAttr(CXXRecordDecl *Decl);
270270

271271
/// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
272272
/// context is "used as device code".

clang/lib/Sema/SemaDecl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18520,6 +18520,14 @@ FieldDecl *Sema::CheckFieldDecl(DeclarationName Name, QualType T,
1852018520
InvalidDecl = true;
1852118521
}
1852218522

18523+
if (LangOpts.SYCLIsDevice) {
18524+
const CXXRecordDecl *RD = T->getAsCXXRecordDecl();
18525+
if (RD && RD->hasAttr<SYCLScopeAttr>()) {
18526+
Diag(Loc, diag::err_sycl_field_with_wg_scope);
18527+
InvalidDecl = true;
18528+
}
18529+
}
18530+
1852318531
if (LangOpts.OpenCL) {
1852418532
// OpenCL v1.2 s6.9b,r & OpenCL v2.0 s6.12.5 - The following types cannot be
1852518533
// used as structure or union field: image, sampler, event or block types.

clang/lib/Sema/SemaDeclCXX.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7218,7 +7218,7 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) {
72187218
checkCUDADeviceBuiltinTextureClassTemplate(*this, Record);
72197219
}
72207220
if (getLangOpts().SYCLIsDevice && Record->hasAttr<SYCLScopeAttr>()) {
7221-
SYCL().CheckSYCLScope(Record);
7221+
SYCL().CheckSYCLScopeAttr(Record);
72227222
}
72237223
}
72247224

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5070,11 +5070,14 @@ void SemaSYCL::CheckSYCLKernelCall(FunctionDecl *KernelFunc,
50705070
KernelFunc->setInvalidDecl();
50715071
}
50725072

5073-
void SemaSYCL::CheckSYCLScope(CXXRecordDecl *Decl) {
5073+
void SemaSYCL::CheckSYCLScopeAttr(CXXRecordDecl *Decl) {
50745074
assert(Decl->hasAttr<SYCLScopeAttr>());
50755075

50765076
bool HasError = false;
50775077

5078+
if (Decl->isDependentContext())
5079+
return;
5080+
50785081
// We don't emit both diags at the time as note will only be emitted for the
50795082
// first, which is confusing. So we check both cases but only report one.
50805083
if (!Decl->hasTrivialDefaultConstructor()) {
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// RUN: %clang_cc1 -fsycl-is-device -verify %s
2+
3+
class [[__sycl_detail__::wg_scope]] G1 {};
4+
class [[__sycl_detail__::wg_scope]] G2 {
5+
G2() = default;
6+
G2(int i) : i(i) {}
7+
int i;
8+
};
9+
10+
class [[__sycl_detail__::wg_scope]] G3 {
11+
~G3() = default;
12+
};
13+
14+
class [[__sycl_detail__::wg_scope]] B4 { // expected-error {{SYCL work group scope only applies to class with a trivial default constructor}}
15+
B4() {}
16+
};
17+
18+
class [[__sycl_detail__::wg_scope]] B5 { // expected-error {{SYCL work group scope only applies to class with a trivial destructor}}
19+
~B5() {}
20+
};
21+
22+
class [[__sycl_detail__::wg_scope]] B6 { // expected-error {{SYCL work group scope only applies to class with a trivial default constructor}}
23+
B6() {}
24+
~B6() {}
25+
};
26+
27+
template <typename T> class [[__sycl_detail__::wg_scope]] B7 { // #B7
28+
public:
29+
T obj;
30+
};
31+
32+
struct Valid {};
33+
struct InvalidCtor {
34+
InvalidCtor() {}
35+
};
36+
struct InvalidDtor {
37+
~InvalidDtor() {}
38+
};
39+
struct InvalidCDtor {
40+
InvalidCDtor() {}
41+
~InvalidCDtor() {}
42+
};
43+
44+
B7<Valid> b7;
45+
// expected-error@#B7 {{SYCL work group scope only applies to class with a trivial default constructor}}
46+
// expected-note@+1 {{in instantiation of template class 'B7<InvalidCtor>' requested here}}
47+
B7<InvalidCtor> b9;
48+
// expected-error@#B7 {{SYCL work group scope only applies to class with a trivial destructor}}
49+
// expected-note@+1 {{in instantiation of template class 'B7<InvalidDtor>' requested here}}
50+
B7<InvalidDtor> b10;
51+
// expected-error@#B7 {{SYCL work group scope only applies to class with a trivial default constructor}}
52+
// expected-note@+1 {{in instantiation of template class 'B7<InvalidCDtor>' requested here}}
53+
B7<InvalidCDtor> b11;
54+
55+
template <typename T> class [[__sycl_detail__::wg_scope]] B12 { // #B12
56+
public:
57+
B12() = default;
58+
~B12() = default;
59+
T obj;
60+
};
61+
62+
B12<Valid> b12;
63+
// expected-error@#B12 {{SYCL work group scope only applies to class with a trivial default constructor}}
64+
// expected-note@+1 {{in instantiation of template class 'B12<InvalidCtor>' requested here}}
65+
B12<InvalidCtor> b13;
66+
67+
class B14 {
68+
G1 field; // expected-error {{type with a SYCL work group scope attribute cannot be used with a non-static data members}}
69+
};
70+
71+
template <typename T> class B15 {
72+
T field; // #B15-field
73+
};
74+
75+
// expected-error@#B15-field {{type with a SYCL work group scope attribute cannot be used with a non-static data members}}
76+
// expected-note@+1 {{in instantiation of template class 'B15<G1>' requested here}}
77+
B15<G1> b15;

llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -129,20 +129,17 @@ lowerDynamicLocalMemCallDirect(CallInst *CI, Triple TT,
129129

130130
Value *GVPtr = [&]() -> Value * {
131131
IRBuilder<> Builder(CI);
132-
if (TT.isSPIROrSPIRV()) {
133-
132+
if (TT.isSPIROrSPIRV())
134133
return Builder.CreateLoad(CI->getType(), LocalMemPlaceholder);
135-
} else {
136-
Value *ArgAlign = CI->getArgOperand(0);
137-
Align RequestedAlignment{
138-
cast<llvm::ConstantInt>(ArgAlign)->getZExtValue()};
139-
MaybeAlign CurrentAlignment = LocalMemPlaceholder->getAlign();
140-
if (!CurrentAlignment.has_value() ||
141-
(CurrentAlignment.value() < RequestedAlignment))
142-
LocalMemPlaceholder->setAlignment(RequestedAlignment);
143-
144-
return Builder.CreatePointerCast(LocalMemPlaceholder, CI->getType());
145-
}
134+
Value *ArgAlign = CI->getArgOperand(0);
135+
Align RequestedAlignment{
136+
cast<llvm::ConstantInt>(ArgAlign)->getZExtValue()};
137+
MaybeAlign CurrentAlignment = LocalMemPlaceholder->getAlign();
138+
if (!CurrentAlignment.has_value() ||
139+
(CurrentAlignment.value() < RequestedAlignment))
140+
LocalMemPlaceholder->setAlignment(RequestedAlignment);
141+
142+
return Builder.CreatePointerCast(LocalMemPlaceholder, CI->getType());
146143
}();
147144
CI->replaceAllUsesWith(GVPtr);
148145
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; RUN: opt -S -sycllowerwglocalmemory -bugpoint-enable-legacy-pm < %s | FileCheck %s
2+
; RUN: opt -S -passes=sycllowerwglocalmemory < %s | FileCheck %s
3+
4+
; CHECK-NOT: __sycl_dynamicLocalMemoryPlaceholder
5+
6+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
7+
target triple = "spir64-unknown-unknown"
8+
9+
; CHECK-DAG: @__sycl_dynamicLocalMemoryPlaceholder_GV = linkonce_odr addrspace(3) global ptr addrspace(3) undef
10+
11+
; Function Attrs: convergent norecurse
12+
; CHECK-DAG: @_ZTS7KernelA(ptr addrspace(1) %0, ptr addrspace(3) noalias "sycl-implicit-local-arg" %[[IMPLICT_ARG:[a-zA-Z0-9]+]]{{.*}} !kernel_arg_addr_space ![[ADDR_SPACE_MD:[0-9]+]]
13+
define weak_odr dso_local spir_kernel void @_ZTS7KernelA(ptr addrspace(1) %0) local_unnamed_addr #0 !kernel_arg_addr_space !5 {
14+
entry:
15+
; CHECK-DAG: store ptr addrspace(3) %[[IMPLICT_ARG]], ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder_GV
16+
; CHECK-DAG: %[[LD1:[a-zA-Z0-9]+]] = load ptr addrspace(3), ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder_GV
17+
%1 = tail call spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64 128) #1
18+
; CHECK-DAG: getelementptr inbounds i8, ptr addrspace(3) %[[LD1]]
19+
%2 = getelementptr inbounds i8, ptr addrspace(3) %1, i64 4
20+
; CHECK-DAG: %[[LD2:[a-zA-Z0-9]+]] = load ptr addrspace(3), ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder_GV
21+
%3 = tail call spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64 4) #1
22+
ret void
23+
}
24+
25+
; Function Attrs: convergent
26+
declare dso_local spir_func ptr addrspace(3) @__sycl_dynamicLocalMemoryPlaceholder(i64) local_unnamed_addr #1
27+
28+
attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" "sycl-work-group-static"="1" }
29+
attributes #1 = { convergent norecurse }
30+
31+
!llvm.module.flags = !{!0}
32+
!opencl.spir.version = !{!1}
33+
!spirv.Source = !{!2}
34+
!llvm.ident = !{!3}
35+
36+
!0 = !{i32 1, !"wchar_size", i32 4}
37+
!1 = !{i32 1, i32 2}
38+
!2 = !{i32 4, i32 100000}
39+
!3 = !{!"clang version 13.0.0"}
40+
!4 = !{}
41+
; ![[ADDR_SPACE_MD]] = !{i32 1, i32 3}
42+
!5 = !{i32 1}

sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_static.asciidoc

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ Device local memory is memory that is shared by all work-items in a work-group.
6464
The behavior is similar to the CUDA `+__shared__+` keyword, and the extension
6565
draws some inspiration from the {cpp} `thread_local` keyword.
6666

67-
`work_group_static` variables can be allocated at namespace or block scope,
67+
`work_group_static` can only be used to declare variables at namespace, block or class scope,
6868
lifting many of the restrictions in the existing
6969
link:../supported/sycl_ext_oneapi_local_memory.asciidoc[sycl_ext_oneapi_local_memory]
7070
extension. Note, however, that `work_group_static` variables currently place
@@ -106,7 +106,7 @@ an object into device local memory.
106106
namespace sycl::ext::oneapi::experimental {
107107
108108
template <typename T>
109-
class work_group_static {
109+
class work_group_static final {
110110
public:
111111
112112
work_group_static() = default;
@@ -121,8 +121,7 @@ public:
121121
T* operator&() const noexcept;
122122
123123
private:
124-
T* ptr; // exposition only
125-
124+
T storage;
126125
};
127126
128127
} // namespace sycl::ext::oneapi::experimental
@@ -134,6 +133,9 @@ The storage for the object is allocated in device local memory before
134133
calling the user's kernel lambda, and deallocated when all work-items
135134
in the work-group have completed execution of the kernel.
136135

136+
Objects of type `work_group_static` must only be declared at namespace, block, lambda or class scope.
137+
If the object is declared in class scope, it must be declared as a static data member.
138+
137139
SYCL implementations conforming to the full feature set treat
138140
`work_group_static` similarly to the `thread_local` keyword, and when
139141
a `work_group_static` object is declared at block scope it behaves
@@ -150,18 +152,11 @@ multiple times, developers must take care to avoid race conditions (e.g., by
150152
calling `group_barrier` before and after using the memory).
151153
====
152154

153-
Change to SYCL 2020 section `5.9.2 Common address space deduction rules`:
154-
Namespace scope: if the variable is `work_group_static` object,
155-
then the variable is assigned to the local address space.
156-
Otherwise normal rules applies.
157-
158155
SYCL 2020 requires that all global variables accessed by a device function are
159156
`const` or `constexpr`. This extension lifts that restriction for
160157
`work_group_static` variables.
161158

162-
When `T` is a class type or bounded array, the size of the allocation is known
163-
at compile-time, and a SYCL implementation embeds the size of the allocation
164-
directly within a kernel. Each instance of `work_group_static<T>` is associated
159+
Each instance of `work_group_static<T>` is associated
165160
with a unique allocation in device local memory.
166161

167162
[source,c++]
@@ -173,7 +168,7 @@ associated with this instance of `work_group_static`.
173168

174169
[source,c++]
175170
----
176-
work_group_static<T>& operator=(const T& value) noexcept;
171+
work_group_static& operator=(const T& value) noexcept;
177172
----
178173
_Constraints_: Available only if `std::is_array_v<T>` is false.
179174

@@ -188,6 +183,11 @@ T* operator&() noexcept;
188183
_Returns_: A pointer to the device local memory associated with this
189184
instance of `work_group_static` (i.e., `ptr`).
190185

186+
==== Interaction with common address space deduction rules
187+
188+
Objects of type `work_group_static` are assigned to
189+
the local address space.
190+
191191
=== `get_dynamic_work_group_memory` function
192192

193193
The `get_dynamic_work_group_memory` function provides access
@@ -213,15 +213,7 @@ in device local memory, regardless of `T`. For example, two call declared
213213
as `get_dynamic_work_group_memory<int>` and
214214
`get_dynamic_work_group_memory<float>` will be associated with the same shared allocation.
215215

216-
If the total amount of device local memory requested (i.e., the sum of
217-
all memory requested by `local_accessor`, `group_local_memory`,
218-
`group_local_memory_for_overwrite` and `work_group_static`) exceeds a device's
219-
local memory capacity (as reported by `local_mem_size`) then the implementation
220-
must throw a synchronous `exception` with the `errc::memory_allocation` error
221-
code from the kernel invocation command (e.g. `parallel_for`).
222-
223-
224-
==== Kernel properties
216+
=== Kernel properties
225217

226218
The `work_group_static_size` property must be passed to a kernel to determine
227219
the run-time size of the device local memory allocation associated with
@@ -252,6 +244,14 @@ device local memory required by the kernel in bytes.
252244

253245
|===
254246

247+
=== Total allocation check
248+
249+
If the total amount of device local memory requested (i.e., the sum of
250+
all memory requested by `local_accessor`, `group_local_memory`,
251+
`group_local_memory_for_overwrite`, `work_group_static` and `work_group_static_size`) exceeds a device's
252+
local memory capacity (as reported by `local_mem_size`) then the implementation
253+
must throw a synchronous `exception` with the `errc::memory_allocation` error
254+
code from the kernel invocation command (e.g. `parallel_for`).
255255

256256
==== Usage examples
257257

0 commit comments

Comments
 (0)