Skip to content

Commit 4abaf06

Browse files
Fznamznon and tahonermann
authored and committed
[SYCL-Upstreaming] Fix a crash when an argument of a skep function is not trivially copyable (#53)
Device-copyable doesn't imply trivially copyable, so we may encounter arguments that need cleanup. Adds a test that verifies the presence of the destructor call in the synthesized code.
1 parent 59baed4 commit 4abaf06

File tree

2 files changed

+48
-1
lines changed

2 files changed

+48
-1
lines changed

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ StmtResult BuildSYCLKernelLaunchStmt(Sema &SemaRef,
511511
if (LaunchResult.isInvalid())
512512
return StmtError();
513513

514-
Stmts.push_back(LaunchResult.get());
514+
Stmts.push_back(SemaRef.MaybeCreateExprWithCleanups(LaunchResult).get());
515515
}
516516

517517
return CompoundStmt::Create(SemaRef.getASTContext(), Stmts,

clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,23 @@ void skep(KT k, int a, int b) {
6363
}
6464
};
6565

66+
struct auto_name;
67+
68+
template <typename KernelName, typename KernelType>
69+
[[clang::sycl_kernel_entry_point(KernelName)]]
70+
void __kernel_single_task(const KernelType KernelFunc) {
71+
KernelFunc();
72+
}
73+
74+
template <typename KernelType, typename KernelName = auto_name>
75+
void pf(KernelType K) {
76+
__kernel_single_task<KernelName>(K);
77+
}
78+
struct DCopyable {
79+
int i;
80+
~DCopyable();
81+
};
82+
6683
int main() {
6784
single_purpose_kernel obj;
6885
single_purpose_kernel_task(obj);
@@ -72,6 +89,9 @@ int main() {
7289
kernel_single_task<\u03b4\u03c4\u03c7>([](int){});
7390
Handler H;
7491
H.skep<class notaverygoodkernelname>([=](int a, int b){return a+b;}, 1, 2);
92+
93+
DCopyable b;
94+
pf([b](){});
7595
}
7696

7797
// Verify that SYCL kernel caller functions are not emitted during host
@@ -140,6 +160,17 @@ int main() {
140160
// CHECK-HOST-LINUX-NEXT: ret void
141161
// CHECK-HOST-LINUX-NEXT: }
142162

163+
// CHECK-HOST-LINUX: define internal void @_Z20__kernel_single_taskI9auto_nameZ4mainEUlvE_EvT0_(ptr noundef %KernelFunc)
164+
// CHECK-HOST-LINUX-NEXT: entry:
165+
// CHECK-HOST-LINUX-NEXT: %KernelFunc.indirect_addr = alloca ptr, align 8
166+
// CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.3, align 4
167+
// CHECK-HOST-LINUX-NEXT: store ptr %KernelFunc, ptr %KernelFunc.indirect_addr, align 8
168+
// CHECK-HOST-LINUX-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %KernelFunc, i64 4, i1 false)
169+
// CHECK-HOST-LINUX-NEXT: call void @_Z18sycl_kernel_launchI9auto_nameZ4mainEUlvE_EvPKcT0_(ptr noundef @.str.4, ptr noundef %agg.tmp)
170+
// CHECK-HOST-LINUX-NEXT: call void @_ZZ4mainENUlvE_D1Ev(ptr noundef nonnull align 4 dereferenceable(4) %agg.tmp) #4
171+
// CHECK-HOST-LINUX-NEXT: ret void
172+
// CHECK-HOST-LINUX-NEXT: }
173+
143174
// CHECK-HOST-WINDOWS: define dso_local void @"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} {
144175
// CHECK-HOST-WINDOWS-NEXT: entry:
145176
// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1
@@ -199,6 +230,22 @@ int main() {
199230
// CHECK-HOST-WINDOWS-NEXT: ret void
200231
// CHECK-HOST-WINDOWS-NEXT: }
201232

233+
// CHECK-HOST-WINDOWS: define internal void @"??$__kernel_single_task@Uauto_name@@V<lambda_4>@?0??main@@9@@@YAXV<lambda_4>@?0??main@@9@@Z"(i32 %KernelFunc.coerce)
234+
// CHECK-HOST-WINDOWS-NEXT: entry:
235+
// CHECK-HOST-WINDOWS-NEXT: %KernelFunc = alloca %class.anon.3, align 4
236+
// CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.3, align 4
237+
// CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.3, ptr %KernelFunc, i32 0, i32 0
238+
// CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.DCopyable, ptr %coerce.dive, i32 0, i32 0
239+
// CHECK-HOST-WINDOWS-NEXT: store i32 %KernelFunc.coerce, ptr %coerce.dive1, align 4
240+
// CHECK-HOST-WINDOWS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %KernelFunc, i64 4, i1 false)
241+
// CHECK-HOST-WINDOWS-NEXT: %coerce.dive2 = getelementptr inbounds nuw %class.anon.3, ptr %agg.tmp, i32 0, i32 0
242+
// CHECK-HOST-WINDOWS-NEXT: %coerce.dive3 = getelementptr inbounds nuw %struct.DCopyable, ptr %coerce.dive2, i32 0, i32 0
243+
// CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %coerce.dive3, align 4
244+
// CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@Uauto_name@@V<lambda_4>@?0??main@@9@@@YAXPEBDV<lambda_4>@?0??main@@9@@Z"(ptr noundef @"??_C@_0P@HMAAEHI@_ZTS9auto_name?$AA@", i32 %0)
245+
// CHECK-HOST-WINDOWS-NEXT: call void @"??1<lambda_4>@?0??main@@9@QEAA@XZ"(ptr noundef nonnull align 4 dereferenceable(4) %KernelFunc)
246+
// CHECK-HOST-WINDOWS-NEXT: ret void
247+
// CHECK-HOST-WINDOWS-NEXT: }
248+
202249
// Verify that SYCL kernel caller functions are emitted for each device target.
203250
//
204251
// main() shouldn't be emitted in device code.

0 commit comments

Comments
 (0)