@@ -63,6 +63,23 @@ void skep(KT k, int a, int b) {
6363}
6464};
6565
// Tag type that pf() below uses as its default KernelName template argument.
// Only a forward declaration is visible here; it is used purely as a name.
66+ struct auto_name ;
67+
// Kernel entry point that pf() below calls. KernelName is supplied explicitly
// by the caller and handed to the sycl_kernel_entry_point attribute;
// KernelType is deduced from the callable argument.
// NOTE: the function name, the template parameter order and the parameter
// name KernelFunc all appear literally (mangled names, %KernelFunc) in the
// host IR expectations later in this file, so changing any of them requires
// updating those expectations as well.
68+ template <typename KernelName, typename KernelType>
69+ [[clang::sycl_kernel_entry_point(KernelName)]]
70+ void __kernel_single_task (const KernelType KernelFunc) {
71+ KernelFunc (); // invoke the kernel callable with no arguments
72+ }
73+
// Helper that forwards callable K to __kernel_single_task.
// KernelType is deduced from the argument; KernelName defaults to auto_name
// when the caller does not name the kernel explicitly.
// K is taken by value and passed by value again to __kernel_single_task.
74+ template <typename KernelType, typename KernelName = auto_name>
75+ void pf (KernelType K) {
76+ __kernel_single_task<KernelName>(K);
77+ }
// Struct holding a single int with a user-declared destructor (declared only;
// no definition is visible here). main() captures an instance by value in the
// lambda passed to pf(), and the host IR expectations for
// __kernel_single_task later in this file look for the resulting lambda
// destructor call.
78+ struct DCopyable {
79+ int i;
80+ ~DCopyable ();
81+ };
82+
6683int main () {
6784 single_purpose_kernel obj;
6885 single_purpose_kernel_task (obj);
@@ -72,6 +89,9 @@ int main() {
7289 kernel_single_task<\u03b4\u03c4\u03c7>([](int ){});
7390 Handler H;
7491 H.skep <class notaverygoodkernelname >([=](int a, int b){return a+b;}, 1 , 2 );
92+
93+ DCopyable b;
94+ pf ([b](){});
7595}
7696
7797// Verify that SYCL kernel caller functions are not emitted during host
@@ -140,6 +160,17 @@ int main() {
140160// CHECK-HOST-LINUX-NEXT: ret void
141161// CHECK-HOST-LINUX-NEXT: }
142162
163+ // CHECK-HOST-LINUX: define internal void @_Z20__kernel_single_taskI9auto_nameZ4mainEUlvE_EvT0_(ptr noundef %KernelFunc)
164+ // CHECK-HOST-LINUX-NEXT: entry:
165+ // CHECK-HOST-LINUX-NEXT: %KernelFunc.indirect_addr = alloca ptr, align 8
166+ // CHECK-HOST-LINUX-NEXT: %agg.tmp = alloca %class.anon.3, align 4
167+ // CHECK-HOST-LINUX-NEXT: store ptr %KernelFunc, ptr %KernelFunc.indirect_addr, align 8
168+ // CHECK-HOST-LINUX-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %KernelFunc, i64 4, i1 false)
169+ // CHECK-HOST-LINUX-NEXT: call void @_Z18sycl_kernel_launchI9auto_nameZ4mainEUlvE_EvPKcT0_(ptr noundef @.str.4, ptr noundef %agg.tmp)
170+ // CHECK-HOST-LINUX-NEXT: call void @_ZZ4mainENUlvE_D1Ev(ptr noundef nonnull align 4 dereferenceable(4) %agg.tmp) #4
171+ // CHECK-HOST-LINUX-NEXT: ret void
172+ // CHECK-HOST-LINUX-NEXT: }
173+
143174// CHECK-HOST-WINDOWS: define dso_local void @"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} {
144175// CHECK-HOST-WINDOWS-NEXT: entry:
145176// CHECK-HOST-WINDOWS-NEXT: %kernelFunc = alloca %struct.single_purpose_kernel, align 1
@@ -199,6 +230,22 @@ int main() {
199230// CHECK-HOST-WINDOWS-NEXT: ret void
200231// CHECK-HOST-WINDOWS-NEXT: }
201232
233+ // CHECK-HOST-WINDOWS: define internal void @"??$__kernel_single_task@Uauto_name@@V<lambda_4>@?0??main@@9@@@YAXV<lambda_4>@?0??main@@9@@Z"(i32 %KernelFunc.coerce)
234+ // CHECK-HOST-WINDOWS-NEXT: entry:
235+ // CHECK-HOST-WINDOWS-NEXT: %KernelFunc = alloca %class.anon.3, align 4
236+ // CHECK-HOST-WINDOWS-NEXT: %agg.tmp = alloca %class.anon.3, align 4
237+ // CHECK-HOST-WINDOWS-NEXT: %coerce.dive = getelementptr inbounds nuw %class.anon.3, ptr %KernelFunc, i32 0, i32 0
238+ // CHECK-HOST-WINDOWS-NEXT: %coerce.dive1 = getelementptr inbounds nuw %struct.DCopyable, ptr %coerce.dive, i32 0, i32 0
239+ // CHECK-HOST-WINDOWS-NEXT: store i32 %KernelFunc.coerce, ptr %coerce.dive1, align 4
240+ // CHECK-HOST-WINDOWS-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %KernelFunc, i64 4, i1 false)
241+ // CHECK-HOST-WINDOWS-NEXT: %coerce.dive2 = getelementptr inbounds nuw %class.anon.3, ptr %agg.tmp, i32 0, i32 0
242+ // CHECK-HOST-WINDOWS-NEXT: %coerce.dive3 = getelementptr inbounds nuw %struct.DCopyable, ptr %coerce.dive2, i32 0, i32 0
243+ // CHECK-HOST-WINDOWS-NEXT: %0 = load i32, ptr %coerce.dive3, align 4
244+ // CHECK-HOST-WINDOWS-NEXT: call void @"??$sycl_kernel_launch@Uauto_name@@V<lambda_4>@?0??main@@9@@@YAXPEBDV<lambda_4>@?0??main@@9@@Z"(ptr noundef @"??_C@_0P@HMAAEHI@_ZTS9auto_name?$AA@", i32 %0)
245+ // CHECK-HOST-WINDOWS-NEXT: call void @"??1<lambda_4>@?0??main@@9@QEAA@XZ"(ptr noundef nonnull align 4 dereferenceable(4) %KernelFunc)
246+ // CHECK-HOST-WINDOWS-NEXT: ret void
247+ // CHECK-HOST-WINDOWS-NEXT: }
248+
202249// Verify that SYCL kernel caller functions are emitted for each device target.
203250//
204251// main() shouldn't be emitted in device code.
0 commit comments