// Test entry point: builds a 256-element array filled with 3.0, passes it by
// mutable reference to the wrapper function, then hands a reference to the
// array to `core::hint::black_box`.
// In this diff view the call target changes from `kernel` (removed line) to
// `kernel_1` (added line).
// NOTE(review): the `black_box` call presumably keeps `x` observable so the
// array and the preceding call are not optimized away - confirm.
1717#[ unsafe( no_mangle) ]
1818fn main ( ) {
1919 let mut x = [ 3.0 ; 256 ] ;
20- kernel ( & mut x) ;
20+ kernel_1 ( & mut x) ;
2121 core:: hint:: black_box ( & x) ;
2222}
2323
@@ -26,11 +26,11 @@ fn main() {
2626// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
2727// CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }
2828
29- // CHECK: @.offload_sizes._kernel = private unnamed_addr constant [1 x i64] [i64 1024]
30- // CHECK: @.offload_maptypes._kernel = private unnamed_addr constant [1 x i64] [i64 35]
31- // CHECK: @._kernel .region_id = weak unnamed_addr constant i8 0
32- // CHECK: @.offloading.entry_name._kernel = internal unnamed_addr constant [8 x i8] c"_kernel \00", section ".llvm.rodata.offloading", align 1
33- // CHECK: @.offloading.entry._kernel = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel .region_id, ptr @.offloading.entry_name._kernel , i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
29+ // CHECK: @.offload_sizes._kernel_1 = private unnamed_addr constant [1 x i64] [i64 1024]
30+ // CHECK: @.offload_maptypes._kernel_1 = private unnamed_addr constant [1 x i64] [i64 35]
31+ // CHECK: @._kernel_1 .region_id = weak unnamed_addr constant i8 0
32+ // CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1 \00", section ".llvm.rodata.offloading", align 1
33+ // CHECK: @.offloading.entry._kernel_1 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1 .region_id, ptr @.offloading.entry_name._kernel_1 , i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
3434
3535// CHECK: @anon.{{.*}}.0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
3636// CHECK: @anon.{{.*}}.1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @anon.{{.*}}.0 }, align 8
@@ -40,74 +40,70 @@ fn main() {
4040// CHECK-NEXT: start:
4141// CHECK-NEXT: %0 = alloca [8 x i8], align 8
4242// CHECK-NEXT: %x = alloca [1024 x i8], align 16
43+ // CHECK: call void @kernel_1(ptr noalias noundef nonnull align 4 dereferenceable(1024) %x)
44+ // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %0)
45+ // CHECK-NEXT: store ptr %x, ptr %0, align 8
46+ // CHECK-NEXT: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #4, !srcloc !4
47+ // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %0)
48+ // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 1024, ptr nonnull %x)
49+ // CHECK-NEXT: ret void
50+ // CHECK-NEXT: }
51+
52+ // CHECK: define{{( dso_local)?}} void @kernel_1(ptr noalias noundef align 4 dereferenceable(1024) %x)
53+ // CHECK-NEXT: start:
54+ // CHECK-NEXT: %dummy = load volatile ptr, ptr @.offload_sizes._kernel_1, align 8
55+ // CHECK-NEXT: %dummy1 = load volatile ptr, ptr @.offloading.entry._kernel_1, align 8
4356// CHECK-NEXT: %EmptyDesc = alloca %struct.__tgt_bin_desc, align 8
4457// CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
4558// CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
4659// CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
4760// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
48- // CHECK: call void @llvm.memset.p0.i64(ptr align 8 %EmptyDesc, i8 0, i64 32, i1 false)
49- // CHECK-NEXT: %1 = getelementptr inbounds float, ptr %x, i32 0
50- // CHECK-NEXT: call void @__tgt_register_lib(ptr %EmptyDesc)
61+ // CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %EmptyDesc, i8 0, i64 32, i1 false)
62+ // CHECK-NEXT: call void @__tgt_register_lib(ptr nonnull %EmptyDesc)
5163// CHECK-NEXT: call void @__tgt_init_all_rtls()
52- // CHECK-NEXT: %2 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
53- // CHECK-NEXT: store ptr %x, ptr %2, align 8
54- // CHECK-NEXT: %3 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
55- // CHECK-NEXT: store ptr %1, ptr %3, align 8
56- // CHECK-NEXT: %4 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
57- // CHECK-NEXT: store i64 1024, ptr %4, align 8
58- // CHECK-NEXT: %5 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
59- // CHECK-NEXT: %6 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
60- // CHECK-NEXT: %7 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
61- // CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 1, ptr %5, ptr %6, ptr %7, ptr @.offload_maptypes.1, ptr null, ptr null)
62- // CHECK-NEXT: %8 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0
63- // CHECK-NEXT: store i32 3, ptr %8, align 4
64- // CHECK-NEXT: %9 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1
65- // CHECK-NEXT: store i32 1, ptr %9, align 4
66- // CHECK-NEXT: %10 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2
67- // CHECK-NEXT: store ptr %5, ptr %10, align 8
68- // CHECK-NEXT: %11 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3
69- // CHECK-NEXT: store ptr %6, ptr %11, align 8
70- // CHECK-NEXT: %12 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4
71- // CHECK-NEXT: store ptr %7, ptr %12, align 8
72- // CHECK-NEXT: %13 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5
73- // CHECK-NEXT: store ptr @.offload_maptypes.1, ptr %13, align 8
74- // CHECK-NEXT: %14 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6
75- // CHECK-NEXT: store ptr null, ptr %14, align 8
76- // CHECK-NEXT: %15 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7
77- // CHECK-NEXT: store ptr null, ptr %15, align 8
78- // CHECK-NEXT: %16 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8
79- // CHECK-NEXT: store i64 0, ptr %16, align 8
80- // CHECK-NEXT: %17 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9
81- // CHECK-NEXT: store i64 0, ptr %17, align 8
82- // CHECK-NEXT: %18 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10
83- // CHECK-NEXT: store [3 x i32] [i32 2097152, i32 0, i32 0], ptr %18, align 4
84- // CHECK-NEXT: %19 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11
85- // CHECK-NEXT: store [3 x i32] [i32 256, i32 0, i32 0], ptr %19, align 4
86- // CHECK-NEXT: %20 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12
87- // CHECK-NEXT: store i32 0, ptr %20, align 4
88- // CHECK-NEXT: %21 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args)
89- // CHECK-NEXT: %22 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
90- // CHECK-NEXT: %23 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
91- // CHECK-NEXT: %24 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
92- // CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 1, ptr %22, ptr %23, ptr %24, ptr @.offload_maptypes.1, ptr null, ptr null)
93- // CHECK-NEXT: call void @__tgt_unregister_lib(ptr %EmptyDesc)
94- // CHECK: store ptr %x, ptr %0, align 8
95- // CHECK-NEXT: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0)
96- // CHECK: ret void
64+ // CHECK-NEXT: store ptr %x, ptr %.offload_baseptrs, align 8
65+ // CHECK-NEXT: store ptr %x, ptr %.offload_ptrs, align 8
66+ // CHECK-NEXT: store i64 1024, ptr %.offload_sizes, align 8
67+ // CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1, ptr null, ptr null)
68+ // CHECK-NEXT: store i32 3, ptr %kernel_args, align 8
69+ // CHECK-NEXT: %0 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 4
70+ // CHECK-NEXT: store i32 1, ptr %0, align 4
71+ // CHECK-NEXT: %1 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 8
72+ // CHECK-NEXT: store ptr %.offload_baseptrs, ptr %1, align 8
73+ // CHECK-NEXT: %2 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 16
74+ // CHECK-NEXT: store ptr %.offload_ptrs, ptr %2, align 8
75+ // CHECK-NEXT: %3 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 24
76+ // CHECK-NEXT: store ptr %.offload_sizes, ptr %3, align 8
77+ // CHECK-NEXT: %4 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 32
78+ // CHECK-NEXT: store ptr @.offload_maptypes._kernel_1, ptr %4, align 8
79+ // CHECK-NEXT: %5 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 40
80+ // CHECK-NEXT: %6 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 72
81+ // CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %5, i8 0, i64 32, i1 false)
82+ // CHECK-NEXT: store <4 x i32> <i32 2097152, i32 0, i32 0, i32 256>, ptr %6, align 8
83+ // CHECK-NEXT: %.fca.1.gep2 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 88
84+ // CHECK-NEXT: store i32 0, ptr %.fca.1.gep2, align 8
85+ // CHECK-NEXT: %.fca.2.gep3 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 92
86+ // CHECK-NEXT: store i32 0, ptr %.fca.2.gep3, align 4
87+ // CHECK-NEXT: %7 = getelementptr inbounds nuw i8, ptr %kernel_args, i64 96
88+ // CHECK-NEXT: store i32 0, ptr %7, align 8
89+ // CHECK-NEXT: %8 = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2097152, i32 256, ptr nonnull @._kernel_1.region_id, ptr nonnull %kernel_args)
90+ // CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes._kernel_1, ptr null, ptr null)
91+ // CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
92+ // CHECK-NEXT: ret void
9793// CHECK-NEXT: }
9894
9995// CHECK: Function Attrs: nounwind
10096// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
10197
// Wrapper around the offload intrinsic: forwards `x` to
// `core::intrinsics::offload`, passing the `_kernel_1` function and the
// argument packed as a one-element tuple.
// In this diff view the function is renamed from `kernel` to `kernel_1` and
// its offload target from `_kernel` to `_kernel_1`.
// The expected-IR directives earlier in this file look for a separate
// `@kernel_1` definition and a call to it.
// NOTE(review): `no_mangle` + `inline(never)` are presumably what keep that
// symbol name and definition intact - confirm.
10298#[ unsafe( no_mangle) ]
10399#[ inline( never) ]
104- pub fn kernel ( x : & mut [ f32 ; 256 ] ) {
105- core:: intrinsics:: offload ( _kernel , ( x, ) )
100+ pub fn kernel_1 ( x : & mut [ f32 ; 256 ] ) {
101+ core:: intrinsics:: offload ( _kernel_1 , ( x, ) )
106102}
107103
108104#[ unsafe( no_mangle) ]
109105#[ inline( never) ]
110- pub fn _kernel ( x : & mut [ f32 ; 256 ] ) {
106+ pub fn _kernel_1 ( x : & mut [ f32 ; 256 ] ) {
111107 for i in 0 ..256 {
112108 x[ i] = 21.0 ;
113109 }
0 commit comments