@@ -111,63 +111,16 @@ entry:
111111; store i32 %i, ptr %out, align 4
112112; ret void
113113; }
114-
115- ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
116- define dso_local ptx_kernel void @read_only_gep_asc0 (ptr nocapture noundef writeonly %out , ptr nocapture noundef readonly byval (%struct.S ) align 4 %s ) local_unnamed_addr #0 {
117- ; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
118- ; SM_60-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
119- ; SM_60-NEXT: [[ENTRY:.*:]]
120- ; SM_60-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4
121- ; SM_60-NEXT: [[S2:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
122- ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false)
123- ; SM_60-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4
124- ; SM_60-NEXT: [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
125- ; SM_60-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
126- ; SM_60-NEXT: [[I:%.*]] = load i32, ptr [[ASC0]], align 4
127- ; SM_60-NEXT: store i32 [[I]], ptr [[OUT]], align 4
128- ; SM_60-NEXT: ret void
129- ;
130- ; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
131- ; SM_70-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
132- ; SM_70-NEXT: [[ENTRY:.*:]]
133- ; SM_70-NEXT: [[S_PARAM:%.*]] = call ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]])
134- ; SM_70-NEXT: [[S_GEN:%.*]] = addrspacecast ptr addrspace(101) [[S_PARAM]] to ptr
135- ; SM_70-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S_GEN]], i64 4
136- ; SM_70-NEXT: [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
137- ; SM_70-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
138- ; SM_70-NEXT: [[I:%.*]] = load i32, ptr [[ASC0]], align 4
139- ; SM_70-NEXT: store i32 [[I]], ptr [[OUT]], align 4
140- ; SM_70-NEXT: ret void
141- ;
142- ; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0(
143- ; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] {
144- ; COPY-NEXT: [[ENTRY:.*:]]
145- ; COPY-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4
146- ; COPY-NEXT: [[ASC:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
147- ; COPY-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(101) [[ASC]] to ptr
148- ; COPY-NEXT: [[I:%.*]] = load i32, ptr [[ASC0]], align 4
149- ; COPY-NEXT: store i32 [[I]], ptr [[OUT]], align 4
150- ; COPY-NEXT: ret void
151114;
152- ; PTX-LABEL: read_only_gep_asc0(
153- ; PTX: {
154- ; PTX-NEXT: .reg .b32 %r<2>;
155- ; PTX-NEXT: .reg .b64 %rd<3>;
156- ; PTX-EMPTY:
157- ; PTX-NEXT: // %bb.0: // %entry
158- ; PTX-NEXT: ld.param.u64 %rd1, [read_only_gep_asc0_param_0];
159- ; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
160- ; PTX-NEXT: ld.param.u32 %r1, [read_only_gep_asc0_param_1+4];
161- ; PTX-NEXT: st.global.u32 [%rd2], %r1;
162- ; PTX-NEXT: ret;
163- entry:
164- %b = getelementptr inbounds nuw i8 , ptr %s , i64 4
165- %asc = addrspacecast ptr %b to ptr addrspace (101 )
166- %asc0 = addrspacecast ptr addrspace (101 ) %asc to ptr
167- %i = load i32 , ptr %asc0 , align 4
168- store i32 %i , ptr %out , align 4
169- ret void
170- }
115+ ; define dso_local ptx_kernel void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 {
116+ ; entry:
117+ ; %b = getelementptr inbounds nuw i8, ptr %s, i64 4
118+ ; %asc = addrspacecast ptr %b to ptr addrspace(101)
119+ ; %asc0 = addrspacecast ptr addrspace(101) %asc to ptr
120+ ; %i = load i32, ptr %asc0, align 4
121+ ; store i32 %i, ptr %out, align 4
122+ ; ret void
123+ ; }
171124
172125; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
173126define dso_local ptx_kernel void @escape_ptr (ptr nocapture noundef readnone %out , ptr noundef byval (%struct.S ) align 4 %s ) local_unnamed_addr #0 {
@@ -182,14 +135,14 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
182135;
183136; PTX-LABEL: escape_ptr(
184137; PTX: {
185- ; PTX-NEXT: .local .align 4 .b8 __local_depot4 [8];
138+ ; PTX-NEXT: .local .align 4 .b8 __local_depot2 [8];
186139; PTX-NEXT: .reg .b64 %SP;
187140; PTX-NEXT: .reg .b64 %SPL;
188141; PTX-NEXT: .reg .b32 %r<3>;
189142; PTX-NEXT: .reg .b64 %rd<3>;
190143; PTX-EMPTY:
191144; PTX-NEXT: // %bb.0: // %entry
192- ; PTX-NEXT: mov.b64 %SPL, __local_depot4 ;
145+ ; PTX-NEXT: mov.b64 %SPL, __local_depot2 ;
193146; PTX-NEXT: cvta.local.u64 %SP, %SPL;
194147; PTX-NEXT: add.u64 %rd1, %SP, 0;
195148; PTX-NEXT: add.u64 %rd2, %SPL, 0;
@@ -226,14 +179,14 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
226179;
227180; PTX-LABEL: escape_ptr_gep(
228181; PTX: {
229- ; PTX-NEXT: .local .align 4 .b8 __local_depot5 [8];
182+ ; PTX-NEXT: .local .align 4 .b8 __local_depot3 [8];
230183; PTX-NEXT: .reg .b64 %SP;
231184; PTX-NEXT: .reg .b64 %SPL;
232185; PTX-NEXT: .reg .b32 %r<3>;
233186; PTX-NEXT: .reg .b64 %rd<4>;
234187; PTX-EMPTY:
235188; PTX-NEXT: // %bb.0: // %entry
236- ; PTX-NEXT: mov.b64 %SPL, __local_depot5 ;
189+ ; PTX-NEXT: mov.b64 %SPL, __local_depot3 ;
237190; PTX-NEXT: cvta.local.u64 %SP, %SPL;
238191; PTX-NEXT: add.u64 %rd1, %SP, 0;
239192; PTX-NEXT: add.u64 %rd2, %SPL, 0;
@@ -271,14 +224,14 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
271224;
272225; PTX-LABEL: escape_ptr_store(
273226; PTX: {
274- ; PTX-NEXT: .local .align 4 .b8 __local_depot6 [8];
227+ ; PTX-NEXT: .local .align 4 .b8 __local_depot4 [8];
275228; PTX-NEXT: .reg .b64 %SP;
276229; PTX-NEXT: .reg .b64 %SPL;
277230; PTX-NEXT: .reg .b32 %r<3>;
278231; PTX-NEXT: .reg .b64 %rd<5>;
279232; PTX-EMPTY:
280233; PTX-NEXT: // %bb.0: // %entry
281- ; PTX-NEXT: mov.b64 %SPL, __local_depot6 ;
234+ ; PTX-NEXT: mov.b64 %SPL, __local_depot4 ;
282235; PTX-NEXT: cvta.local.u64 %SP, %SPL;
283236; PTX-NEXT: ld.param.u64 %rd1, [escape_ptr_store_param_0];
284237; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
@@ -309,14 +262,14 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
309262;
310263; PTX-LABEL: escape_ptr_gep_store(
311264; PTX: {
312- ; PTX-NEXT: .local .align 4 .b8 __local_depot7 [8];
265+ ; PTX-NEXT: .local .align 4 .b8 __local_depot5 [8];
313266; PTX-NEXT: .reg .b64 %SP;
314267; PTX-NEXT: .reg .b64 %SPL;
315268; PTX-NEXT: .reg .b32 %r<3>;
316269; PTX-NEXT: .reg .b64 %rd<6>;
317270; PTX-EMPTY:
318271; PTX-NEXT: // %bb.0: // %entry
319- ; PTX-NEXT: mov.b64 %SPL, __local_depot7 ;
272+ ; PTX-NEXT: mov.b64 %SPL, __local_depot5 ;
320273; PTX-NEXT: cvta.local.u64 %SP, %SPL;
321274; PTX-NEXT: ld.param.u64 %rd1, [escape_ptr_gep_store_param_0];
322275; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
@@ -349,14 +302,14 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
349302;
350303; PTX-LABEL: escape_ptrtoint(
351304; PTX: {
352- ; PTX-NEXT: .local .align 4 .b8 __local_depot8 [8];
305+ ; PTX-NEXT: .local .align 4 .b8 __local_depot6 [8];
353306; PTX-NEXT: .reg .b64 %SP;
354307; PTX-NEXT: .reg .b64 %SPL;
355308; PTX-NEXT: .reg .b32 %r<3>;
356309; PTX-NEXT: .reg .b64 %rd<5>;
357310; PTX-EMPTY:
358311; PTX-NEXT: // %bb.0: // %entry
359- ; PTX-NEXT: mov.b64 %SPL, __local_depot8 ;
312+ ; PTX-NEXT: mov.b64 %SPL, __local_depot6 ;
360313; PTX-NEXT: cvta.local.u64 %SP, %SPL;
361314; PTX-NEXT: ld.param.u64 %rd1, [escape_ptrtoint_param_0];
362315; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
@@ -507,14 +460,14 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
507460;
508461; PTX-LABEL: memcpy_to_param(
509462; PTX: {
510- ; PTX-NEXT: .local .align 8 .b8 __local_depot11 [8];
463+ ; PTX-NEXT: .local .align 8 .b8 __local_depot9 [8];
511464; PTX-NEXT: .reg .b64 %SP;
512465; PTX-NEXT: .reg .b64 %SPL;
513466; PTX-NEXT: .reg .b32 %r<3>;
514467; PTX-NEXT: .reg .b64 %rd<48>;
515468; PTX-EMPTY:
516469; PTX-NEXT: // %bb.0: // %entry
517- ; PTX-NEXT: mov.b64 %SPL, __local_depot11 ;
470+ ; PTX-NEXT: mov.b64 %SPL, __local_depot9 ;
518471; PTX-NEXT: cvta.local.u64 %SP, %SPL;
519472; PTX-NEXT: ld.param.u64 %rd1, [memcpy_to_param_param_0];
520473; PTX-NEXT: add.u64 %rd3, %SPL, 0;
@@ -700,7 +653,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
700653;
701654; PTX-LABEL: test_select_write(
702655; PTX: {
703- ; PTX-NEXT: .local .align 4 .b8 __local_depot14 [8];
656+ ; PTX-NEXT: .local .align 4 .b8 __local_depot12 [8];
704657; PTX-NEXT: .reg .b64 %SP;
705658; PTX-NEXT: .reg .b64 %SPL;
706659; PTX-NEXT: .reg .pred %p<2>;
@@ -709,7 +662,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
709662; PTX-NEXT: .reg .b64 %rd<6>;
710663; PTX-EMPTY:
711664; PTX-NEXT: // %bb.0: // %bb
712- ; PTX-NEXT: mov.b64 %SPL, __local_depot14 ;
665+ ; PTX-NEXT: mov.b64 %SPL, __local_depot12 ;
713666; PTX-NEXT: cvta.local.u64 %SP, %SPL;
714667; PTX-NEXT: ld.param.u8 %rs1, [test_select_write_param_3];
715668; PTX-NEXT: and.b16 %rs2, %rs1, 1;
@@ -809,10 +762,10 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
809762; PTX_60-NEXT: ld.param.u64 %rd2, [test_phi_param_2];
810763; PTX_60-NEXT: cvta.to.global.u64 %rd1, %rd2;
811764; PTX_60-NEXT: ld.param.u32 %r4, [test_phi_param_0];
812- ; PTX_60-NEXT: @%p1 bra $L__BB15_2 ;
765+ ; PTX_60-NEXT: @%p1 bra $L__BB13_2 ;
813766; PTX_60-NEXT: // %bb.1: // %second
814767; PTX_60-NEXT: ld.param.u32 %r4, [test_phi_param_1+4];
815- ; PTX_60-NEXT: $L__BB15_2 : // %merge
768+ ; PTX_60-NEXT: $L__BB13_2 : // %merge
816769; PTX_60-NEXT: st.global.u32 [%rd1], %r4;
817770; PTX_60-NEXT: ret;
818771;
@@ -830,11 +783,11 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval
830783; PTX_70-NEXT: mov.b64 %rd7, test_phi_param_0;
831784; PTX_70-NEXT: ld.param.u64 %rd6, [test_phi_param_2];
832785; PTX_70-NEXT: cvta.to.global.u64 %rd1, %rd6;
833- ; PTX_70-NEXT: @%p1 bra $L__BB15_2 ;
786+ ; PTX_70-NEXT: @%p1 bra $L__BB13_2 ;
834787; PTX_70-NEXT: // %bb.1: // %second
835788; PTX_70-NEXT: mov.b64 %rd2, test_phi_param_1;
836789; PTX_70-NEXT: add.s64 %rd7, %rd2, 4;
837- ; PTX_70-NEXT: $L__BB15_2 : // %merge
790+ ; PTX_70-NEXT: $L__BB13_2 : // %merge
838791; PTX_70-NEXT: ld.param.u32 %r1, [%rd7];
839792; PTX_70-NEXT: st.global.u32 [%rd1], %r1;
840793; PTX_70-NEXT: ret;
@@ -880,7 +833,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
880833;
881834; PTX-LABEL: test_phi_write(
882835; PTX: {
883- ; PTX-NEXT: .local .align 4 .b8 __local_depot16 [8];
836+ ; PTX-NEXT: .local .align 4 .b8 __local_depot14 [8];
884837; PTX-NEXT: .reg .b64 %SP;
885838; PTX-NEXT: .reg .b64 %SPL;
886839; PTX-NEXT: .reg .pred %p<2>;
@@ -889,7 +842,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
889842; PTX-NEXT: .reg .b64 %rd<7>;
890843; PTX-EMPTY:
891844; PTX-NEXT: // %bb.0: // %bb
892- ; PTX-NEXT: mov.b64 %SPL, __local_depot16 ;
845+ ; PTX-NEXT: mov.b64 %SPL, __local_depot14 ;
893846; PTX-NEXT: cvta.local.u64 %SP, %SPL;
894847; PTX-NEXT: ld.param.u8 %rs1, [test_phi_write_param_2];
895848; PTX-NEXT: and.b16 %rs2, %rs1, 1;
@@ -900,10 +853,10 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
900853; PTX-NEXT: add.u64 %rd6, %SPL, 4;
901854; PTX-NEXT: ld.param.u32 %r2, [test_phi_write_param_0];
902855; PTX-NEXT: st.u32 [%SP+4], %r2;
903- ; PTX-NEXT: @%p1 bra $L__BB16_2 ;
856+ ; PTX-NEXT: @%p1 bra $L__BB14_2 ;
904857; PTX-NEXT: // %bb.1: // %second
905858; PTX-NEXT: mov.b64 %rd6, %rd1;
906- ; PTX-NEXT: $L__BB16_2 : // %merge
859+ ; PTX-NEXT: $L__BB14_2 : // %merge
907860; PTX-NEXT: mov.b32 %r3, 1;
908861; PTX-NEXT: st.local.u32 [%rd6], %r3;
909862; PTX-NEXT: ret;
@@ -935,14 +888,14 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
935888;
936889; PTX-LABEL: test_forward_byval_arg(
937890; PTX: {
938- ; PTX-NEXT: .local .align 4 .b8 __local_depot17 [4];
891+ ; PTX-NEXT: .local .align 4 .b8 __local_depot15 [4];
939892; PTX-NEXT: .reg .b64 %SP;
940893; PTX-NEXT: .reg .b64 %SPL;
941894; PTX-NEXT: .reg .b32 %r<2>;
942895; PTX-NEXT: .reg .b64 %rd<3>;
943896; PTX-EMPTY:
944897; PTX-NEXT: // %bb.0:
945- ; PTX-NEXT: mov.b64 %SPL, __local_depot17 ;
898+ ; PTX-NEXT: mov.b64 %SPL, __local_depot15 ;
946899; PTX-NEXT: add.u64 %rd2, %SPL, 0;
947900; PTX-NEXT: ld.param.u32 %r1, [test_forward_byval_arg_param_0];
948901; PTX-NEXT: st.local.u32 [%rd2], %r1;
0 commit comments