@@ -148,35 +148,34 @@ entry:
148148define dso_local i32 @variadics2 (i32 noundef %first , ...) {
149149; CHECK-PTX-LABEL: variadics2(
150150; CHECK-PTX: {
151- ; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4 ];
151+ ; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3 ];
152152; CHECK-PTX-NEXT: .reg .b64 %SP;
153153; CHECK-PTX-NEXT: .reg .b64 %SPL;
154- ; CHECK-PTX-NEXT: .reg .b16 %rs<6 >;
154+ ; CHECK-PTX-NEXT: .reg .b16 %rs<4 >;
155155; CHECK-PTX-NEXT: .reg .b32 %r<7>;
156- ; CHECK-PTX-NEXT: .reg .b64 %rd<7 >;
156+ ; CHECK-PTX-NEXT: .reg .b64 %rd<9 >;
157157; CHECK-PTX-EMPTY:
158158; CHECK-PTX-NEXT: // %bb.0: // %entry
159159; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
160- ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
161160; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
162161; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
163- ; CHECK-PTX-NEXT: add.s64 %rd2 , %rd1, 7 ;
164- ; CHECK-PTX-NEXT: and.b64 %rd3 , %rd2, -8 ;
165- ; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3] ;
166- ; CHECK-PTX-NEXT: ld.s8 %r3 , [%rd3+4 ];
167- ; CHECK-PTX-NEXT: ld.u8 %rs1 , [%rd3+7 ];
168- ; CHECK-PTX-NEXT: st .u8 [%SP+2], %rs1 ;
169- ; CHECK-PTX-NEXT: ld. u8 %rs2, [%rd3+5] ;
170- ; CHECK-PTX-NEXT: ld.u8 %rs3 , [%rd3 +6];
171- ; CHECK-PTX-NEXT: shl.b16 %rs4 , %rs3, 8 ;
172- ; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2 ;
173- ; CHECK-PTX-NEXT: st.u16 [%SP ], %rs5 ;
174- ; CHECK-PTX-NEXT: ld.u64 %rd4 , [%rd3 +8];
162+ ; CHECK-PTX-NEXT: add.u64 %rd3 , %SPL, 0 ;
163+ ; CHECK-PTX-NEXT: add.s64 %rd4 , %rd1, 7 ;
164+ ; CHECK-PTX-NEXT: and.b64 %rd5, %rd4, -8 ;
165+ ; CHECK-PTX-NEXT: ld.u32 %r2 , [%rd5 ];
166+ ; CHECK-PTX-NEXT: ld.s8 %r3 , [%rd5+4 ];
167+ ; CHECK-PTX-NEXT: ld .u8 %rs1, [%rd5+7] ;
168+ ; CHECK-PTX-NEXT: st.local. u8 [%rd3+2], %rs1 ;
169+ ; CHECK-PTX-NEXT: ld.u8 %rs2 , [%rd5 +6];
170+ ; CHECK-PTX-NEXT: st.local.u8 [%rd3+1] , %rs2 ;
171+ ; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd5+5] ;
172+ ; CHECK-PTX-NEXT: st.local.u8 [%rd3 ], %rs3 ;
173+ ; CHECK-PTX-NEXT: ld.u64 %rd6 , [%rd5 +8];
175174; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
176175; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
177- ; CHECK-PTX-NEXT: cvt.u64.u32 %rd5 , %r5;
178- ; CHECK-PTX-NEXT: add.s64 %rd6 , %rd5 , %rd4 ;
179- ; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6 ;
176+ ; CHECK-PTX-NEXT: cvt.u64.u32 %rd7 , %r5;
177+ ; CHECK-PTX-NEXT: add.s64 %rd8 , %rd7 , %rd6 ;
178+ ; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd8 ;
180179; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
181180; CHECK-PTX-NEXT: ret;
182181entry:
@@ -213,39 +212,39 @@ define dso_local i32 @bar() {
213212; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
214213; CHECK-PTX-NEXT: .reg .b64 %SP;
215214; CHECK-PTX-NEXT: .reg .b64 %SPL;
216- ; CHECK-PTX-NEXT: .reg .b16 %rs<10 >;
215+ ; CHECK-PTX-NEXT: .reg .b16 %rs<8 >;
217216; CHECK-PTX-NEXT: .reg .b32 %r<4>;
218- ; CHECK-PTX-NEXT: .reg .b64 %rd<7 >;
217+ ; CHECK-PTX-NEXT: .reg .b64 %rd<9 >;
219218; CHECK-PTX-EMPTY:
220219; CHECK-PTX-NEXT: // %bb.0: // %entry
221220; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
222221; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
223- ; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
224- ; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
225- ; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
222+ ; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
223+ ; CHECK-PTX-NEXT: mov.u64 %rd3, __const_$_bar_$_s1;
224+ ; CHECK-PTX-NEXT: add.s64 %rd4, %rd3, 7;
225+ ; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd4];
226226; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
227- ; CHECK-PTX-NEXT: st.u8 [%SP +2], %rs2;
228- ; CHECK-PTX-NEXT: add.s64 %rd3 , %rd1, 5 ;
229- ; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3 ];
227+ ; CHECK-PTX-NEXT: st.local. u8 [%rd2 +2], %rs2;
228+ ; CHECK-PTX-NEXT: add.s64 %rd5 , %rd3, 6 ;
229+ ; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd5 ];
230230; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
231- ; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
232- ; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
231+ ; CHECK-PTX-NEXT: st.local.u8 [%rd2+1], %rs4;
232+ ; CHECK-PTX-NEXT: add.s64 %rd6, %rd3, 5;
233+ ; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd6];
233234; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
234- ; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
235- ; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
236- ; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
235+ ; CHECK-PTX-NEXT: st.local.u8 [%rd2], %rs6;
237236; CHECK-PTX-NEXT: mov.b32 %r1, 1;
238237; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
239- ; CHECK-PTX-NEXT: mov.b16 %rs9 , 1;
240- ; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9 ;
241- ; CHECK-PTX-NEXT: mov.b64 %rd5 , 1;
242- ; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5 ;
243- ; CHECK-PTX-NEXT: add.u64 %rd6 , %SP, 8;
238+ ; CHECK-PTX-NEXT: mov.b16 %rs7 , 1;
239+ ; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs7 ;
240+ ; CHECK-PTX-NEXT: mov.b64 %rd7 , 1;
241+ ; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7 ;
242+ ; CHECK-PTX-NEXT: add.u64 %rd8 , %SP, 8;
244243; CHECK-PTX-NEXT: { // callseq 1, 0
245244; CHECK-PTX-NEXT: .param .b32 param0;
246245; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
247246; CHECK-PTX-NEXT: .param .b64 param1;
248- ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6 ;
247+ ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd8 ;
249248; CHECK-PTX-NEXT: .param .b32 retval0;
250249; CHECK-PTX-NEXT: call.uni (retval0),
251250; CHECK-PTX-NEXT: variadics2,
@@ -384,26 +383,29 @@ define dso_local void @qux() {
384383; CHECK-PTX-NEXT: .reg .b64 %SP;
385384; CHECK-PTX-NEXT: .reg .b64 %SPL;
386385; CHECK-PTX-NEXT: .reg .b32 %r<3>;
387- ; CHECK-PTX-NEXT: .reg .b64 %rd<7 >;
386+ ; CHECK-PTX-NEXT: .reg .b64 %rd<11 >;
388387; CHECK-PTX-EMPTY:
389388; CHECK-PTX-NEXT: // %bb.0: // %entry
390389; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
391390; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
392- ; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
393- ; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
394- ; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
395- ; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
396- ; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
397- ; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
398- ; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
399- ; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
400- ; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
391+ ; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
392+ ; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [__const_$_qux_$_s];
393+ ; CHECK-PTX-NEXT: st.local.u64 [%rd2], %rd3;
394+ ; CHECK-PTX-NEXT: mov.u64 %rd4, __const_$_qux_$_s;
395+ ; CHECK-PTX-NEXT: add.s64 %rd5, %rd4, 8;
396+ ; CHECK-PTX-NEXT: ld.global.nc.u64 %rd6, [%rd5];
397+ ; CHECK-PTX-NEXT: st.local.u64 [%rd2+8], %rd6;
398+ ; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
399+ ; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
400+ ; CHECK-PTX-NEXT: ld.u64 %rd8, [%SP];
401+ ; CHECK-PTX-NEXT: ld.u64 %rd9, [%SP+8];
402+ ; CHECK-PTX-NEXT: add.u64 %rd10, %SP, 16;
401403; CHECK-PTX-NEXT: { // callseq 3, 0
402404; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
403- ; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1 ;
404- ; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4 ;
405+ ; CHECK-PTX-NEXT: st.param.b64 [param0], %rd8 ;
406+ ; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd9 ;
405407; CHECK-PTX-NEXT: .param .b64 param1;
406- ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6 ;
408+ ; CHECK-PTX-NEXT: st.param.b64 [param1], %rd10 ;
407409; CHECK-PTX-NEXT: .param .b32 retval0;
408410; CHECK-PTX-NEXT: call.uni (retval0),
409411; CHECK-PTX-NEXT: variadics4,
0 commit comments