@@ -148,35 +148,34 @@ entry:
148148define dso_local i32 @variadics2 (i32 noundef %first , ...) {
149149; CHECK-PTX-LABEL: variadics2(
150150; CHECK-PTX: {
151- ; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4 ];
151+ ; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3 ];
152152; CHECK-PTX-NEXT: .reg .b64 %SP;
153153; CHECK-PTX-NEXT: .reg .b64 %SPL;
154- ; CHECK-PTX-NEXT: .reg .b16 %rs<6 >;
154+ ; CHECK-PTX-NEXT: .reg .b16 %rs<4 >;
155155; CHECK-PTX-NEXT: .reg .b32 %r<7>;
156- ; CHECK-PTX-NEXT: .reg .b64 %rd<7 >;
156+ ; CHECK-PTX-NEXT: .reg .b64 %rd<9 >;
157157; CHECK-PTX-EMPTY:
158158; CHECK-PTX-NEXT: // %bb.0: // %entry
159159; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
160- ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
161160; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
162161; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
163- ; CHECK-PTX-NEXT: add.s64 %rd2 , %rd1, 7 ;
164- ; CHECK-PTX-NEXT: and.b64 %rd3 , %rd2, -8 ;
165- ; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3] ;
166- ; CHECK-PTX-NEXT: ld.s8 %r3 , [%rd3+4 ];
167- ; CHECK-PTX-NEXT: ld.u8 %rs1 , [%rd3+7 ];
168- ; CHECK-PTX-NEXT: st .u8 [%SP+2], %rs1 ;
169- ; CHECK-PTX-NEXT: ld. u8 %rs2, [%rd3+5] ;
170- ; CHECK-PTX-NEXT: ld.u8 %rs3 , [%rd3 +6];
171- ; CHECK-PTX-NEXT: shl.b16 %rs4 , %rs3, 8 ;
172- ; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2 ;
173- ; CHECK-PTX-NEXT: st.u16 [%SP ], %rs5 ;
174- ; CHECK-PTX-NEXT: ld.u64 %rd4 , [%rd3 +8];
162+ ; CHECK-PTX-NEXT: add.u64 %rd3 , %SPL, 0 ;
163+ ; CHECK-PTX-NEXT: add.s64 %rd4 , %rd1, 7 ;
164+ ; CHECK-PTX-NEXT: and.b64 %rd5, %rd4, -8 ;
165+ ; CHECK-PTX-NEXT: ld.u32 %r2 , [%rd5 ];
166+ ; CHECK-PTX-NEXT: ld.s8 %r3 , [%rd5+4 ];
167+ ; CHECK-PTX-NEXT: ld .u8 %rs1, [%rd5+7] ;
168+ ; CHECK-PTX-NEXT: st.local. u8 [%rd3+2], %rs1 ;
169+ ; CHECK-PTX-NEXT: ld.u8 %rs2 , [%rd5 +6];
170+ ; CHECK-PTX-NEXT: st.local.u8 [%rd3+1] , %rs2 ;
171+ ; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd5+5] ;
172+ ; CHECK-PTX-NEXT: st.local.u8 [%rd3 ], %rs3 ;
173+ ; CHECK-PTX-NEXT: ld.u64 %rd6 , [%rd5 +8];
175174; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
176175; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
177- ; CHECK-PTX-NEXT: cvt.u64.u32 %rd5 , %r5;
178- ; CHECK-PTX-NEXT: add.s64 %rd6 , %rd5 , %rd4 ;
179- ; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6 ;
176+ ; CHECK-PTX-NEXT: cvt.u64.u32 %rd7 , %r5;
177+ ; CHECK-PTX-NEXT: add.s64 %rd8 , %rd7 , %rd6 ;
178+ ; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd8 ;
180179; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
181180; CHECK-PTX-NEXT: ret;
182181entry:
@@ -213,7 +212,7 @@ define dso_local i32 @bar() {
213212; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
214213; CHECK-PTX-NEXT: .reg .b64 %SP;
215214; CHECK-PTX-NEXT: .reg .b64 %SPL;
216- ; CHECK-PTX-NEXT: .reg .b16 %rs<10 >;
215+ ; CHECK-PTX-NEXT: .reg .b16 %rs<8 >;
217216; CHECK-PTX-NEXT: .reg .b32 %r<4>;
218217; CHECK-PTX-NEXT: .reg .b64 %rd<4>;
219218; CHECK-PTX-EMPTY:
@@ -228,9 +227,7 @@ define dso_local i32 @bar() {
228227; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
229228; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd1+6];
230229; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
231- ; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
232- ; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
233- ; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
230+ ; CHECK-PTX-NEXT: st.local.u8 [%rd2], %rs6;
234231; CHECK-PTX-NEXT: mov.b32 %r1, 1;
235232; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
236233; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
0 commit comments