@@ -115,5 +115,150 @@ define ptx_kernel void @inlineasm(ptr %p) {
115115 store <2 x float > %mul , ptr %p , align 8
116116 ret void
117117}
118+
; trunc_v2i32: truncates the <2 x i32> kernel argument to <2 x i8>, then
; concatenates a zero <2 x i8> with the truncated value (shuffle mask
; 0,1,2,3 selects both lanes of the first operand followed by both lanes of
; the second) and stores the resulting <4 x i8> to a null pointer, align 4.
;
; The SM90A-prefixed lines expect the argument to be loaded with
; ld.param.v2.b32; the SM100-prefixed lines expect a single ld.param.b64 that
; is unpacked with mov.b64. Both variants expect three prmt.b32 instructions
; feeding one st.b32.
;
; NOTE(review): the expectation lines below look script-generated (see the
; autogenerated note at the end of this file) - presumably they should be
; regenerated rather than edited by hand; confirm.
119+ define ptx_kernel void @trunc_v2i32 (<2 x i32 > %0 ) {
120+ ; CHECK-SM90A-LABEL: trunc_v2i32(
121+ ; CHECK-SM90A: {
122+ ; CHECK-SM90A-NEXT: .reg .b32 %r<7>;
123+ ; CHECK-SM90A-NEXT: .reg .b64 %rd<2>;
124+ ; CHECK-SM90A-EMPTY:
125+ ; CHECK-SM90A-NEXT: // %bb.0:
126+ ; CHECK-SM90A-NEXT: ld.param.v2.b32 {%r1, %r2}, [trunc_v2i32_param_0];
127+ ; CHECK-SM90A-NEXT: prmt.b32 %r3, %r1, %r2, 0x3340U;
128+ ; CHECK-SM90A-NEXT: mov.b32 %r4, 0;
129+ ; CHECK-SM90A-NEXT: prmt.b32 %r5, %r4, 0, 0x3340U;
130+ ; CHECK-SM90A-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
131+ ; CHECK-SM90A-NEXT: mov.b64 %rd1, 0;
132+ ; CHECK-SM90A-NEXT: st.b32 [%rd1], %r6;
133+ ; CHECK-SM90A-NEXT: ret;
134+ ;
135+ ; CHECK-SM100-LABEL: trunc_v2i32(
136+ ; CHECK-SM100: {
137+ ; CHECK-SM100-NEXT: .reg .b32 %r<7>;
138+ ; CHECK-SM100-NEXT: .reg .b64 %rd<3>;
139+ ; CHECK-SM100-EMPTY:
140+ ; CHECK-SM100-NEXT: // %bb.0:
141+ ; CHECK-SM100-NEXT: ld.param.b64 %rd1, [trunc_v2i32_param_0];
142+ ; CHECK-SM100-NEXT: mov.b64 {%r1, %r2}, %rd1;
143+ ; CHECK-SM100-NEXT: mov.b32 %r3, 0;
144+ ; CHECK-SM100-NEXT: prmt.b32 %r4, %r3, 0, 0x3340U;
145+ ; CHECK-SM100-NEXT: prmt.b32 %r5, %r1, %r2, 0x3340U;
146+ ; CHECK-SM100-NEXT: prmt.b32 %r6, %r4, %r5, 0x5410U;
147+ ; CHECK-SM100-NEXT: mov.b64 %rd2, 0;
148+ ; CHECK-SM100-NEXT: st.b32 [%rd2], %r6;
149+ ; CHECK-SM100-NEXT: ret;
150+ %2 = trunc <2 x i32 > %0 to <2 x i8 >
151+ %3 = shufflevector <2 x i8 > zeroinitializer , <2 x i8 > %2 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
152+ store <4 x i8 > %3 , ptr null , align 4
153+ ret void
154+ }
155+
; zextend_to_v2i32: zero-extends the <2 x i8> kernel argument to <2 x i32>,
; concatenates a zero <2 x i32> with it (shuffle mask 0,1,2,3 selects both
; lanes of the first operand followed by both lanes of the second) and stores
; the resulting <4 x i32> to a null pointer, align 4.
;
; The SM90A-prefixed lines expect one cvt.u32.u16 per lane and four st.b32
; stores to addresses 12, 8, 4 and 0, with the two zero lanes stored as the
; immediate 0. The SM100-prefixed lines expect the lanes to be packed into
; 64-bit registers with mov.b64 and the upper lane to be extracted with
; shr.u64 by 32 before its store.
;
; NOTE(review): in both variants %r1 (mov.b32 from {%rs1, %rs2}) is never read
; by any later expected instruction - looks like a dead pack that is not
; eliminated; confirm this is intended to be pinned.
; NOTE(review): the SM100 variant expects st.b32 with a 64-bit %rd source
; register - presumably relying on PTX permitting a source operand wider than
; the instruction type for st; verify against the PTX ISA.
156+ define ptx_kernel void @zextend_to_v2i32 (<2 x i8 > %0 ) {
157+ ; CHECK-SM90A-LABEL: zextend_to_v2i32(
158+ ; CHECK-SM90A: {
159+ ; CHECK-SM90A-NEXT: .reg .b16 %rs<3>;
160+ ; CHECK-SM90A-NEXT: .reg .b32 %r<4>;
161+ ; CHECK-SM90A-NEXT: .reg .b64 %rd<5>;
162+ ; CHECK-SM90A-EMPTY:
163+ ; CHECK-SM90A-NEXT: // %bb.0:
164+ ; CHECK-SM90A-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [zextend_to_v2i32_param_0];
165+ ; CHECK-SM90A-NEXT: mov.b32 %r1, {%rs1, %rs2};
166+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r2, %rs1;
167+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r3, %rs2;
168+ ; CHECK-SM90A-NEXT: mov.b64 %rd1, 12;
169+ ; CHECK-SM90A-NEXT: st.b32 [%rd1], %r3;
170+ ; CHECK-SM90A-NEXT: mov.b64 %rd2, 8;
171+ ; CHECK-SM90A-NEXT: st.b32 [%rd2], %r2;
172+ ; CHECK-SM90A-NEXT: mov.b64 %rd3, 4;
173+ ; CHECK-SM90A-NEXT: st.b32 [%rd3], 0;
174+ ; CHECK-SM90A-NEXT: mov.b64 %rd4, 0;
175+ ; CHECK-SM90A-NEXT: st.b32 [%rd4], 0;
176+ ; CHECK-SM90A-NEXT: ret;
177+ ;
178+ ; CHECK-SM100-LABEL: zextend_to_v2i32(
179+ ; CHECK-SM100: {
180+ ; CHECK-SM100-NEXT: .reg .b16 %rs<3>;
181+ ; CHECK-SM100-NEXT: .reg .b32 %r<5>;
182+ ; CHECK-SM100-NEXT: .reg .b64 %rd<8>;
183+ ; CHECK-SM100-EMPTY:
184+ ; CHECK-SM100-NEXT: // %bb.0:
185+ ; CHECK-SM100-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [zextend_to_v2i32_param_0];
186+ ; CHECK-SM100-NEXT: mov.b32 %r1, {%rs1, %rs2};
187+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r2, %rs2;
188+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r3, %rs1;
189+ ; CHECK-SM100-NEXT: mov.b64 %rd1, {%r3, %r2};
190+ ; CHECK-SM100-NEXT: mov.b32 %r4, 0;
191+ ; CHECK-SM100-NEXT: mov.b64 %rd2, {%r4, %r4};
192+ ; CHECK-SM100-NEXT: mov.b64 %rd3, 4;
193+ ; CHECK-SM100-NEXT: st.b32 [%rd3], %rd2;
194+ ; CHECK-SM100-NEXT: mov.b64 %rd4, 0;
195+ ; CHECK-SM100-NEXT: st.b32 [%rd4], %rd2;
196+ ; CHECK-SM100-NEXT: mov.b64 %rd5, 8;
197+ ; CHECK-SM100-NEXT: st.b32 [%rd5], %rd1;
198+ ; CHECK-SM100-NEXT: shr.u64 %rd6, %rd1, 32;
199+ ; CHECK-SM100-NEXT: mov.b64 %rd7, 12;
200+ ; CHECK-SM100-NEXT: st.b32 [%rd7], %rd6;
201+ ; CHECK-SM100-NEXT: ret;
202+ %2 = zext <2 x i8 > %0 to <2 x i32 >
203+ %3 = shufflevector <2 x i32 > zeroinitializer , <2 x i32 > %2 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
204+ store <4 x i32 > %3 , ptr null , align 4
205+ ret void
206+ }
207+
; sextend_to_v2i32: sign-extends the <2 x i8> kernel argument to <2 x i32>,
; concatenates a zero <2 x i32> with it (shuffle mask 0,1,2,3 selects both
; lanes of the first operand followed by both lanes of the second) and stores
; the resulting <4 x i32> to a null pointer, align 4.
;
; Each lane is expected to be widened with cvt.u32.u16 followed by
; cvt.s32.s8. The SM90A-prefixed lines then expect four st.b32 stores to
; addresses 12, 8, 4 and 0, with the two zero lanes stored as the immediate 0.
; The SM100-prefixed lines expect the lanes to be packed into 64-bit registers
; with mov.b64 and the upper lane to be extracted with shr.u64 by 32 before
; its store.
;
; NOTE(review): in both variants %r1 (mov.b32 from {%rs1, %rs2}) is never read
; by any later expected instruction - looks like a dead pack that is not
; eliminated; confirm this is intended to be pinned.
; NOTE(review): the SM100 variant expects st.b32 with a 64-bit %rd source
; register - presumably relying on PTX permitting a source operand wider than
; the instruction type for st; verify against the PTX ISA.
208+ define ptx_kernel void @sextend_to_v2i32 (<2 x i8 > %0 ) {
209+ ; CHECK-SM90A-LABEL: sextend_to_v2i32(
210+ ; CHECK-SM90A: {
211+ ; CHECK-SM90A-NEXT: .reg .b16 %rs<3>;
212+ ; CHECK-SM90A-NEXT: .reg .b32 %r<6>;
213+ ; CHECK-SM90A-NEXT: .reg .b64 %rd<5>;
214+ ; CHECK-SM90A-EMPTY:
215+ ; CHECK-SM90A-NEXT: // %bb.0:
216+ ; CHECK-SM90A-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [sextend_to_v2i32_param_0];
217+ ; CHECK-SM90A-NEXT: mov.b32 %r1, {%rs1, %rs2};
218+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r2, %rs1;
219+ ; CHECK-SM90A-NEXT: cvt.s32.s8 %r3, %r2;
220+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r4, %rs2;
221+ ; CHECK-SM90A-NEXT: cvt.s32.s8 %r5, %r4;
222+ ; CHECK-SM90A-NEXT: mov.b64 %rd1, 12;
223+ ; CHECK-SM90A-NEXT: st.b32 [%rd1], %r5;
224+ ; CHECK-SM90A-NEXT: mov.b64 %rd2, 8;
225+ ; CHECK-SM90A-NEXT: st.b32 [%rd2], %r3;
226+ ; CHECK-SM90A-NEXT: mov.b64 %rd3, 4;
227+ ; CHECK-SM90A-NEXT: st.b32 [%rd3], 0;
228+ ; CHECK-SM90A-NEXT: mov.b64 %rd4, 0;
229+ ; CHECK-SM90A-NEXT: st.b32 [%rd4], 0;
230+ ; CHECK-SM90A-NEXT: ret;
231+ ;
232+ ; CHECK-SM100-LABEL: sextend_to_v2i32(
233+ ; CHECK-SM100: {
234+ ; CHECK-SM100-NEXT: .reg .b16 %rs<3>;
235+ ; CHECK-SM100-NEXT: .reg .b32 %r<7>;
236+ ; CHECK-SM100-NEXT: .reg .b64 %rd<8>;
237+ ; CHECK-SM100-EMPTY:
238+ ; CHECK-SM100-NEXT: // %bb.0:
239+ ; CHECK-SM100-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [sextend_to_v2i32_param_0];
240+ ; CHECK-SM100-NEXT: mov.b32 %r1, {%rs1, %rs2};
241+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r2, %rs2;
242+ ; CHECK-SM100-NEXT: cvt.s32.s8 %r3, %r2;
243+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r4, %rs1;
244+ ; CHECK-SM100-NEXT: cvt.s32.s8 %r5, %r4;
245+ ; CHECK-SM100-NEXT: mov.b64 %rd1, {%r5, %r3};
246+ ; CHECK-SM100-NEXT: mov.b32 %r6, 0;
247+ ; CHECK-SM100-NEXT: mov.b64 %rd2, {%r6, %r6};
248+ ; CHECK-SM100-NEXT: mov.b64 %rd3, 4;
249+ ; CHECK-SM100-NEXT: st.b32 [%rd3], %rd2;
250+ ; CHECK-SM100-NEXT: mov.b64 %rd4, 0;
251+ ; CHECK-SM100-NEXT: st.b32 [%rd4], %rd2;
252+ ; CHECK-SM100-NEXT: mov.b64 %rd5, 8;
253+ ; CHECK-SM100-NEXT: st.b32 [%rd5], %rd1;
254+ ; CHECK-SM100-NEXT: shr.u64 %rd6, %rd1, 32;
255+ ; CHECK-SM100-NEXT: mov.b64 %rd7, 12;
256+ ; CHECK-SM100-NEXT: st.b32 [%rd7], %rd6;
257+ ; CHECK-SM100-NEXT: ret;
258+ %2 = sext <2 x i8 > %0 to <2 x i32 >
259+ %3 = shufflevector <2 x i32 > zeroinitializer , <2 x i32 > %2 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
260+ store <4 x i32 > %3 , ptr null , align 4
261+ ret void
262+ }
118263;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
119264; CHECK: {{.*}}
0 commit comments