@@ -115,5 +115,150 @@ define ptx_kernel void @inlineasm(ptr %p) {
115
115
store <2 x float > %mul , ptr %p , align 8
116
116
ret void
117
117
}
118
+
119
; trunc_v2i32: truncates the <2 x i32> argument to <2 x i8>, places the two
; bytes in lanes 2-3 of a <4 x i8> whose lanes 0-1 are zero, and stores that
; vector to the null pointer with 4-byte alignment.
; Two sets of expected PTX follow, one per FileCheck prefix (SM90A and SM100).
; In the SM90A expectations the argument is read with ld.param.v2.b32; in the
; SM100 expectations it is read as one b64 and unpacked with mov.b64.
; In both, prmt.b32 assembles the 32-bit value written by a single st.b32.
; NOTE(review): the prefixes presumably map to sm_90a / sm_100 RUN lines that
; are outside this view -- confirm. The expectation lines look tool-generated;
; confirm before hand-editing them.
+ define ptx_kernel void @trunc_v2i32 (<2 x i32 > %0 ) {
120
+ ; CHECK-SM90A-LABEL: trunc_v2i32(
121
+ ; CHECK-SM90A: {
122
+ ; CHECK-SM90A-NEXT: .reg .b32 %r<7>;
123
+ ; CHECK-SM90A-NEXT: .reg .b64 %rd<2>;
124
+ ; CHECK-SM90A-EMPTY:
125
+ ; CHECK-SM90A-NEXT: // %bb.0:
126
+ ; CHECK-SM90A-NEXT: ld.param.v2.b32 {%r1, %r2}, [trunc_v2i32_param_0];
127
+ ; CHECK-SM90A-NEXT: prmt.b32 %r3, %r1, %r2, 0x3340U;
128
+ ; CHECK-SM90A-NEXT: mov.b32 %r4, 0;
129
+ ; CHECK-SM90A-NEXT: prmt.b32 %r5, %r4, 0, 0x3340U;
130
+ ; CHECK-SM90A-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
131
+ ; CHECK-SM90A-NEXT: mov.b64 %rd1, 0;
132
+ ; CHECK-SM90A-NEXT: st.b32 [%rd1], %r6;
133
+ ; CHECK-SM90A-NEXT: ret;
134
+ ;
135
+ ; CHECK-SM100-LABEL: trunc_v2i32(
136
+ ; CHECK-SM100: {
137
+ ; CHECK-SM100-NEXT: .reg .b32 %r<7>;
138
+ ; CHECK-SM100-NEXT: .reg .b64 %rd<3>;
139
+ ; CHECK-SM100-EMPTY:
140
+ ; CHECK-SM100-NEXT: // %bb.0:
141
+ ; CHECK-SM100-NEXT: ld.param.b64 %rd1, [trunc_v2i32_param_0];
142
+ ; CHECK-SM100-NEXT: mov.b64 {%r1, %r2}, %rd1;
143
+ ; CHECK-SM100-NEXT: mov.b32 %r3, 0;
144
+ ; CHECK-SM100-NEXT: prmt.b32 %r4, %r3, 0, 0x3340U;
145
+ ; CHECK-SM100-NEXT: prmt.b32 %r5, %r1, %r2, 0x3340U;
146
+ ; CHECK-SM100-NEXT: prmt.b32 %r6, %r4, %r5, 0x5410U;
147
+ ; CHECK-SM100-NEXT: mov.b64 %rd2, 0;
148
+ ; CHECK-SM100-NEXT: st.b32 [%rd2], %r6;
149
+ ; CHECK-SM100-NEXT: ret;
150
; Narrow each 32-bit lane of the argument to 8 bits.
+ %2 = trunc <2 x i32 > %0 to <2 x i8 >
151
; Mask indices 0-1 select the zero vector; indices 2-3 select the truncated lanes.
+ %3 = shufflevector <2 x i8 > zeroinitializer , <2 x i8 > %2 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
152
; The destination is the null pointer; the expected PTX shows it as address 0.
+ store <4 x i8 > %3 , ptr null , align 4
153
+ ret void
154
+ }
155
+
156
; zextend_to_v2i32: zero-extends the <2 x i8> argument to <2 x i32>, places the
; two results in lanes 2-3 of a <4 x i32> whose lanes 0-1 are zero, and stores
; that vector to the null pointer with 4-byte alignment.
; Two sets of expected PTX follow, one per FileCheck prefix (SM90A and SM100).
; Both widen each lane with cvt.u32.u16 and write the result as four st.b32
; stores to addresses 0, 4, 8 and 12.
; The SM90A expectations store the 32-bit registers (and immediate 0) directly.
; The SM100 expectations first pack lane pairs into 64-bit registers, use those
; as st.b32 sources, and obtain the upper half with shr.u64 by 32.
; In both expectation sets %r1 (mov.b32 from {%rs1, %rs2}) is written but not
; read by any later expected instruction.
; NOTE(review): the prefixes presumably map to sm_90a / sm_100 RUN lines that
; are outside this view -- confirm. The expectation lines look tool-generated;
; confirm before hand-editing them.
+ define ptx_kernel void @zextend_to_v2i32 (<2 x i8 > %0 ) {
157
+ ; CHECK-SM90A-LABEL: zextend_to_v2i32(
158
+ ; CHECK-SM90A: {
159
+ ; CHECK-SM90A-NEXT: .reg .b16 %rs<3>;
160
+ ; CHECK-SM90A-NEXT: .reg .b32 %r<4>;
161
+ ; CHECK-SM90A-NEXT: .reg .b64 %rd<5>;
162
+ ; CHECK-SM90A-EMPTY:
163
+ ; CHECK-SM90A-NEXT: // %bb.0:
164
+ ; CHECK-SM90A-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [zextend_to_v2i32_param_0];
165
+ ; CHECK-SM90A-NEXT: mov.b32 %r1, {%rs1, %rs2};
166
+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r2, %rs1;
167
+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r3, %rs2;
168
+ ; CHECK-SM90A-NEXT: mov.b64 %rd1, 12;
169
+ ; CHECK-SM90A-NEXT: st.b32 [%rd1], %r3;
170
+ ; CHECK-SM90A-NEXT: mov.b64 %rd2, 8;
171
+ ; CHECK-SM90A-NEXT: st.b32 [%rd2], %r2;
172
+ ; CHECK-SM90A-NEXT: mov.b64 %rd3, 4;
173
+ ; CHECK-SM90A-NEXT: st.b32 [%rd3], 0;
174
+ ; CHECK-SM90A-NEXT: mov.b64 %rd4, 0;
175
+ ; CHECK-SM90A-NEXT: st.b32 [%rd4], 0;
176
+ ; CHECK-SM90A-NEXT: ret;
177
+ ;
178
+ ; CHECK-SM100-LABEL: zextend_to_v2i32(
179
+ ; CHECK-SM100: {
180
+ ; CHECK-SM100-NEXT: .reg .b16 %rs<3>;
181
+ ; CHECK-SM100-NEXT: .reg .b32 %r<5>;
182
+ ; CHECK-SM100-NEXT: .reg .b64 %rd<8>;
183
+ ; CHECK-SM100-EMPTY:
184
+ ; CHECK-SM100-NEXT: // %bb.0:
185
+ ; CHECK-SM100-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [zextend_to_v2i32_param_0];
186
+ ; CHECK-SM100-NEXT: mov.b32 %r1, {%rs1, %rs2};
187
+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r2, %rs2;
188
+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r3, %rs1;
189
+ ; CHECK-SM100-NEXT: mov.b64 %rd1, {%r3, %r2};
190
+ ; CHECK-SM100-NEXT: mov.b32 %r4, 0;
191
+ ; CHECK-SM100-NEXT: mov.b64 %rd2, {%r4, %r4};
192
+ ; CHECK-SM100-NEXT: mov.b64 %rd3, 4;
193
+ ; CHECK-SM100-NEXT: st.b32 [%rd3], %rd2;
194
+ ; CHECK-SM100-NEXT: mov.b64 %rd4, 0;
195
+ ; CHECK-SM100-NEXT: st.b32 [%rd4], %rd2;
196
+ ; CHECK-SM100-NEXT: mov.b64 %rd5, 8;
197
+ ; CHECK-SM100-NEXT: st.b32 [%rd5], %rd1;
198
+ ; CHECK-SM100-NEXT: shr.u64 %rd6, %rd1, 32;
199
+ ; CHECK-SM100-NEXT: mov.b64 %rd7, 12;
200
+ ; CHECK-SM100-NEXT: st.b32 [%rd7], %rd6;
201
+ ; CHECK-SM100-NEXT: ret;
202
; Widen each 8-bit lane to 32 bits, filling the upper bits with zero.
+ %2 = zext <2 x i8 > %0 to <2 x i32 >
203
; Mask indices 0-1 select the zero vector; indices 2-3 select the extended lanes.
+ %3 = shufflevector <2 x i32 > zeroinitializer , <2 x i32 > %2 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
204
; The destination is the null pointer; the expected PTX shows it as addresses 0..12.
+ store <4 x i32 > %3 , ptr null , align 4
205
+ ret void
206
+ }
207
+
208
; sextend_to_v2i32: sign-extends the <2 x i8> argument to <2 x i32>, places the
; two results in lanes 2-3 of a <4 x i32> whose lanes 0-1 are zero, and stores
; that vector to the null pointer with 4-byte alignment.
; Two sets of expected PTX follow, one per FileCheck prefix (SM90A and SM100).
; Both widen each lane with cvt.u32.u16 followed by cvt.s32.s8 and write the
; result as four st.b32 stores to addresses 0, 4, 8 and 12.
; The SM90A expectations store the 32-bit registers (and immediate 0) directly.
; The SM100 expectations first pack lane pairs into 64-bit registers, use those
; as st.b32 sources, and obtain the upper half with shr.u64 by 32.
; In both expectation sets %r1 (mov.b32 from {%rs1, %rs2}) is written but not
; read by any later expected instruction.
; NOTE(review): the prefixes presumably map to sm_90a / sm_100 RUN lines that
; are outside this view -- confirm. The expectation lines look tool-generated;
; confirm before hand-editing them.
+ define ptx_kernel void @sextend_to_v2i32 (<2 x i8 > %0 ) {
209
+ ; CHECK-SM90A-LABEL: sextend_to_v2i32(
210
+ ; CHECK-SM90A: {
211
+ ; CHECK-SM90A-NEXT: .reg .b16 %rs<3>;
212
+ ; CHECK-SM90A-NEXT: .reg .b32 %r<6>;
213
+ ; CHECK-SM90A-NEXT: .reg .b64 %rd<5>;
214
+ ; CHECK-SM90A-EMPTY:
215
+ ; CHECK-SM90A-NEXT: // %bb.0:
216
+ ; CHECK-SM90A-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [sextend_to_v2i32_param_0];
217
+ ; CHECK-SM90A-NEXT: mov.b32 %r1, {%rs1, %rs2};
218
+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r2, %rs1;
219
+ ; CHECK-SM90A-NEXT: cvt.s32.s8 %r3, %r2;
220
+ ; CHECK-SM90A-NEXT: cvt.u32.u16 %r4, %rs2;
221
+ ; CHECK-SM90A-NEXT: cvt.s32.s8 %r5, %r4;
222
+ ; CHECK-SM90A-NEXT: mov.b64 %rd1, 12;
223
+ ; CHECK-SM90A-NEXT: st.b32 [%rd1], %r5;
224
+ ; CHECK-SM90A-NEXT: mov.b64 %rd2, 8;
225
+ ; CHECK-SM90A-NEXT: st.b32 [%rd2], %r3;
226
+ ; CHECK-SM90A-NEXT: mov.b64 %rd3, 4;
227
+ ; CHECK-SM90A-NEXT: st.b32 [%rd3], 0;
228
+ ; CHECK-SM90A-NEXT: mov.b64 %rd4, 0;
229
+ ; CHECK-SM90A-NEXT: st.b32 [%rd4], 0;
230
+ ; CHECK-SM90A-NEXT: ret;
231
+ ;
232
+ ; CHECK-SM100-LABEL: sextend_to_v2i32(
233
+ ; CHECK-SM100: {
234
+ ; CHECK-SM100-NEXT: .reg .b16 %rs<3>;
235
+ ; CHECK-SM100-NEXT: .reg .b32 %r<7>;
236
+ ; CHECK-SM100-NEXT: .reg .b64 %rd<8>;
237
+ ; CHECK-SM100-EMPTY:
238
+ ; CHECK-SM100-NEXT: // %bb.0:
239
+ ; CHECK-SM100-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [sextend_to_v2i32_param_0];
240
+ ; CHECK-SM100-NEXT: mov.b32 %r1, {%rs1, %rs2};
241
+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r2, %rs2;
242
+ ; CHECK-SM100-NEXT: cvt.s32.s8 %r3, %r2;
243
+ ; CHECK-SM100-NEXT: cvt.u32.u16 %r4, %rs1;
244
+ ; CHECK-SM100-NEXT: cvt.s32.s8 %r5, %r4;
245
+ ; CHECK-SM100-NEXT: mov.b64 %rd1, {%r5, %r3};
246
+ ; CHECK-SM100-NEXT: mov.b32 %r6, 0;
247
+ ; CHECK-SM100-NEXT: mov.b64 %rd2, {%r6, %r6};
248
+ ; CHECK-SM100-NEXT: mov.b64 %rd3, 4;
249
+ ; CHECK-SM100-NEXT: st.b32 [%rd3], %rd2;
250
+ ; CHECK-SM100-NEXT: mov.b64 %rd4, 0;
251
+ ; CHECK-SM100-NEXT: st.b32 [%rd4], %rd2;
252
+ ; CHECK-SM100-NEXT: mov.b64 %rd5, 8;
253
+ ; CHECK-SM100-NEXT: st.b32 [%rd5], %rd1;
254
+ ; CHECK-SM100-NEXT: shr.u64 %rd6, %rd1, 32;
255
+ ; CHECK-SM100-NEXT: mov.b64 %rd7, 12;
256
+ ; CHECK-SM100-NEXT: st.b32 [%rd7], %rd6;
257
+ ; CHECK-SM100-NEXT: ret;
258
; Widen each 8-bit lane to 32 bits, replicating the sign bit.
+ %2 = sext <2 x i8 > %0 to <2 x i32 >
259
; Mask indices 0-1 select the zero vector; indices 2-3 select the extended lanes.
+ %3 = shufflevector <2 x i32 > zeroinitializer , <2 x i32 > %2 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
260
; The destination is the null pointer; the expected PTX shows it as addresses 0..12.
+ store <4 x i32 > %3 , ptr null , align 4
261
+ ret void
262
+ }
118
263
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
119
264
; CHECK: {{.*}}
0 commit comments