Skip to content

Commit 5e0991c

Browse files
committed
pre-commit tests
1 parent 75179c7 commit 5e0991c

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

llvm/test/CodeGen/NVPTX/i8x2-instructions.ll

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,5 +132,172 @@ define <2 x float> @test_uitofp_2xi8(<2 x i8> %a) {
132132
%1 = uitofp <2 x i8> %a to <2 x float>
133133
ret <2 x float> %1
134134
}
135+
136+
; Checks the PTX expected for storing a <2 x i8> argument through %ptr when the
; store is only 1-byte aligned (`align 1`). The two check sequences below differ
; only in the order of the two zero-valued byte stores.
; NOTE(review): both sequences contain four st.b8 instructions (offsets 0, +1,
; +2 and +3): the packed value goes to offset 0, the value shifted right by 16
; goes to +2, and zeros go to +1 and +3. A <2 x i8> store is two bytes wide, so
; this looks like current behaviour recorded ahead of a fix rather than the
; desired lowering - confirm before relying on it.
define void @test_store_i8x2_unaligned(ptr %ptr, <2 x i8> %a) {
137+
; O0-LABEL: test_store_i8x2_unaligned(
138+
; O0: {
139+
; O0-NEXT: .reg .b16 %rs<3>;
140+
; O0-NEXT: .reg .b32 %r<3>;
141+
; O0-NEXT: .reg .b64 %rd<2>;
142+
; O0-EMPTY:
143+
; O0-NEXT: // %bb.0:
144+
; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0];
145+
; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1];
146+
; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
147+
; O0-NEXT: st.b8 [%rd1], %r1;
148+
; O0-NEXT: shr.u32 %r2, %r1, 16;
149+
; O0-NEXT: st.b8 [%rd1+2], %r2;
150+
; O0-NEXT: st.b8 [%rd1+1], 0;
151+
; O0-NEXT: st.b8 [%rd1+3], 0;
152+
; O0-NEXT: ret;
153+
;
154+
; O3-LABEL: test_store_i8x2_unaligned(
155+
; O3: {
156+
; O3-NEXT: .reg .b16 %rs<3>;
157+
; O3-NEXT: .reg .b32 %r<3>;
158+
; O3-NEXT: .reg .b64 %rd<2>;
159+
; O3-EMPTY:
160+
; O3-NEXT: // %bb.0:
161+
; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_param_0];
162+
; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_store_i8x2_unaligned_param_1];
163+
; O3-NEXT: mov.b32 %r1, {%rs1, %rs2};
164+
; O3-NEXT: st.b8 [%rd1], %r1;
165+
; O3-NEXT: shr.u32 %r2, %r1, 16;
166+
; O3-NEXT: st.b8 [%rd1+2], %r2;
167+
; O3-NEXT: st.b8 [%rd1+3], 0;
168+
; O3-NEXT: st.b8 [%rd1+1], 0;
169+
; O3-NEXT: ret;
170+
store <2 x i8> %a, ptr %ptr, align 1
171+
ret void
172+
}
173+
174+
; Checks the PTX expected for storing the constant vector <i8 1, i8 2> through
; %ptr with 1-byte alignment (`align 1`). The instruction sequence is the same
; under both check prefixes.
; NOTE(review): the expected output is four immediate byte stores - 1 at
; offset 0, 0 at +1, 2 at +2 and 0 at +3 - although the stored vector is two
; bytes wide. This looks like current behaviour recorded ahead of a fix rather
; than the desired lowering - confirm before relying on it.
define void @test_store_i8x2_unaligned_immediate(ptr %ptr) {
175+
; O0-LABEL: test_store_i8x2_unaligned_immediate(
176+
; O0: {
177+
; O0-NEXT: .reg .b64 %rd<2>;
178+
; O0-EMPTY:
179+
; O0-NEXT: // %bb.0:
180+
; O0-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0];
181+
; O0-NEXT: st.b8 [%rd1+3], 0;
182+
; O0-NEXT: st.b8 [%rd1+2], 2;
183+
; O0-NEXT: st.b8 [%rd1+1], 0;
184+
; O0-NEXT: st.b8 [%rd1], 1;
185+
; O0-NEXT: ret;
186+
;
187+
; O3-LABEL: test_store_i8x2_unaligned_immediate(
188+
; O3: {
189+
; O3-NEXT: .reg .b64 %rd<2>;
190+
; O3-EMPTY:
191+
; O3-NEXT: // %bb.0:
192+
; O3-NEXT: ld.param.b64 %rd1, [test_store_i8x2_unaligned_immediate_param_0];
193+
; O3-NEXT: st.b8 [%rd1+3], 0;
194+
; O3-NEXT: st.b8 [%rd1+2], 2;
195+
; O3-NEXT: st.b8 [%rd1+1], 0;
196+
; O3-NEXT: st.b8 [%rd1], 1;
197+
; O3-NEXT: ret;
198+
store <2 x i8> <i8 1, i8 2>, ptr %ptr, align 1
199+
ret void
200+
}
201+
202+
; Checks the PTX expected for a 1-byte-aligned load of <2 x i8> that is
; zero-extended to <2 x i16> and returned as an i32 via bitcast. The
; instruction sequence is the same under both check prefixes.
; The expected code reads the bytes at offsets 0 and +1, merges them into one
; 16-bit register (second byte shifted left by 8), spills that to the 2-byte
; local depot through %SP, reloads it with ld.b16 and returns the result.
; NOTE(review): the IR gives each zero-extended element its own 16-bit lane of
; the i32, whereas the checked sequence packs both bytes into a single 16-bit
; value. This looks like current behaviour recorded ahead of a fix rather than
; the desired lowering - confirm before relying on it.
define i32 @test_zext_load_i8x2_unaligned(ptr %ptr) {
203+
; O0-LABEL: test_zext_load_i8x2_unaligned(
204+
; O0: {
205+
; O0-NEXT: .local .align 2 .b8 __local_depot6[2];
206+
; O0-NEXT: .reg .b64 %SP;
207+
; O0-NEXT: .reg .b64 %SPL;
208+
; O0-NEXT: .reg .b16 %rs<5>;
209+
; O0-NEXT: .reg .b32 %r<2>;
210+
; O0-NEXT: .reg .b64 %rd<2>;
211+
; O0-EMPTY:
212+
; O0-NEXT: // %bb.0:
213+
; O0-NEXT: mov.b64 %SPL, __local_depot6;
214+
; O0-NEXT: cvta.local.u64 %SP, %SPL;
215+
; O0-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0];
216+
; O0-NEXT: ld.b8 %rs1, [%rd1];
217+
; O0-NEXT: ld.b8 %rs2, [%rd1+1];
218+
; O0-NEXT: shl.b16 %rs3, %rs2, 8;
219+
; O0-NEXT: or.b16 %rs4, %rs3, %rs1;
220+
; O0-NEXT: st.b16 [%SP], %rs4;
221+
; O0-NEXT: ld.b16 %r1, [%SP];
222+
; O0-NEXT: st.param.b32 [func_retval0], %r1;
223+
; O0-NEXT: ret;
224+
;
225+
; O3-LABEL: test_zext_load_i8x2_unaligned(
226+
; O3: {
227+
; O3-NEXT: .local .align 2 .b8 __local_depot6[2];
228+
; O3-NEXT: .reg .b64 %SP;
229+
; O3-NEXT: .reg .b64 %SPL;
230+
; O3-NEXT: .reg .b16 %rs<5>;
231+
; O3-NEXT: .reg .b32 %r<2>;
232+
; O3-NEXT: .reg .b64 %rd<2>;
233+
; O3-EMPTY:
234+
; O3-NEXT: // %bb.0:
235+
; O3-NEXT: mov.b64 %SPL, __local_depot6;
236+
; O3-NEXT: cvta.local.u64 %SP, %SPL;
237+
; O3-NEXT: ld.param.b64 %rd1, [test_zext_load_i8x2_unaligned_param_0];
238+
; O3-NEXT: ld.b8 %rs1, [%rd1];
239+
; O3-NEXT: ld.b8 %rs2, [%rd1+1];
240+
; O3-NEXT: shl.b16 %rs3, %rs2, 8;
241+
; O3-NEXT: or.b16 %rs4, %rs3, %rs1;
242+
; O3-NEXT: st.b16 [%SP], %rs4;
243+
; O3-NEXT: ld.b16 %r1, [%SP];
244+
; O3-NEXT: st.param.b32 [func_retval0], %r1;
245+
; O3-NEXT: ret;
246+
%a = load <2 x i8>, ptr %ptr, align 1
247+
%b = zext <2 x i8> %a to <2 x i16>
248+
%c = bitcast <2 x i16> %b to i32
249+
ret i32 %c
250+
}
251+
252+
; Checks the PTX expected for a 1-byte-aligned load of <2 x i8> that is
; sign-extended to <2 x i16> and returned as an i32 via bitcast. The
; instruction sequence is the same under both check prefixes.
; The expected code reads the bytes at offsets 0 and +1, merges them into one
; 16-bit register (second byte shifted left by 8), spills that to the 2-byte
; local depot through %SP, reloads it with ld.s16 and returns the result.
; NOTE(review): the IR sign-extends each element into its own 16-bit lane of
; the i32, whereas the checked sequence packs both bytes into a single 16-bit
; value and extends that as a whole. This looks like current behaviour recorded
; ahead of a fix rather than the desired lowering - confirm before relying on it.
define i32 @test_sext_load_i8x2_unaligned(ptr %ptr) {
253+
; O0-LABEL: test_sext_load_i8x2_unaligned(
254+
; O0: {
255+
; O0-NEXT: .local .align 2 .b8 __local_depot7[2];
256+
; O0-NEXT: .reg .b64 %SP;
257+
; O0-NEXT: .reg .b64 %SPL;
258+
; O0-NEXT: .reg .b16 %rs<5>;
259+
; O0-NEXT: .reg .b32 %r<2>;
260+
; O0-NEXT: .reg .b64 %rd<2>;
261+
; O0-EMPTY:
262+
; O0-NEXT: // %bb.0:
263+
; O0-NEXT: mov.b64 %SPL, __local_depot7;
264+
; O0-NEXT: cvta.local.u64 %SP, %SPL;
265+
; O0-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0];
266+
; O0-NEXT: ld.b8 %rs1, [%rd1];
267+
; O0-NEXT: ld.b8 %rs2, [%rd1+1];
268+
; O0-NEXT: shl.b16 %rs3, %rs2, 8;
269+
; O0-NEXT: or.b16 %rs4, %rs3, %rs1;
270+
; O0-NEXT: st.b16 [%SP], %rs4;
271+
; O0-NEXT: ld.s16 %r1, [%SP];
272+
; O0-NEXT: st.param.b32 [func_retval0], %r1;
273+
; O0-NEXT: ret;
274+
;
275+
; O3-LABEL: test_sext_load_i8x2_unaligned(
276+
; O3: {
277+
; O3-NEXT: .local .align 2 .b8 __local_depot7[2];
278+
; O3-NEXT: .reg .b64 %SP;
279+
; O3-NEXT: .reg .b64 %SPL;
280+
; O3-NEXT: .reg .b16 %rs<5>;
281+
; O3-NEXT: .reg .b32 %r<2>;
282+
; O3-NEXT: .reg .b64 %rd<2>;
283+
; O3-EMPTY:
284+
; O3-NEXT: // %bb.0:
285+
; O3-NEXT: mov.b64 %SPL, __local_depot7;
286+
; O3-NEXT: cvta.local.u64 %SP, %SPL;
287+
; O3-NEXT: ld.param.b64 %rd1, [test_sext_load_i8x2_unaligned_param_0];
288+
; O3-NEXT: ld.b8 %rs1, [%rd1];
289+
; O3-NEXT: ld.b8 %rs2, [%rd1+1];
290+
; O3-NEXT: shl.b16 %rs3, %rs2, 8;
291+
; O3-NEXT: or.b16 %rs4, %rs3, %rs1;
292+
; O3-NEXT: st.b16 [%SP], %rs4;
293+
; O3-NEXT: ld.s16 %r1, [%SP];
294+
; O3-NEXT: st.param.b32 [func_retval0], %r1;
295+
; O3-NEXT: ret;
296+
%a = load <2 x i8>, ptr %ptr, align 1
297+
%b = sext <2 x i8> %a to <2 x i16>
298+
%c = bitcast <2 x i16> %b to i32
299+
ret i32 %c
300+
}
301+
135302
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
136303
; COMMON: {{.*}}

0 commit comments

Comments
 (0)