Skip to content

Commit 2f752d1

Browse files
[AIE2] Tests for intrinsic lowering using shufflevector
1 parent d428827 commit 2f752d1

File tree

1 file changed

+380
-0
lines changed

1 file changed

+380
-0
lines changed
Lines changed: 380 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,380 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
;
3+
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
8+
; RUN: llc -O2 -mtriple=aie2 -verify-machineinstrs --issue-limit=1 %s -o - | FileCheck %s
9+
10+
; test_extract_vector: returns one 8-lane half of the 16-lane input %a.
; When %idx == 0 the result is lanes 0-7 of %a, otherwise lanes 8-15. Each half
; is written as a shufflevector whose second operand is poison.
; The autogenerated assertions expect every selected lane to be read with
; vextract.s32 and the result to be rebuilt with a chain of vpush.lo.32.
define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) {
11+
; CHECK-LABEL: test_extract_vector:
12+
; CHECK: .p2align 4
13+
; CHECK-NEXT: // %bb.0: // %entry
14+
; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB0_2; nopv
15+
; CHECK-NEXT: nopx // Delay Slot 5
16+
; CHECK-NEXT: nop // Delay Slot 4
17+
; CHECK-NEXT: nop // Delay Slot 3
18+
; CHECK-NEXT: mov r8, r16 // Delay Slot 2
19+
; CHECK-NEXT: mov r9, r17 // Delay Slot 1
20+
; CHECK-NEXT: // %bb.1: // %if.end
21+
; CHECK-NEXT: mova r16, #8; nopb ; nopxm
22+
; CHECK-NEXT: vextract.s32 r0, x2, r16
23+
; CHECK-NEXT: nop
24+
; CHECK-NEXT: mova r16, #9
25+
; CHECK-NEXT: vextract.s32 r1, x2, r16
26+
; CHECK-NEXT: nop
27+
; CHECK-NEXT: mova r16, #10
28+
; CHECK-NEXT: vextract.s32 r2, x2, r16
29+
; CHECK-NEXT: nop
30+
; CHECK-NEXT: mova r16, #11
31+
; CHECK-NEXT: vextract.s32 r3, x2, r16
32+
; CHECK-NEXT: nop
33+
; CHECK-NEXT: mova r16, #12
34+
; CHECK-NEXT: vextract.s32 r4, x2, r16
35+
; CHECK-NEXT: j #.LBB0_3
36+
; CHECK-NEXT: nop // Delay Slot 5
37+
; CHECK-NEXT: mova r16, #13 // Delay Slot 4
38+
; CHECK-NEXT: vextract.s32 r5, x2, r16 // Delay Slot 3
39+
; CHECK-NEXT: movx r17, #15 // Delay Slot 2
40+
; CHECK-NEXT: mova r16, #14 // Delay Slot 1
41+
; CHECK-NEXT: .p2align 4
42+
; CHECK-NEXT: .LBB0_2: // %if.then
43+
; CHECK-NEXT: nopb ; mova r16, #0; nops ; nopxm ; nopv
44+
; CHECK-NEXT: nopa ; vextract.s32 r0, x2, r16
45+
; CHECK-NEXT: nop
46+
; CHECK-NEXT: mova r16, #1
47+
; CHECK-NEXT: vextract.s32 r1, x2, r16
48+
; CHECK-NEXT: nop
49+
; CHECK-NEXT: mova r16, #2
50+
; CHECK-NEXT: vextract.s32 r2, x2, r16
51+
; CHECK-NEXT: nop
52+
; CHECK-NEXT: mova r16, #3
53+
; CHECK-NEXT: vextract.s32 r3, x2, r16
54+
; CHECK-NEXT: nop
55+
; CHECK-NEXT: mova r16, #4
56+
; CHECK-NEXT: vextract.s32 r4, x2, r16
57+
; CHECK-NEXT: nop
58+
; CHECK-NEXT: mova r16, #5
59+
; CHECK-NEXT: vextract.s32 r5, x2, r16
60+
; CHECK-NEXT: movx r17, #7
61+
; CHECK-NEXT: mova r16, #6
62+
; CHECK-NEXT: .p2align 4
63+
; CHECK-NEXT: .LBB0_3: // %return
64+
; CHECK-NEXT: nopa ; nopb ; nopx ; vextract.s32 r6, x2, r17; nops
65+
; CHECK-NEXT: nop
66+
; CHECK-NEXT: mov r17, r9
67+
; CHECK-NEXT: vextract.s32 r7, x2, r16
68+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
69+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
70+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
71+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
72+
; CHECK-NEXT: ret lr
73+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
74+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
75+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
76+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
77+
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
78+
; Branch on %idx == 0 to decide which half of %a is returned.
entry:
79+
%cmp = icmp eq i32 %idx, 0
80+
br i1 %cmp, label %if.then, label %if.end
81+
82+
; %idx == 0: mask 0-7 selects the low eight lanes of %a.
if.then:
83+
%shuffle = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
84+
br label %return
85+
86+
; %idx != 0: mask 8-15 selects the high eight lanes of %a.
if.end:
87+
%shuffle1 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
88+
br label %return
89+
90+
; Merge the half chosen on either path and return it.
return:
91+
%retval.0 = phi <8 x i32> [ %shuffle, %if.then ], [ %shuffle1, %if.end ]
92+
ret <8 x i32> %retval.0
93+
}
94+
95+
; test_insert_vector: inserts the 8-lane vector %b into one half of the
; 16-lane vector %a, keeping lanes 0-7 of %a in the other half.
; %b is first widened to 16 lanes (upper eight lanes undef). When %idx == 0 the
; result is { %b, lanes 0-7 of %a }; otherwise it is { lanes 0-7 of %a, %b }.
; The autogenerated assertions expect per-lane vextract.s32 reads on both paths
; and a vpush.lo.32 chain in the shared exit block.
define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <8 x i32> noundef %b) {
96+
; CHECK-LABEL: test_insert_vector:
97+
; CHECK: .p2align 4
98+
; CHECK-NEXT: // %bb.0: // %entry
99+
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r25, r17
100+
; CHECK-NEXT: mov r26, r18
101+
; CHECK-NEXT: mov r27, r19
102+
; CHECK-NEXT: mova r18, #1
103+
; CHECK-NEXT: mova r19, #2
104+
; CHECK-NEXT: mova r17, #3
105+
; CHECK-NEXT: mov r24, r16
106+
; CHECK-NEXT: mova r16, #0
107+
; CHECK-NEXT: vextract.s32 r1, x4, r16
108+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
109+
; CHECK-NEXT: mova r16, #4
110+
; CHECK-NEXT: vextract.s32 r2, x4, r18
111+
; CHECK-NEXT: vextract.s32 r3, x4, r19
112+
; CHECK-NEXT: vextract.s32 r4, x4, r17
113+
; CHECK-NEXT: vextract.s32 r5, x4, r16
114+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
115+
; CHECK-NEXT: mova r16, #5
116+
; CHECK-NEXT: vextract.s32 r6, x4, r16
117+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
118+
; CHECK-NEXT: mova r16, #6
119+
; CHECK-NEXT: vextract.s32 r7, x4, r16
120+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
121+
; CHECK-NEXT: mova r16, #7
122+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
123+
; CHECK-NEXT: vextract.s32 r8, x4, r16
124+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
125+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
126+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
127+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
128+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
129+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
130+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
131+
; CHECK-NEXT: jz r0, #.LBB1_2
132+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 5
133+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 4
134+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
135+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 2
136+
; CHECK-NEXT: nop // Delay Slot 1
137+
; CHECK-NEXT: // %bb.1: // %if.end
138+
; CHECK-NEXT: mova r16, #3
139+
; CHECK-NEXT: mova r17, #0
140+
; CHECK-NEXT: vextract.s32 r1, x2, r18
141+
; CHECK-NEXT: vextract.s32 r2, x2, r19
142+
; CHECK-NEXT: movx r18, #4
143+
; CHECK-NEXT: mova r19, #5
144+
; CHECK-NEXT: vextract.s32 r3, x2, r16
145+
; CHECK-NEXT: vextract.s32 r0, x2, r17
146+
; CHECK-NEXT: vextract.s32 r8, x0, r17
147+
; CHECK-NEXT: movx r16, #6
148+
; CHECK-NEXT: mova r17, #1
149+
; CHECK-NEXT: vextract.s32 r4, x2, r18
150+
; CHECK-NEXT: vextract.s32 r5, x2, r19
151+
; CHECK-NEXT: vextract.s32 r12, x0, r18
152+
; CHECK-NEXT: vextract.s32 r13, x0, r19
153+
; CHECK-NEXT: vextract.s32 r6, x2, r16
154+
; CHECK-NEXT: vextract.s32 r9, x0, r17
155+
; CHECK-NEXT: movx r16, #7
156+
; CHECK-NEXT: mova r17, #2
157+
; CHECK-NEXT: vextract.s32 r10, x0, r17
158+
; CHECK-NEXT: vextract.s32 r7, x2, r16
159+
; CHECK-NEXT: vextract.s32 r14, x0, r16
160+
; CHECK-NEXT: j #.LBB1_3
161+
; CHECK-NEXT: mova r17, #3 // Delay Slot 5
162+
; CHECK-NEXT: mova r16, #6 // Delay Slot 4
163+
; CHECK-NEXT: vextract.s32 r11, x0, r17 // Delay Slot 3
164+
; CHECK-NEXT: vextract.s32 r15, x0, r16 // Delay Slot 2
165+
; CHECK-NEXT: nop // Delay Slot 1
166+
; CHECK-NEXT: .p2align 4
167+
; CHECK-NEXT: .LBB1_2: // %if.then
168+
; CHECK-NEXT: nopa ; nopb ; nopx ; vextract.s32 r3, x0, r17; nops
169+
; CHECK-NEXT: vextract.s32 r11, x2, r17
170+
; CHECK-NEXT: movx r16, #0
171+
; CHECK-NEXT: mova r17, #4
172+
; CHECK-NEXT: vextract.s32 r1, x0, r18
173+
; CHECK-NEXT: vextract.s32 r2, x0, r19
174+
; CHECK-NEXT: vextract.s32 r9, x2, r18
175+
; CHECK-NEXT: vextract.s32 r10, x2, r19
176+
; CHECK-NEXT: vextract.s32 r12, x2, r17
177+
; CHECK-NEXT: vextract.s32 r0, x0, r16
178+
; CHECK-NEXT: nop
179+
; CHECK-NEXT: mova r16, #4
180+
; CHECK-NEXT: vextract.s32 r4, x0, r16
181+
; CHECK-NEXT: movx r17, #5
182+
; CHECK-NEXT: mova r16, #5
183+
; CHECK-NEXT: vextract.s32 r13, x2, r17
184+
; CHECK-NEXT: vextract.s32 r5, x0, r16
185+
; CHECK-NEXT: nop
186+
; CHECK-NEXT: mova r16, #6
187+
; CHECK-NEXT: vextract.s32 r6, x0, r16
188+
; CHECK-NEXT: nop
189+
; CHECK-NEXT: mova r16, #7
190+
; CHECK-NEXT: vextract.s32 r7, x0, r16
191+
; CHECK-NEXT: nop
192+
; CHECK-NEXT: mova r16, #0
193+
; CHECK-NEXT: vextract.s32 r8, x2, r16
194+
; CHECK-NEXT: nop
195+
; CHECK-NEXT: mova r16, #7
196+
; CHECK-NEXT: vextract.s32 r14, x2, r16
197+
; CHECK-NEXT: nop
198+
; CHECK-NEXT: mova r16, #6
199+
; CHECK-NEXT: vextract.s32 r15, x2, r16
200+
; CHECK-NEXT: nop
201+
; CHECK-NEXT: .p2align 4
202+
; CHECK-NEXT: .LBB1_3: // %cleanup
203+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r19, r27; nopv
204+
; CHECK-NEXT: mov r18, r26
205+
; CHECK-NEXT: mov r17, r25
206+
; CHECK-NEXT: vpush.lo.32 x0, r14, x0
207+
; CHECK-NEXT: vpush.lo.32 x0, r15, x0
208+
; CHECK-NEXT: vpush.lo.32 x0, r13, x0
209+
; CHECK-NEXT: vpush.lo.32 x0, r12, x0
210+
; CHECK-NEXT: vpush.lo.32 x0, r11, x0
211+
; CHECK-NEXT: vpush.lo.32 x0, r10, x0
212+
; CHECK-NEXT: vpush.lo.32 x0, r9, x0
213+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
214+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
215+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
216+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
217+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
218+
; CHECK-NEXT: ret lr
219+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
220+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
221+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
222+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
223+
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
224+
; Widen %b to 16 lanes (lanes 8-15 undef), then branch on %idx == 0.
entry:
225+
%shuffle = shufflevector <8 x i32> %b, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
226+
%cmp = icmp eq i32 %idx, 0
227+
br i1 %cmp, label %if.then, label %if.end
228+
229+
; %idx == 0: mask 0-7 takes %b from the widened vector, and mask 16-23 takes
; lanes 0-7 of the second operand %a.
if.then:
230+
%shuffle1 = shufflevector <16 x i32> %shuffle, <16 x i32> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
231+
br label %cleanup
232+
233+
; %idx != 0: same mask with the operands swapped, so lanes 0-7 of %a come first
; and %b (from the widened vector) fills lanes 8-15.
if.end: ;
234+
%shuffle2 = shufflevector <16 x i32> %a, <16 x i32> %shuffle, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
235+
br label %cleanup
236+
237+
; Merge the vector built on either path and return it.
cleanup:
238+
%retval.0 = phi <16 x i32> [ %shuffle1, %if.then ], [ %shuffle2, %if.end ]
239+
ret <16 x i32> %retval.0
240+
}
241+
242+
; test_concat_vector: concatenates two 8-lane vectors into one 16-lane vector.
; The identity mask 0-15 places %a in lanes 0-7 and %b in lanes 8-15.
; The autogenerated assertions expect straight-line code (no branches) that
; reads every lane of both inputs with vextract.s32 and rebuilds the result
; with a vpush.lo.32 chain.
define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b) {
243+
; CHECK-LABEL: test_concat_vector:
244+
; CHECK: .p2align 4
245+
; CHECK-NEXT: // %bb.0: // %entry
246+
; CHECK-NEXT: nopa ; nopx ; mov r24, r16
247+
; CHECK-NEXT: mova r16, #0
248+
; CHECK-NEXT: vextract.s32 r0, x2, r16
249+
; CHECK-NEXT: vextract.s32 r1, x4, r16
250+
; CHECK-NEXT: nop
251+
; CHECK-NEXT: mova r16, #1
252+
; CHECK-NEXT: vextract.s32 r2, x2, r16
253+
; CHECK-NEXT: vextract.s32 r3, x4, r16
254+
; CHECK-NEXT: nop
255+
; CHECK-NEXT: mova r16, #2
256+
; CHECK-NEXT: vextract.s32 r4, x2, r16
257+
; CHECK-NEXT: vextract.s32 r5, x4, r16
258+
; CHECK-NEXT: nop
259+
; CHECK-NEXT: mova r16, #3
260+
; CHECK-NEXT: vextract.s32 r6, x2, r16
261+
; CHECK-NEXT: vextract.s32 r7, x4, r16
262+
; CHECK-NEXT: nop
263+
; CHECK-NEXT: mova r16, #4
264+
; CHECK-NEXT: vextract.s32 r8, x2, r16
265+
; CHECK-NEXT: vextract.s32 r9, x4, r16
266+
; CHECK-NEXT: nop
267+
; CHECK-NEXT: mova r16, #5
268+
; CHECK-NEXT: vextract.s32 r10, x2, r16
269+
; CHECK-NEXT: vextract.s32 r11, x4, r16
270+
; CHECK-NEXT: nop
271+
; CHECK-NEXT: mova r16, #7
272+
; CHECK-NEXT: vextract.s32 r12, x2, r16
273+
; CHECK-NEXT: vextract.s32 r13, x4, r16
274+
; CHECK-NEXT: nop
275+
; CHECK-NEXT: mova r16, #6
276+
; CHECK-NEXT: vextract.s32 r14, x2, r16
277+
; CHECK-NEXT: vextract.s32 r15, x4, r16
278+
; CHECK-NEXT: vpush.lo.32 x0, r13, x0
279+
; CHECK-NEXT: vpush.lo.32 x0, r15, x0
280+
; CHECK-NEXT: vpush.lo.32 x0, r11, x0
281+
; CHECK-NEXT: vpush.lo.32 x0, r9, x0
282+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
283+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
284+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
285+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
286+
; CHECK-NEXT: vpush.lo.32 x0, r12, x0
287+
; CHECK-NEXT: vpush.lo.32 x0, r14, x0
288+
; CHECK-NEXT: vpush.lo.32 x0, r10, x0
289+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
290+
; CHECK-NEXT: ret lr
291+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
292+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
293+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
294+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
295+
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
296+
; Mask indices 0-7 pick the lanes of %a and 8-15 pick the lanes of %b.
entry:
297+
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
298+
ret <16 x i32> %shuffle
299+
}
300+
301+
; test_set_vector: places the 8-lane vector %a into one half of a 16-lane
; result whose other half is undef.
; When %idx == 0 the result has %a in lanes 0-7; otherwise %a is in lanes 8-15.
; The choice is an IR select, not a branch. The autogenerated assertions expect
; both 16-lane candidates to be built with vpush.lo.32 chains and a single
; vsel.32 to pick between them.
define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) {
302+
; CHECK-LABEL: test_set_vector:
303+
; CHECK: .p2align 4
304+
; CHECK-NEXT: // %bb.0: // %entry
305+
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r9, r16
306+
; CHECK-NEXT: mova r16, #0
307+
; CHECK-NEXT: vextract.s32 r1, x2, r16
308+
; CHECK-NEXT: eqz r0, r0
309+
; CHECK-NEXT: mova r16, #1
310+
; CHECK-NEXT: vextract.s32 r2, x2, r16
311+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
312+
; CHECK-NEXT: mova r16, #2
313+
; CHECK-NEXT: vextract.s32 r3, x2, r16
314+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
315+
; CHECK-NEXT: mova r16, #3
316+
; CHECK-NEXT: vextract.s32 r4, x2, r16
317+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
318+
; CHECK-NEXT: mova r16, #4
319+
; CHECK-NEXT: vextract.s32 r5, x2, r16
320+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
321+
; CHECK-NEXT: mova r16, #5
322+
; CHECK-NEXT: vextract.s32 r6, x2, r16
323+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
324+
; CHECK-NEXT: mova r16, #6
325+
; CHECK-NEXT: vextract.s32 r7, x2, r16
326+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
327+
; CHECK-NEXT: mova r16, #7
328+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
329+
; CHECK-NEXT: vextract.s32 r8, x2, r16
330+
; CHECK-NEXT: add r16, r0, #-1
331+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
332+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
333+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
334+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
335+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
336+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
337+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
338+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0
339+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
340+
; CHECK-NEXT: vpush.lo.32 x2, r8, x0
341+
; CHECK-NEXT: vpush.lo.32 x2, r7, x2
342+
; CHECK-NEXT: vpush.lo.32 x2, r6, x2
343+
; CHECK-NEXT: vpush.lo.32 x2, r5, x2
344+
; CHECK-NEXT: vpush.lo.32 x2, r4, x2
345+
; CHECK-NEXT: vpush.lo.32 x2, r3, x2
346+
; CHECK-NEXT: vpush.lo.32 x2, r2, x2
347+
; CHECK-NEXT: vpush.lo.32 x2, r1, x2
348+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
349+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
350+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
351+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
352+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
353+
; CHECK-NEXT: ret lr
354+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 5
355+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 4
356+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 3
357+
; CHECK-NEXT: vsel.32 x0, x0, x2, r16 // Delay Slot 2
358+
; CHECK-NEXT: mov r16, r9 // Delay Slot 1
359+
; %shuffle holds %a in lanes 0-7 (lanes 8-15 undef); %shuffle1 holds %a in
; lanes 8-15 (lanes 0-7 undef). The select returns %shuffle when %idx == 0.
entry:
360+
%cmp = icmp eq i32 %idx, 0
361+
%shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
362+
%shuffle1 = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
363+
%retval.0 = select i1 %cmp, <16 x i32> %shuffle, <16 x i32> %shuffle1
364+
ret <16 x i32> %retval.0
365+
}
366+
367+
; test_extract_elem: returns the lane of the 8-lane vector %a selected by the
; run-time index %idx, using extractelement (no shufflevector involved).
; The autogenerated assertions expect a single vextract.s32 scheduled in the
; delay slots of the return.
define i32 @test_extract_elem(<8 x i32> noundef %a, i32 noundef %idx) {
368+
; CHECK-LABEL: test_extract_elem:
369+
; CHECK: .p2align 4
370+
; CHECK-NEXT: // %bb.0: // %entry
371+
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops
372+
; CHECK-NEXT: mov r2, r16 // Delay Slot 5
373+
; CHECK-NEXT: mov r16, r1 // Delay Slot 4
374+
; CHECK-NEXT: vextract.s32 r0, x0, r16 // Delay Slot 3
375+
; CHECK-NEXT: nop // Delay Slot 2
376+
; CHECK-NEXT: mov r16, r2 // Delay Slot 1
377+
; The index is a function argument, so it is not a compile-time constant here.
entry:
378+
%vecext = extractelement <8 x i32> %a, i32 %idx
379+
ret i32 %vecext
380+
}

0 commit comments

Comments
 (0)