Skip to content

Commit 3e692b4

Browse files
[AIE2] Tests for intrinsic lowering using shufflevector
1 parent 7c281a9 commit 3e692b4

File tree

1 file changed

+369
-0
lines changed

1 file changed

+369
-0
lines changed
Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
;
3+
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
8+
; RUN: llc -O2 -mtriple=aie2 -verify-machineinstrs --issue-limit=1 %s -o - | FileCheck %s
9+
10+
; test_extract_vector: returns one 8-lane half of the 16-lane input %a,
; chosen at run time by %idx. Each half is taken with a shufflevector whose
; mask is a contiguous run of indices, and the two results are merged by a phi.
;   %idx == 0 -> lanes 0-7  of %a (block if.then)
;   %idx != 0 -> lanes 8-15 of %a (block if.end)
; The assertion lines inside the body are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing them by hand.
define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) {
11+
; CHECK-LABEL: test_extract_vector:
12+
; CHECK: .p2align 4
13+
; CHECK-NEXT: // %bb.0: // %entry
14+
; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB0_2; nopv
15+
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
16+
; CHECK-NEXT: nop // Delay Slot 4
17+
; CHECK-NEXT: nop // Delay Slot 3
18+
; CHECK-NEXT: nop // Delay Slot 2
19+
; CHECK-NEXT: mov r8, r16 // Delay Slot 1
20+
; CHECK-NEXT: // %bb.1: // %if.end
21+
; CHECK-NEXT: mova r16, #8
22+
; CHECK-NEXT: vextract.s32 r0, x2, r16
23+
; CHECK-NEXT: nop
24+
; CHECK-NEXT: mova r16, #9
25+
; CHECK-NEXT: vextract.s32 r1, x2, r16
26+
; CHECK-NEXT: nop
27+
; CHECK-NEXT: mova r16, #10
28+
; CHECK-NEXT: vextract.s32 r2, x2, r16
29+
; CHECK-NEXT: nop
30+
; CHECK-NEXT: mova r16, #11
31+
; CHECK-NEXT: vextract.s32 r3, x2, r16
32+
; CHECK-NEXT: nop
33+
; CHECK-NEXT: mova r16, #12
34+
; CHECK-NEXT: vextract.s32 r4, x2, r16
35+
; CHECK-NEXT: nop
36+
; CHECK-NEXT: mova r16, #13
37+
; CHECK-NEXT: vextract.s32 r5, x2, r16
38+
; CHECK-NEXT: j #.LBB0_3
39+
; CHECK-NEXT: nop // Delay Slot 5
40+
; CHECK-NEXT: mova r16, #15 // Delay Slot 4
41+
; CHECK-NEXT: vextract.s32 r6, x2, r16 // Delay Slot 3
42+
; CHECK-NEXT: nop // Delay Slot 2
43+
; CHECK-NEXT: mova r16, #14 // Delay Slot 1
44+
; CHECK-NEXT: .p2align 4
45+
; CHECK-NEXT: .LBB0_2: // %if.then
46+
; CHECK-NEXT: mova r16, #0; nopxm
47+
; CHECK-NEXT: vextract.s32 r0, x2, r16
48+
; CHECK-NEXT: nop
49+
; CHECK-NEXT: mova r16, #1
50+
; CHECK-NEXT: vextract.s32 r1, x2, r16
51+
; CHECK-NEXT: nop
52+
; CHECK-NEXT: mova r16, #2
53+
; CHECK-NEXT: vextract.s32 r2, x2, r16
54+
; CHECK-NEXT: nop
55+
; CHECK-NEXT: mova r16, #3
56+
; CHECK-NEXT: vextract.s32 r3, x2, r16
57+
; CHECK-NEXT: nop
58+
; CHECK-NEXT: mova r16, #4
59+
; CHECK-NEXT: vextract.s32 r4, x2, r16
60+
; CHECK-NEXT: nop
61+
; CHECK-NEXT: mova r16, #5
62+
; CHECK-NEXT: vextract.s32 r5, x2, r16
63+
; CHECK-NEXT: nop
64+
; CHECK-NEXT: mova r16, #7
65+
; CHECK-NEXT: vextract.s32 r6, x2, r16
66+
; CHECK-NEXT: nop
67+
; CHECK-NEXT: mova r16, #6
68+
; CHECK-NEXT: .p2align 4
69+
; CHECK-NEXT: .LBB0_3: // %return
70+
; CHECK-NEXT: nopx ; vextract.s32 r7, x2, r16
71+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
72+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
73+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
74+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
75+
; CHECK-NEXT: ret lr
76+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
77+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
78+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
79+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
80+
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
81+
entry:
82+
%cmp = icmp eq i32 %idx, 0
83+
br i1 %cmp, label %if.then, label %if.end
84+
85+
if.then:
86+
; Low half: mask indices 0-7 select lanes 0-7 of %a.
%shuffle = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
87+
br label %return
88+
89+
if.end:
90+
; High half: mask indices 8-15 select lanes 8-15 of %a.
%shuffle1 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
91+
br label %return
92+
93+
return:
94+
%retval.0 = phi <8 x i32> [ %shuffle, %if.then ], [ %shuffle1, %if.end ]
95+
ret <8 x i32> %retval.0
96+
}
97+
98+
; test_insert_vector: builds a 16-lane result from the 16-lane %a and the
; 8-lane %b, with the layout chosen at run time by %idx.
; %b is first widened to 16 lanes (%shuffle; lanes 8-15 are undef), then:
;   %idx == 0 (if.then): lanes 0-7 = %b,              lanes 8-15 = lanes 0-7 of %a
;   %idx != 0 (if.end):  lanes 0-7 = lanes 0-7 of %a, lanes 8-15 = %b
; The two results are merged by a phi in the cleanup block.
; The assertion lines inside the body are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing them by hand.
define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <8 x i32> noundef %b) {
99+
; CHECK-LABEL: test_insert_vector:
100+
; CHECK: .p2align 4
101+
; CHECK-NEXT: // %bb.0: // %entry
102+
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r25, r17
103+
; CHECK-NEXT: mov r26, r18
104+
; CHECK-NEXT: mov r27, r19
105+
; CHECK-NEXT: mova r19, #0
106+
; CHECK-NEXT: mova r18, #1
107+
; CHECK-NEXT: mova r17, #2
108+
; CHECK-NEXT: mov r24, r16
109+
; CHECK-NEXT: mova r16, #3
110+
; CHECK-NEXT: vextract.s32 r4, x4, r16
111+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
112+
; CHECK-NEXT: mova r16, #4
113+
; CHECK-NEXT: vextract.s32 r1, x4, r19
114+
; CHECK-NEXT: vextract.s32 r2, x4, r18
115+
; CHECK-NEXT: vextract.s32 r3, x4, r17
116+
; CHECK-NEXT: vextract.s32 r5, x4, r16
117+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
118+
; CHECK-NEXT: mova r16, #5
119+
; CHECK-NEXT: vextract.s32 r6, x4, r16
120+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
121+
; CHECK-NEXT: mova r16, #6
122+
; CHECK-NEXT: vextract.s32 r7, x4, r16
123+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
124+
; CHECK-NEXT: mova r16, #7
125+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
126+
; CHECK-NEXT: vextract.s32 r8, x4, r16
127+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
128+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
129+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
130+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
131+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
132+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
133+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
134+
; CHECK-NEXT: jz r0, #.LBB1_2
135+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 5
136+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 4
137+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
138+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 2
139+
; CHECK-NEXT: nop // Delay Slot 1
140+
; CHECK-NEXT: // %bb.1: // %if.end
141+
; CHECK-NEXT: nopx ; vextract.s32 r12, x2, r16
142+
; CHECK-NEXT: vextract.s32 r13, x0, r16
143+
; CHECK-NEXT: vextract.s32 r4, x2, r17
144+
; CHECK-NEXT: vextract.s32 r5, x0, r17
145+
; CHECK-NEXT: nop
146+
; CHECK-NEXT: mova r17, #3
147+
; CHECK-NEXT: vextract.s32 r0, x2, r19
148+
; CHECK-NEXT: vextract.s32 r1, x0, r19
149+
; CHECK-NEXT: vextract.s32 r2, x2, r18
150+
; CHECK-NEXT: vextract.s32 r3, x0, r18
151+
; CHECK-NEXT: vextract.s32 r6, x2, r17
152+
; CHECK-NEXT: vextract.s32 r7, x0, r17
153+
; CHECK-NEXT: movx r16, #6
154+
; CHECK-NEXT: mova r17, #4
155+
; CHECK-NEXT: vextract.s32 r14, x2, r16
156+
; CHECK-NEXT: vextract.s32 r15, x0, r16
157+
; CHECK-NEXT: vextract.s32 r8, x2, r17
158+
; CHECK-NEXT: vextract.s32 r9, x0, r17
159+
; CHECK-NEXT: j #.LBB1_3
160+
; CHECK-NEXT: nop // Delay Slot 5
161+
; CHECK-NEXT: mova r17, #5 // Delay Slot 4
162+
; CHECK-NEXT: vextract.s32 r10, x2, r17 // Delay Slot 3
163+
; CHECK-NEXT: vextract.s32 r11, x0, r17 // Delay Slot 2
164+
; CHECK-NEXT: nop // Delay Slot 1
165+
; CHECK-NEXT: .p2align 4
166+
; CHECK-NEXT: .LBB1_2: // %if.then
167+
; CHECK-NEXT: nopa ; nopb ; nopx ; vextract.s32 r12, x0, r16; nops
168+
; CHECK-NEXT: vextract.s32 r13, x2, r16
169+
; CHECK-NEXT: vextract.s32 r4, x0, r17
170+
; CHECK-NEXT: vextract.s32 r5, x2, r17
171+
; CHECK-NEXT: nop
172+
; CHECK-NEXT: mova r17, #3
173+
; CHECK-NEXT: vextract.s32 r0, x0, r19
174+
; CHECK-NEXT: vextract.s32 r1, x2, r19
175+
; CHECK-NEXT: vextract.s32 r2, x0, r18
176+
; CHECK-NEXT: vextract.s32 r3, x2, r18
177+
; CHECK-NEXT: vextract.s32 r6, x0, r17
178+
; CHECK-NEXT: vextract.s32 r7, x2, r17
179+
; CHECK-NEXT: movx r16, #6
180+
; CHECK-NEXT: mova r17, #4
181+
; CHECK-NEXT: vextract.s32 r14, x0, r16
182+
; CHECK-NEXT: vextract.s32 r15, x2, r16
183+
; CHECK-NEXT: vextract.s32 r8, x0, r17
184+
; CHECK-NEXT: vextract.s32 r9, x2, r17
185+
; CHECK-NEXT: nop
186+
; CHECK-NEXT: mova r17, #5
187+
; CHECK-NEXT: vextract.s32 r10, x0, r17
188+
; CHECK-NEXT: vextract.s32 r11, x2, r17
189+
; CHECK-NEXT: nop
190+
; CHECK-NEXT: .p2align 4
191+
; CHECK-NEXT: .LBB1_3: // %cleanup
192+
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r19, r27; nopv
193+
; CHECK-NEXT: mov r18, r26
194+
; CHECK-NEXT: mov r17, r25
195+
; CHECK-NEXT: vpush.lo.32 x0, r13, x0
196+
; CHECK-NEXT: vpush.lo.32 x0, r15, x0
197+
; CHECK-NEXT: vpush.lo.32 x0, r11, x0
198+
; CHECK-NEXT: vpush.lo.32 x0, r9, x0
199+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
200+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
201+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
202+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
203+
; CHECK-NEXT: vpush.lo.32 x0, r12, x0
204+
; CHECK-NEXT: vpush.lo.32 x0, r14, x0
205+
; CHECK-NEXT: vpush.lo.32 x0, r10, x0
206+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
207+
; CHECK-NEXT: ret lr
208+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
209+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
210+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
211+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
212+
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
213+
entry:
214+
; Widen %b from 8 to 16 lanes: lanes 0-7 come from %b, lanes 8-15 are undef.
%shuffle = shufflevector <8 x i32> %b, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
215+
%cmp = icmp eq i32 %idx, 0
216+
br i1 %cmp, label %if.then, label %if.end
217+
218+
if.then:
219+
; Mask 0-7 selects lanes 0-7 of the first operand (the widened %b);
; mask 16-23 selects lanes 0-7 of the second operand (%a).
%shuffle1 = shufflevector <16 x i32> %shuffle, <16 x i32> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
220+
br label %cleanup
221+
222+
if.end: ;
223+
; Same mask with the operands swapped: lanes 0-7 of %a first, then lanes 0-7
; of the widened %b.
%shuffle2 = shufflevector <16 x i32> %a, <16 x i32> %shuffle, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
224+
br label %cleanup
225+
226+
cleanup:
227+
%retval.0 = phi <16 x i32> [ %shuffle1, %if.then ], [ %shuffle2, %if.end ]
228+
ret <16 x i32> %retval.0
229+
}
230+
231+
; test_concat_vector: concatenates the two 8-lane inputs into one 16-lane
; result with a single shufflevector whose mask is the identity 0-15, so
; lanes 0-7 come from %a and lanes 8-15 come from %b. No control flow.
; The assertion lines inside the body are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing them by hand.
define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b) {
232+
; CHECK-LABEL: test_concat_vector:
233+
; CHECK: .p2align 4
234+
; CHECK-NEXT: // %bb.0: // %entry
235+
; CHECK-NEXT: nopa ; nopx ; mov r24, r16
236+
; CHECK-NEXT: mova r16, #0
237+
; CHECK-NEXT: vextract.s32 r0, x2, r16
238+
; CHECK-NEXT: vextract.s32 r1, x4, r16
239+
; CHECK-NEXT: nop
240+
; CHECK-NEXT: mova r16, #1
241+
; CHECK-NEXT: vextract.s32 r2, x2, r16
242+
; CHECK-NEXT: vextract.s32 r3, x4, r16
243+
; CHECK-NEXT: nop
244+
; CHECK-NEXT: mova r16, #2
245+
; CHECK-NEXT: vextract.s32 r4, x2, r16
246+
; CHECK-NEXT: vextract.s32 r5, x4, r16
247+
; CHECK-NEXT: nop
248+
; CHECK-NEXT: mova r16, #3
249+
; CHECK-NEXT: vextract.s32 r6, x2, r16
250+
; CHECK-NEXT: vextract.s32 r7, x4, r16
251+
; CHECK-NEXT: nop
252+
; CHECK-NEXT: mova r16, #4
253+
; CHECK-NEXT: vextract.s32 r8, x2, r16
254+
; CHECK-NEXT: vextract.s32 r9, x4, r16
255+
; CHECK-NEXT: nop
256+
; CHECK-NEXT: mova r16, #5
257+
; CHECK-NEXT: vextract.s32 r10, x2, r16
258+
; CHECK-NEXT: vextract.s32 r11, x4, r16
259+
; CHECK-NEXT: nop
260+
; CHECK-NEXT: mova r16, #7
261+
; CHECK-NEXT: vextract.s32 r12, x2, r16
262+
; CHECK-NEXT: vextract.s32 r13, x4, r16
263+
; CHECK-NEXT: nop
264+
; CHECK-NEXT: mova r16, #6
265+
; CHECK-NEXT: vextract.s32 r14, x2, r16
266+
; CHECK-NEXT: vextract.s32 r15, x4, r16
267+
; CHECK-NEXT: vpush.lo.32 x0, r13, x0
268+
; CHECK-NEXT: vpush.lo.32 x0, r15, x0
269+
; CHECK-NEXT: vpush.lo.32 x0, r11, x0
270+
; CHECK-NEXT: vpush.lo.32 x0, r9, x0
271+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
272+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
273+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
274+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
275+
; CHECK-NEXT: vpush.lo.32 x0, r12, x0
276+
; CHECK-NEXT: vpush.lo.32 x0, r14, x0
277+
; CHECK-NEXT: vpush.lo.32 x0, r10, x0
278+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
279+
; CHECK-NEXT: ret lr
280+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
281+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
282+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
283+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
284+
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
285+
entry:
286+
; Mask indices 0-7 address %a and 8-15 address %b.
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
287+
ret <16 x i32> %shuffle
288+
}
289+
290+
; test_set_vector: places the 8-lane %a into one half of a 16-lane result and
; leaves the other half undef; the half is chosen at run time by %idx.
;   %idx == 0 -> %shuffle:  lanes 0-7  = %a, lanes 8-15 undef
;   %idx != 0 -> %shuffle1: lanes 8-15 = %a, lanes 0-7  undef
; Both candidates are computed unconditionally and a select picks one, so the
; IR contains no branches.
; The assertion lines inside the body are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing them by hand.
define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) {
291+
; CHECK-LABEL: test_set_vector:
292+
; CHECK: .p2align 4
293+
; CHECK-NEXT: // %bb.0: // %entry
294+
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r9, r16
295+
; CHECK-NEXT: mova r16, #0
296+
; CHECK-NEXT: vextract.s32 r1, x2, r16
297+
; CHECK-NEXT: eqz r0, r0
298+
; CHECK-NEXT: mova r16, #1
299+
; CHECK-NEXT: vextract.s32 r2, x2, r16
300+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
301+
; CHECK-NEXT: mova r16, #2
302+
; CHECK-NEXT: vextract.s32 r3, x2, r16
303+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
304+
; CHECK-NEXT: mova r16, #3
305+
; CHECK-NEXT: vextract.s32 r4, x2, r16
306+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
307+
; CHECK-NEXT: mova r16, #4
308+
; CHECK-NEXT: vextract.s32 r5, x2, r16
309+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
310+
; CHECK-NEXT: mova r16, #5
311+
; CHECK-NEXT: vextract.s32 r6, x2, r16
312+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
313+
; CHECK-NEXT: mova r16, #6
314+
; CHECK-NEXT: vextract.s32 r7, x2, r16
315+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
316+
; CHECK-NEXT: mova r16, #7
317+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
318+
; CHECK-NEXT: vextract.s32 r8, x2, r16
319+
; CHECK-NEXT: add r16, r0, #-1
320+
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
321+
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
322+
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
323+
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
324+
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
325+
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
326+
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
327+
; CHECK-NEXT: vpush.lo.32 x0, r2, x0
328+
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
329+
; CHECK-NEXT: vpush.lo.32 x2, r8, x0
330+
; CHECK-NEXT: vpush.lo.32 x2, r7, x2
331+
; CHECK-NEXT: vpush.lo.32 x2, r6, x2
332+
; CHECK-NEXT: vpush.lo.32 x2, r5, x2
333+
; CHECK-NEXT: vpush.lo.32 x2, r4, x2
334+
; CHECK-NEXT: vpush.lo.32 x2, r3, x2
335+
; CHECK-NEXT: vpush.lo.32 x2, r2, x2
336+
; CHECK-NEXT: vpush.lo.32 x2, r1, x2
337+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
338+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
339+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
340+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
341+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
342+
; CHECK-NEXT: ret lr
343+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 5
344+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 4
345+
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 3
346+
; CHECK-NEXT: vsel.32 x0, x0, x2, r16 // Delay Slot 2
347+
; CHECK-NEXT: mov r16, r9 // Delay Slot 1
348+
entry:
349+
%cmp = icmp eq i32 %idx, 0
350+
; %a in the low half (lanes 0-7); the high half is undef.
%shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
351+
; %a in the high half (lanes 8-15); the low half is undef.
%shuffle1 = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
352+
%retval.0 = select i1 %cmp, <16 x i32> %shuffle, <16 x i32> %shuffle1
353+
ret <16 x i32> %retval.0
354+
}
355+
356+
; test_extract_elem: returns the single i32 lane of the 8-lane %a selected by
; the run-time index %idx, using extractelement (no shufflevector involved).
; The assertion lines inside the body are autogenerated (see the NOTE at the
; top of the file); regenerate them instead of editing them by hand.
define i32 @test_extract_elem(<8 x i32> noundef %a, i32 noundef %idx) {
357+
; CHECK-LABEL: test_extract_elem:
358+
; CHECK: .p2align 4
359+
; CHECK-NEXT: // %bb.0: // %entry
360+
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops
361+
; CHECK-NEXT: mov r2, r16 // Delay Slot 5
362+
; CHECK-NEXT: mov r16, r1 // Delay Slot 4
363+
; CHECK-NEXT: vextract.s32 r0, x0, r16 // Delay Slot 3
364+
; CHECK-NEXT: nop // Delay Slot 2
365+
; CHECK-NEXT: mov r16, r2 // Delay Slot 1
366+
entry:
367+
; The index is a function argument, not a constant.
%vecext = extractelement <8 x i32> %a, i32 %idx
368+
ret i32 %vecext
369+
}

0 commit comments

Comments
 (0)