@@ -99,100 +99,79 @@ define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <
9999; CHECK-LABEL: test_insert_vector:
100100; CHECK: .p2align 4
101101; CHECK-NEXT: // %bb.0: // %entry
102- ; CHECK-NEXT: nopx ; mov r25, r17
103- ; CHECK-NEXT: mov r26, r18
104- ; CHECK-NEXT: mov r27, r19
105- ; CHECK-NEXT: mova r19, #0
106- ; CHECK-NEXT: mova r18, #1
107- ; CHECK-NEXT: mov r24, r16
108- ; CHECK-NEXT: mova r16, #3
109- ; CHECK-NEXT: vextract.s32 r4, x4, r16
110- ; CHECK-NEXT: movx r17, #2
111- ; CHECK-NEXT: mova r16, #4
112- ; CHECK-NEXT: vextract.s32 r1, x4, r19
113- ; CHECK-NEXT: vextract.s32 r2, x4, r18
114- ; CHECK-NEXT: vextract.s32 r3, x4, r17
115- ; CHECK-NEXT: vextract.s32 r5, x4, r16
102+ ; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB1_2; nopv
103+ ; CHECK-NEXT: nopx // Delay Slot 5
104+ ; CHECK-NEXT: nop // Delay Slot 4
105+ ; CHECK-NEXT: nop // Delay Slot 3
106+ ; CHECK-NEXT: mov r24, r16 // Delay Slot 2
107+ ; CHECK-NEXT: mova r16, #0 // Delay Slot 1
108+ ; CHECK-NEXT: // %bb.1: // %if.end
109+ ; CHECK-NEXT: vextract.s32 r0, x2, r16
110+ ; CHECK-NEXT: vextract.s32 r1, x4, r16
116111; CHECK-NEXT: nop
117- ; CHECK-NEXT: mova r16, #5
118- ; CHECK-NEXT: vextract.s32 r6, x4, r16
112+ ; CHECK-NEXT: mova r16, #1
113+ ; CHECK-NEXT: vextract.s32 r2, x2, r16
114+ ; CHECK-NEXT: vextract.s32 r3, x4, r16
119115; CHECK-NEXT: nop
120- ; CHECK-NEXT: mova r16, #7
121- ; CHECK-NEXT: vextract.s32 r7, x4, r16
116+ ; CHECK-NEXT: mova r16, #2
117+ ; CHECK-NEXT: vextract.s32 r4, x2, r16
118+ ; CHECK-NEXT: vextract.s32 r5, x4, r16
122119; CHECK-NEXT: nop
123- ; CHECK-NEXT: mova r16, #6
124- ; CHECK-NEXT: vextract.s32 r8, x4, r16
125- ; CHECK-NEXT: vpush.lo.32 x0, r7, x0
126- ; CHECK-NEXT: vpush.lo.32 x0, r8, x0
127- ; CHECK-NEXT: vpush.lo.32 x0, r6, x0
128- ; CHECK-NEXT: jz r0, #.LBB1_2
129- ; CHECK-NEXT: vpush.lo.32 x0, r5, x0 // Delay Slot 5
130- ; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
131- ; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 3
132- ; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 2
133- ; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 1
134- ; CHECK-NEXT: // %bb.1: // %if.end
135- ; CHECK-NEXT: nopb ; mova r16, #3; nops ; nopxm ; nopv
136- ; CHECK-NEXT: vextract.s32 r0, x2, r19
137- ; CHECK-NEXT: vextract.s32 r1, x0, r19
138- ; CHECK-NEXT: vextract.s32 r2, x2, r18
139- ; CHECK-NEXT: vextract.s32 r3, x0, r18
140- ; CHECK-NEXT: vextract.s32 r4, x2, r17
141- ; CHECK-NEXT: vextract.s32 r5, x0, r17
120+ ; CHECK-NEXT: mova r16, #3
142121; CHECK-NEXT: vextract.s32 r6, x2, r16
143- ; CHECK-NEXT: vextract.s32 r7, x0 , r16
122+ ; CHECK-NEXT: vextract.s32 r7, x4 , r16
144123; CHECK-NEXT: nop
145124; CHECK-NEXT: mova r16, #4
146125; CHECK-NEXT: vextract.s32 r8, x2, r16
147- ; CHECK-NEXT: vextract.s32 r9, x0 , r16
126+ ; CHECK-NEXT: vextract.s32 r9, x4 , r16
148127; CHECK-NEXT: nop
149128; CHECK-NEXT: mova r16, #5
150129; CHECK-NEXT: vextract.s32 r10, x2, r16
151- ; CHECK-NEXT: vextract.s32 r11, x0 , r16
130+ ; CHECK-NEXT: vextract.s32 r11, x4 , r16
152131; CHECK-NEXT: nop
153132; CHECK-NEXT: mova r16, #7
154133; CHECK-NEXT: vextract.s32 r12, x2, r16
155- ; CHECK-NEXT: vextract.s32 r13, x0, r16
156134; CHECK-NEXT: j #.LBB1_3
157- ; CHECK-NEXT: nop // Delay Slot 5
158- ; CHECK-NEXT: mova r16, #6 // Delay Slot 4
159- ; CHECK-NEXT: vextract.s32 r14, x2, r16 // Delay Slot 3
160- ; CHECK-NEXT: vextract.s32 r15, x0 , r16 // Delay Slot 2
161- ; CHECK-NEXT: nop // Delay Slot 1
135+ ; CHECK-NEXT: vextract.s32 r13, x4, r16 // Delay Slot 5
136+ ; CHECK-NEXT: nop // Delay Slot 4
137+ ; CHECK-NEXT: mova r16, #6 // Delay Slot 3
138+ ; CHECK-NEXT: vextract.s32 r14, x2 , r16 // Delay Slot 2
139+ ; CHECK-NEXT: vextract.s32 r15, x4, r16 // Delay Slot 1
162140; CHECK-NEXT: .p2align 4
163141; CHECK-NEXT: .LBB1_2: // %if.then
164- ; CHECK-NEXT: mova r16, #3; nopx
165- ; CHECK-NEXT: vextract.s32 r0, x0, r19
166- ; CHECK-NEXT: vextract.s32 r1, x2, r19
167- ; CHECK-NEXT: vextract.s32 r2, x0, r18
168- ; CHECK-NEXT: vextract.s32 r3, x2, r18
169- ; CHECK-NEXT: vextract.s32 r4, x0, r17
170- ; CHECK-NEXT: vextract.s32 r5, x2, r17
171- ; CHECK-NEXT: vextract.s32 r6, x0, r16
142+ ; CHECK-NEXT: nopa ; nopx ; vextract.s32 r0, x4, r16
143+ ; CHECK-NEXT: vextract.s32 r1, x2, r16
144+ ; CHECK-NEXT: nop
145+ ; CHECK-NEXT: mova r16, #1
146+ ; CHECK-NEXT: vextract.s32 r2, x4, r16
147+ ; CHECK-NEXT: vextract.s32 r3, x2, r16
148+ ; CHECK-NEXT: nop
149+ ; CHECK-NEXT: mova r16, #2
150+ ; CHECK-NEXT: vextract.s32 r4, x4, r16
151+ ; CHECK-NEXT: vextract.s32 r5, x2, r16
152+ ; CHECK-NEXT: nop
153+ ; CHECK-NEXT: mova r16, #3
154+ ; CHECK-NEXT: vextract.s32 r6, x4, r16
172155; CHECK-NEXT: vextract.s32 r7, x2, r16
173156; CHECK-NEXT: nop
174157; CHECK-NEXT: mova r16, #4
175- ; CHECK-NEXT: vextract.s32 r8, x0 , r16
158+ ; CHECK-NEXT: vextract.s32 r8, x4 , r16
176159; CHECK-NEXT: vextract.s32 r9, x2, r16
177160; CHECK-NEXT: nop
178161; CHECK-NEXT: mova r16, #5
179- ; CHECK-NEXT: vextract.s32 r10, x0 , r16
162+ ; CHECK-NEXT: vextract.s32 r10, x4 , r16
180163; CHECK-NEXT: vextract.s32 r11, x2, r16
181164; CHECK-NEXT: nop
182165; CHECK-NEXT: mova r16, #7
183- ; CHECK-NEXT: vextract.s32 r12, x0 , r16
166+ ; CHECK-NEXT: vextract.s32 r12, x4 , r16
184167; CHECK-NEXT: vextract.s32 r13, x2, r16
185168; CHECK-NEXT: nop
186169; CHECK-NEXT: mova r16, #6
187- ; CHECK-NEXT: vextract.s32 r14, x0 , r16
170+ ; CHECK-NEXT: vextract.s32 r14, x4 , r16
188171; CHECK-NEXT: vextract.s32 r15, x2, r16
189- ; CHECK-NEXT: nop
190172; CHECK-NEXT: .p2align 4
191173; CHECK-NEXT: .LBB1_3: // %cleanup
192- ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r19, r27; nopv
193- ; CHECK-NEXT: mov r18, r26
194- ; CHECK-NEXT: mov r17, r25
195- ; CHECK-NEXT: vpush.lo.32 x0, r13, x0
174+ ; CHECK-NEXT: nopa ; nopb ; nopx ; vpush.lo.32 x0, r13, x0
196175; CHECK-NEXT: vpush.lo.32 x0, r15, x0
197176; CHECK-NEXT: vpush.lo.32 x0, r11, x0
198177; CHECK-NEXT: vpush.lo.32 x0, r9, x0
@@ -232,56 +211,12 @@ define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b
232211; CHECK-LABEL: test_concat_vector:
233212; CHECK: .p2align 4
234213; CHECK-NEXT: // %bb.0: // %entry
235- ; CHECK-NEXT: nopa ; nopx ; mov r24, r16
236- ; CHECK-NEXT: mova r16, #0
237- ; CHECK-NEXT: vextract.s32 r0, x2, r16
238- ; CHECK-NEXT: vextract.s32 r1, x4, r16
239- ; CHECK-NEXT: nop
240- ; CHECK-NEXT: mova r16, #1
241- ; CHECK-NEXT: vextract.s32 r2, x2, r16
242- ; CHECK-NEXT: vextract.s32 r3, x4, r16
243- ; CHECK-NEXT: nop
244- ; CHECK-NEXT: mova r16, #2
245- ; CHECK-NEXT: vextract.s32 r4, x2, r16
246- ; CHECK-NEXT: vextract.s32 r5, x4, r16
247- ; CHECK-NEXT: nop
248- ; CHECK-NEXT: mova r16, #3
249- ; CHECK-NEXT: vextract.s32 r6, x2, r16
250- ; CHECK-NEXT: vextract.s32 r7, x4, r16
251- ; CHECK-NEXT: nop
252- ; CHECK-NEXT: mova r16, #4
253- ; CHECK-NEXT: vextract.s32 r8, x2, r16
254- ; CHECK-NEXT: vextract.s32 r9, x4, r16
255- ; CHECK-NEXT: nop
256- ; CHECK-NEXT: mova r16, #5
257- ; CHECK-NEXT: vextract.s32 r10, x2, r16
258- ; CHECK-NEXT: vextract.s32 r11, x4, r16
259- ; CHECK-NEXT: nop
260- ; CHECK-NEXT: mova r16, #7
261- ; CHECK-NEXT: vextract.s32 r12, x2, r16
262- ; CHECK-NEXT: vextract.s32 r13, x4, r16
263- ; CHECK-NEXT: nop
264- ; CHECK-NEXT: mova r16, #6
265- ; CHECK-NEXT: vextract.s32 r14, x2, r16
266- ; CHECK-NEXT: vextract.s32 r15, x4, r16
267- ; CHECK-NEXT: vpush.lo.32 x0, r13, x0
268- ; CHECK-NEXT: vpush.lo.32 x0, r15, x0
269- ; CHECK-NEXT: vpush.lo.32 x0, r11, x0
270- ; CHECK-NEXT: vpush.lo.32 x0, r9, x0
271- ; CHECK-NEXT: vpush.lo.32 x0, r7, x0
272- ; CHECK-NEXT: vpush.lo.32 x0, r5, x0
273- ; CHECK-NEXT: vpush.lo.32 x0, r3, x0
274- ; CHECK-NEXT: vpush.lo.32 x0, r1, x0
275- ; CHECK-NEXT: vpush.lo.32 x0, r12, x0
276- ; CHECK-NEXT: vpush.lo.32 x0, r14, x0
277- ; CHECK-NEXT: vpush.lo.32 x0, r10, x0
278- ; CHECK-NEXT: vpush.lo.32 x0, r8, x0
279- ; CHECK-NEXT: ret lr
280- ; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
281- ; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
282- ; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
283- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
284- ; CHECK-NEXT: mov r16, r24 // Delay Slot 1
214+ ; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
215+ ; CHECK-NEXT: nopx // Delay Slot 5
216+ ; CHECK-NEXT: nop // Delay Slot 4
217+ ; CHECK-NEXT: vmov wl0, wl2 // Delay Slot 3
218+ ; CHECK-NEXT: vmov wh0, wl4 // Delay Slot 2
219+ ; CHECK-NEXT: nop // Delay Slot 1
285220entry:
286221 %shuffle = shufflevector <8 x i32 > %a , <8 x i32 > %b , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
287222 ret <16 x i32 > %shuffle
@@ -291,50 +226,14 @@ define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) {
291226; CHECK-LABEL: test_set_vector:
292227; CHECK: .p2align 4
293228; CHECK-NEXT: // %bb.0: // %entry
294- ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r9, r16; nopv
295- ; CHECK-NEXT: mova r16, #0
296- ; CHECK-NEXT: vextract.s32 r1, x2, r16
297- ; CHECK-NEXT: nop
298- ; CHECK-NEXT: mova r16, #1
299- ; CHECK-NEXT: vextract.s32 r2, x2, r16
300- ; CHECK-NEXT: nop
301- ; CHECK-NEXT: mova r16, #2
302- ; CHECK-NEXT: vextract.s32 r3, x2, r16
303- ; CHECK-NEXT: nop
304- ; CHECK-NEXT: mova r16, #3
305- ; CHECK-NEXT: vextract.s32 r4, x2, r16
306- ; CHECK-NEXT: nop
307- ; CHECK-NEXT: mova r16, #4
308- ; CHECK-NEXT: vextract.s32 r5, x2, r16
309- ; CHECK-NEXT: nop
310- ; CHECK-NEXT: mova r16, #5
311- ; CHECK-NEXT: vextract.s32 r6, x2, r16
312- ; CHECK-NEXT: nop
313- ; CHECK-NEXT: mova r16, #7
314- ; CHECK-NEXT: vextract.s32 r7, x2, r16
229+ ; CHECK-NEXT: mov r1, r16
315230; CHECK-NEXT: eqz r0, r0
316- ; CHECK-NEXT: mova r16, #6
317- ; CHECK-NEXT: vextract.s32 r8, x2, r16
318- ; CHECK-NEXT: add r16, r0, #-1
319- ; CHECK-NEXT: vpush.lo.32 x0, r7, x0
320- ; CHECK-NEXT: vpush.lo.32 x0, r8, x0
321- ; CHECK-NEXT: vpush.lo.32 x0, r6, x0
322- ; CHECK-NEXT: vpush.lo.32 x0, r5, x0
323- ; CHECK-NEXT: vpush.lo.32 x0, r4, x0
324- ; CHECK-NEXT: vpush.lo.32 x0, r3, x0
325- ; CHECK-NEXT: vpush.lo.32 x0, r2, x0
326- ; CHECK-NEXT: vpush.lo.32 x0, r1, x0
327- ; CHECK-NEXT: vpush.lo.32 x2, r0, x0
328- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
329- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
330- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
331- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
332231; CHECK-NEXT: ret lr
333- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 5
334- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 4
335- ; CHECK-NEXT: vpush.lo.32 x2 , r0, x2 // Delay Slot 3
336- ; CHECK-NEXT: vsel.32 x0, x0, x2 , r16 // Delay Slot 2
337- ; CHECK-NEXT: mov r16, r9 // Delay Slot 1
232+ ; CHECK-NEXT: vmov wh0, wl2 // Delay Slot 5
233+ ; CHECK-NEXT: vmov wl0, wl2 // Delay Slot 4
234+ ; CHECK-NEXT: add r16 , r0, #-1 // Delay Slot 3
235+ ; CHECK-NEXT: vsel.32 x0, x0, x0 , r16 // Delay Slot 2
236+ ; CHECK-NEXT: mov r16, r1 // Delay Slot 1
338237entry:
339238 %cmp = icmp eq i32 %idx , 0
340239 %shuffle = shufflevector <8 x i32 > %a , <8 x i32 > undef , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
0 commit comments