@@ -99,100 +99,79 @@ define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <
9999; CHECK-LABEL: test_insert_vector:
100100; CHECK: .p2align 4
101101; CHECK-NEXT: // %bb.0: // %entry
102- ; CHECK-NEXT: nopa ; nopb ; nopx ; mov r25, r17
103- ; CHECK-NEXT: mov r26, r18
104- ; CHECK-NEXT: mov r27, r19
105- ; CHECK-NEXT: mova r19, #0
106- ; CHECK-NEXT: mova r18, #1
107- ; CHECK-NEXT: mova r17, #2
108- ; CHECK-NEXT: mov r24, r16
102+ ; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB1_2; nopv
103+ ; CHECK-NEXT: nopx // Delay Slot 5
104+ ; CHECK-NEXT: nop // Delay Slot 4
105+ ; CHECK-NEXT: nop // Delay Slot 3
106+ ; CHECK-NEXT: mov r24, r16 // Delay Slot 2
107+ ; CHECK-NEXT: mova r16, #0 // Delay Slot 1
108+ ; CHECK-NEXT: // %bb.1: // %if.end
109+ ; CHECK-NEXT: vextract.s32 r0, x2, r16
110+ ; CHECK-NEXT: vextract.s32 r1, x4, r16
111+ ; CHECK-NEXT: nop
112+ ; CHECK-NEXT: mova r16, #1
113+ ; CHECK-NEXT: vextract.s32 r2, x2, r16
114+ ; CHECK-NEXT: vextract.s32 r3, x4, r16
115+ ; CHECK-NEXT: nop
116+ ; CHECK-NEXT: mova r16, #2
117+ ; CHECK-NEXT: vextract.s32 r4, x2, r16
118+ ; CHECK-NEXT: vextract.s32 r5, x4, r16
119+ ; CHECK-NEXT: nop
109120; CHECK-NEXT: mova r16, #3
110- ; CHECK-NEXT: vextract.s32 r4, x4, r16
111- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
121+ ; CHECK-NEXT: vextract.s32 r6, x2, r16
122+ ; CHECK-NEXT: vextract.s32 r7, x4, r16
123+ ; CHECK-NEXT: nop
112124; CHECK-NEXT: mova r16, #4
113- ; CHECK-NEXT: vextract.s32 r1, x4, r19
114- ; CHECK-NEXT: vextract.s32 r2, x4, r18
115- ; CHECK-NEXT: vextract.s32 r3, x4, r17
116- ; CHECK-NEXT: vextract.s32 r5, x4, r16
117- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
125+ ; CHECK-NEXT: vextract.s32 r8, x2, r16
126+ ; CHECK-NEXT: vextract.s32 r9, x4, r16
127+ ; CHECK-NEXT: nop
118128; CHECK-NEXT: mova r16, #5
119- ; CHECK-NEXT: vextract.s32 r6, x4, r16
120- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
121- ; CHECK-NEXT: mova r16, #6
122- ; CHECK-NEXT: vextract.s32 r7, x4, r16
123- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
124- ; CHECK-NEXT: mova r16, #7
125- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
126- ; CHECK-NEXT: vextract.s32 r8, x4, r16
127- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
128- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
129- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
130- ; CHECK-NEXT: vpush.lo.32 x0, r8, x0
131- ; CHECK-NEXT: vpush.lo.32 x0, r7, x0
132- ; CHECK-NEXT: vpush.lo.32 x0, r6, x0
133- ; CHECK-NEXT: vpush.lo.32 x0, r5, x0
134- ; CHECK-NEXT: jz r0, #.LBB1_2
135- ; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 5
136- ; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 4
137- ; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
138- ; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 2
139- ; CHECK-NEXT: nop // Delay Slot 1
140- ; CHECK-NEXT: // %bb.1: // %if.end
141- ; CHECK-NEXT: nopx ; vextract.s32 r12, x2, r16
142- ; CHECK-NEXT: vextract.s32 r13, x0, r16
143- ; CHECK-NEXT: vextract.s32 r4, x2, r17
144- ; CHECK-NEXT: vextract.s32 r5, x0, r17
129+ ; CHECK-NEXT: vextract.s32 r10, x2, r16
130+ ; CHECK-NEXT: vextract.s32 r11, x4, r16
145131; CHECK-NEXT: nop
146- ; CHECK-NEXT: mova r17, #3
147- ; CHECK-NEXT: vextract.s32 r0, x2, r19
148- ; CHECK-NEXT: vextract.s32 r1, x0, r19
149- ; CHECK-NEXT: vextract.s32 r2, x2, r18
150- ; CHECK-NEXT: vextract.s32 r3, x0, r18
151- ; CHECK-NEXT: vextract.s32 r6, x2, r17
152- ; CHECK-NEXT: vextract.s32 r7, x0, r17
153- ; CHECK-NEXT: movx r16, #6
154- ; CHECK-NEXT: mova r17, #4
155- ; CHECK-NEXT: vextract.s32 r14, x2, r16
156- ; CHECK-NEXT: vextract.s32 r15, x0, r16
157- ; CHECK-NEXT: vextract.s32 r8, x2, r17
158- ; CHECK-NEXT: vextract.s32 r9, x0, r17
132+ ; CHECK-NEXT: mova r16, #7
133+ ; CHECK-NEXT: vextract.s32 r12, x2, r16
159134; CHECK-NEXT: j #.LBB1_3
160- ; CHECK-NEXT: nop // Delay Slot 5
161- ; CHECK-NEXT: mova r17, #5 // Delay Slot 4
162- ; CHECK-NEXT: vextract.s32 r10, x2, r17 // Delay Slot 3
163- ; CHECK-NEXT: vextract.s32 r11, x0, r17 // Delay Slot 2
164- ; CHECK-NEXT: nop // Delay Slot 1
135+ ; CHECK-NEXT: vextract.s32 r13, x4, r16 // Delay Slot 5
136+ ; CHECK-NEXT: nop // Delay Slot 4
137+ ; CHECK-NEXT: mova r16, #6 // Delay Slot 3
138+ ; CHECK-NEXT: vextract.s32 r14, x2, r16 // Delay Slot 2
139+ ; CHECK-NEXT: vextract.s32 r15, x4, r16 // Delay Slot 1
165140; CHECK-NEXT: .p2align 4
166141; CHECK-NEXT: .LBB1_2: // %if.then
167- ; CHECK-NEXT: nopa ; nopb ; nopx ; vextract.s32 r12, x0, r16; nops
168- ; CHECK-NEXT: vextract.s32 r13, x2, r16
169- ; CHECK-NEXT: vextract.s32 r4, x0, r17
170- ; CHECK-NEXT: vextract.s32 r5, x2, r17
142+ ; CHECK-NEXT: nopa ; nopx ; vextract.s32 r0, x4, r16
143+ ; CHECK-NEXT: vextract.s32 r1, x2, r16
171144; CHECK-NEXT: nop
172- ; CHECK-NEXT: mova r17, #3
173- ; CHECK-NEXT: vextract.s32 r0, x0, r19
174- ; CHECK-NEXT: vextract.s32 r1, x2, r19
175- ; CHECK-NEXT: vextract.s32 r2, x0, r18
176- ; CHECK-NEXT: vextract.s32 r3, x2, r18
177- ; CHECK-NEXT: vextract.s32 r6, x0, r17
178- ; CHECK-NEXT: vextract.s32 r7, x2, r17
179- ; CHECK-NEXT: movx r16, #6
180- ; CHECK-NEXT: mova r17, #4
181- ; CHECK-NEXT: vextract.s32 r14, x0, r16
182- ; CHECK-NEXT: vextract.s32 r15, x2, r16
183- ; CHECK-NEXT: vextract.s32 r8, x0, r17
184- ; CHECK-NEXT: vextract.s32 r9, x2, r17
145+ ; CHECK-NEXT: mova r16, #1
146+ ; CHECK-NEXT: vextract.s32 r2, x4, r16
147+ ; CHECK-NEXT: vextract.s32 r3, x2, r16
185148; CHECK-NEXT: nop
186- ; CHECK-NEXT: mova r17 , #5
187- ; CHECK-NEXT: vextract.s32 r10, x0, r17
188- ; CHECK-NEXT: vextract.s32 r11 , x2, r17
149+ ; CHECK-NEXT: mova r16 , #2
150+ ; CHECK-NEXT: vextract.s32 r4, x4, r16
151+ ; CHECK-NEXT: vextract.s32 r5 , x2, r16
189152; CHECK-NEXT: nop
153+ ; CHECK-NEXT: mova r16, #3
154+ ; CHECK-NEXT: vextract.s32 r6, x4, r16
155+ ; CHECK-NEXT: vextract.s32 r7, x2, r16
156+ ; CHECK-NEXT: nop
157+ ; CHECK-NEXT: mova r16, #4
158+ ; CHECK-NEXT: vextract.s32 r8, x4, r16
159+ ; CHECK-NEXT: vextract.s32 r9, x2, r16
160+ ; CHECK-NEXT: nop
161+ ; CHECK-NEXT: mova r16, #5
162+ ; CHECK-NEXT: vextract.s32 r10, x4, r16
163+ ; CHECK-NEXT: vextract.s32 r11, x2, r16
164+ ; CHECK-NEXT: nop
165+ ; CHECK-NEXT: mova r16, #7
166+ ; CHECK-NEXT: vextract.s32 r12, x4, r16
167+ ; CHECK-NEXT: vextract.s32 r13, x2, r16
168+ ; CHECK-NEXT: nop
169+ ; CHECK-NEXT: mova r16, #6
170+ ; CHECK-NEXT: vextract.s32 r14, x4, r16
171+ ; CHECK-NEXT: vextract.s32 r15, x2, r16
190172; CHECK-NEXT: .p2align 4
191173; CHECK-NEXT: .LBB1_3: // %cleanup
192- ; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r19, r27; nopv
193- ; CHECK-NEXT: mov r18, r26
194- ; CHECK-NEXT: mov r17, r25
195- ; CHECK-NEXT: vpush.lo.32 x0, r13, x0
174+ ; CHECK-NEXT: nopa ; nopb ; nopx ; vpush.lo.32 x0, r13, x0
196175; CHECK-NEXT: vpush.lo.32 x0, r15, x0
197176; CHECK-NEXT: vpush.lo.32 x0, r11, x0
198177; CHECK-NEXT: vpush.lo.32 x0, r9, x0
@@ -232,56 +211,12 @@ define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b
232211; CHECK-LABEL: test_concat_vector:
233212; CHECK: .p2align 4
234213; CHECK-NEXT: // %bb.0: // %entry
235- ; CHECK-NEXT: nopa ; nopx ; mov r24, r16
236- ; CHECK-NEXT: mova r16, #0
237- ; CHECK-NEXT: vextract.s32 r0, x2, r16
238- ; CHECK-NEXT: vextract.s32 r1, x4, r16
239- ; CHECK-NEXT: nop
240- ; CHECK-NEXT: mova r16, #1
241- ; CHECK-NEXT: vextract.s32 r2, x2, r16
242- ; CHECK-NEXT: vextract.s32 r3, x4, r16
243- ; CHECK-NEXT: nop
244- ; CHECK-NEXT: mova r16, #2
245- ; CHECK-NEXT: vextract.s32 r4, x2, r16
246- ; CHECK-NEXT: vextract.s32 r5, x4, r16
247- ; CHECK-NEXT: nop
248- ; CHECK-NEXT: mova r16, #3
249- ; CHECK-NEXT: vextract.s32 r6, x2, r16
250- ; CHECK-NEXT: vextract.s32 r7, x4, r16
251- ; CHECK-NEXT: nop
252- ; CHECK-NEXT: mova r16, #4
253- ; CHECK-NEXT: vextract.s32 r8, x2, r16
254- ; CHECK-NEXT: vextract.s32 r9, x4, r16
255- ; CHECK-NEXT: nop
256- ; CHECK-NEXT: mova r16, #5
257- ; CHECK-NEXT: vextract.s32 r10, x2, r16
258- ; CHECK-NEXT: vextract.s32 r11, x4, r16
259- ; CHECK-NEXT: nop
260- ; CHECK-NEXT: mova r16, #7
261- ; CHECK-NEXT: vextract.s32 r12, x2, r16
262- ; CHECK-NEXT: vextract.s32 r13, x4, r16
263- ; CHECK-NEXT: nop
264- ; CHECK-NEXT: mova r16, #6
265- ; CHECK-NEXT: vextract.s32 r14, x2, r16
266- ; CHECK-NEXT: vextract.s32 r15, x4, r16
267- ; CHECK-NEXT: vpush.lo.32 x0, r13, x0
268- ; CHECK-NEXT: vpush.lo.32 x0, r15, x0
269- ; CHECK-NEXT: vpush.lo.32 x0, r11, x0
270- ; CHECK-NEXT: vpush.lo.32 x0, r9, x0
271- ; CHECK-NEXT: vpush.lo.32 x0, r7, x0
272- ; CHECK-NEXT: vpush.lo.32 x0, r5, x0
273- ; CHECK-NEXT: vpush.lo.32 x0, r3, x0
274- ; CHECK-NEXT: vpush.lo.32 x0, r1, x0
275- ; CHECK-NEXT: vpush.lo.32 x0, r12, x0
276- ; CHECK-NEXT: vpush.lo.32 x0, r14, x0
277- ; CHECK-NEXT: vpush.lo.32 x0, r10, x0
278- ; CHECK-NEXT: vpush.lo.32 x0, r8, x0
279- ; CHECK-NEXT: ret lr
280- ; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
281- ; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
282- ; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
283- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
284- ; CHECK-NEXT: mov r16, r24 // Delay Slot 1
214+ ; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
215+ ; CHECK-NEXT: nopx // Delay Slot 5
216+ ; CHECK-NEXT: nop // Delay Slot 4
217+ ; CHECK-NEXT: vmov wl0, wl2 // Delay Slot 3
218+ ; CHECK-NEXT: vmov wh0, wl4 // Delay Slot 2
219+ ; CHECK-NEXT: nop // Delay Slot 1
285220entry:
286221 %shuffle = shufflevector <8 x i32 > %a , <8 x i32 > %b , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
287222 ret <16 x i32 > %shuffle
@@ -291,60 +226,13 @@ define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) {
291226; CHECK-LABEL: test_set_vector:
292227; CHECK: .p2align 4
293228; CHECK-NEXT: // %bb.0: // %entry
294- ; CHECK-NEXT: nopa ; nopb ; nopx ; mov r9, r16
295- ; CHECK-NEXT: mova r16, #0
296- ; CHECK-NEXT: vextract.s32 r1, x2, r16
297- ; CHECK-NEXT: eqz r0, r0
298- ; CHECK-NEXT: mova r16, #1
299- ; CHECK-NEXT: vextract.s32 r2, x2, r16
300- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
301- ; CHECK-NEXT: mova r16, #2
302- ; CHECK-NEXT: vextract.s32 r3, x2, r16
303- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
304- ; CHECK-NEXT: mova r16, #3
305- ; CHECK-NEXT: vextract.s32 r4, x2, r16
306- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
307- ; CHECK-NEXT: mova r16, #4
308- ; CHECK-NEXT: vextract.s32 r5, x2, r16
309- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
310- ; CHECK-NEXT: mova r16, #5
311- ; CHECK-NEXT: vextract.s32 r6, x2, r16
312- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
313- ; CHECK-NEXT: mova r16, #6
314- ; CHECK-NEXT: vextract.s32 r7, x2, r16
315- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
316- ; CHECK-NEXT: mova r16, #7
317- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
318- ; CHECK-NEXT: vextract.s32 r8, x2, r16
319- ; CHECK-NEXT: add r16, r0, #-1
320- ; CHECK-NEXT: vpush.lo.32 x0, r0, x0
321- ; CHECK-NEXT: vpush.lo.32 x0, r8, x0
322- ; CHECK-NEXT: vpush.lo.32 x0, r7, x0
323- ; CHECK-NEXT: vpush.lo.32 x0, r6, x0
324- ; CHECK-NEXT: vpush.lo.32 x0, r5, x0
325- ; CHECK-NEXT: vpush.lo.32 x0, r4, x0
326- ; CHECK-NEXT: vpush.lo.32 x0, r3, x0
327- ; CHECK-NEXT: vpush.lo.32 x0, r2, x0
328- ; CHECK-NEXT: vpush.lo.32 x0, r1, x0
329- ; CHECK-NEXT: vpush.lo.32 x2, r8, x0
330- ; CHECK-NEXT: vpush.lo.32 x2, r7, x2
331- ; CHECK-NEXT: vpush.lo.32 x2, r6, x2
332- ; CHECK-NEXT: vpush.lo.32 x2, r5, x2
333- ; CHECK-NEXT: vpush.lo.32 x2, r4, x2
334- ; CHECK-NEXT: vpush.lo.32 x2, r3, x2
335- ; CHECK-NEXT: vpush.lo.32 x2, r2, x2
336- ; CHECK-NEXT: vpush.lo.32 x2, r1, x2
337- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
338- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
339- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
340- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
341- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2
229+ ; CHECK-NEXT: nopx ; mov r1, r16
342230; CHECK-NEXT: ret lr
343- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 5
344- ; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 4
345- ; CHECK-NEXT: vpush.lo.32 x2 , r0, x2 // Delay Slot 3
346- ; CHECK-NEXT: vsel.32 x0, x0, x2 , r16 // Delay Slot 2
347- ; CHECK-NEXT: mov r16, r9 // Delay Slot 1
231+ ; CHECK-NEXT: eqz r0, r0 // Delay Slot 5
232+ ; CHECK-NEXT: vmov wh0, wl2 // Delay Slot 4
233+ ; CHECK-NEXT: add r16 , r0, #-1 // Delay Slot 3
234+ ; CHECK-NEXT: vsel.32 x0, x2, x0 , r16 // Delay Slot 2
235+ ; CHECK-NEXT: mov r16, r1 // Delay Slot 1
348236entry:
349237 %cmp = icmp eq i32 %idx , 0
350238 %shuffle = shufflevector <8 x i32 > %a , <8 x i32 > undef , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef , i32 undef >
0 commit comments