@@ -229,15 +229,17 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
229229define i32 @shufflevector_v4i8 (<4 x i8 > %a , <4 x i8 > %b ){
230230; CHECK-SD-LABEL: shufflevector_v4i8:
231231; CHECK-SD: // %bb.0:
232- ; CHECK-SD-NEXT: sub sp, sp, #16
233- ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
234- ; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6
235- ; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h
236- ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
237- ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
238- ; CHECK-SD-NEXT: fmov w0, s0
239- ; CHECK-SD-NEXT: add sp, sp, #16
240- ; CHECK-SD-NEXT: ret
232+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
233+ ; CHECK-SD-NEXT: umov w8, v0.h[1]
234+ ; CHECK-SD-NEXT: umov w9, v0.h[2]
235+ ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
236+ ; CHECK-SD-NEXT: umov w10, v1.h[0]
237+ ; CHECK-SD-NEXT: and w8, w8, #0xff
238+ ; CHECK-SD-NEXT: bfi w8, w9, #8, #8
239+ ; CHECK-SD-NEXT: umov w9, v1.h[3]
240+ ; CHECK-SD-NEXT: bfi w8, w10, #16, #8
241+ ; CHECK-SD-NEXT: orr w0, w8, w9, lsl #24
242+ ; CHECK-SD-NEXT: ret
241243;
242244; CHECK-GI-LABEL: shufflevector_v4i8:
243245; CHECK-GI: // %bb.0:
@@ -285,15 +287,11 @@ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
285287define i32 @shufflevector_v2i16 (<2 x i16 > %a , <2 x i16 > %b ){
286288; CHECK-SD-LABEL: shufflevector_v2i16:
287289; CHECK-SD: // %bb.0:
288- ; CHECK-SD-NEXT: sub sp, sp, #16
289- ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
290- ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
291- ; CHECK-SD-NEXT: mov w8, v0.s[1]
292- ; CHECK-SD-NEXT: fmov w9, s0
293- ; CHECK-SD-NEXT: strh w9, [sp, #12]
294- ; CHECK-SD-NEXT: strh w8, [sp, #14]
295- ; CHECK-SD-NEXT: ldr w0, [sp, #12]
296- ; CHECK-SD-NEXT: add sp, sp, #16
290+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
291+ ; CHECK-SD-NEXT: mov w0, v0.s[1]
292+ ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
293+ ; CHECK-SD-NEXT: fmov w8, s1
294+ ; CHECK-SD-NEXT: bfi w0, w8, #16, #16
297295; CHECK-SD-NEXT: ret
298296;
299297; CHECK-GI-LABEL: shufflevector_v2i16:
@@ -462,14 +460,13 @@ define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
462460define i32 @shufflevector_v4i8_zeroes (<4 x i8 > %a , <4 x i8 > %b ){
463461; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
464462; CHECK-SD: // %bb.0:
465- ; CHECK-SD-NEXT: sub sp, sp, #16
466- ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
467- ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
468- ; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
469- ; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
470- ; CHECK-SD-NEXT: fmov w0, s0
471- ; CHECK-SD-NEXT: add sp, sp, #16
472- ; CHECK-SD-NEXT: ret
463+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
464+ ; CHECK-SD-NEXT: umov w8, v0.h[0]
465+ ; CHECK-SD-NEXT: and w9, w8, #0xff
466+ ; CHECK-SD-NEXT: orr w9, w9, w9, lsl #8
467+ ; CHECK-SD-NEXT: bfi w9, w8, #16, #8
468+ ; CHECK-SD-NEXT: orr w0, w9, w8, lsl #24
469+ ; CHECK-SD-NEXT: ret
473470;
474471; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
475472; CHECK-GI: // %bb.0:
@@ -495,16 +492,9 @@ define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
495492define i32 @shufflevector_v2i16_zeroes (<2 x i16 > %a , <2 x i16 > %b ){
496493; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
497494; CHECK-SD: // %bb.0:
498- ; CHECK-SD-NEXT: sub sp, sp, #16
499- ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
500495; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
501- ; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
502- ; CHECK-SD-NEXT: fmov w9, s0
503- ; CHECK-SD-NEXT: strh w9, [sp, #12]
504- ; CHECK-SD-NEXT: mov w8, v1.s[1]
505- ; CHECK-SD-NEXT: strh w8, [sp, #14]
506- ; CHECK-SD-NEXT: ldr w0, [sp, #12]
507- ; CHECK-SD-NEXT: add sp, sp, #16
496+ ; CHECK-SD-NEXT: fmov w0, s0
497+ ; CHECK-SD-NEXT: bfi w0, w0, #16, #16
508498; CHECK-SD-NEXT: ret
509499;
510500; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
0 commit comments