diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7f7e1d20ae604..9bcee256aad1e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1114,6 +1114,7 @@ let RecomputePerFunction = 1 in {
 def SLSBLRMitigation : Predicate<[{ MF->getSubtarget().hardenSlsBlr() }]>;
 def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>;
+
 // Toggles patterns which aren't beneficial in GlobalISel when we aren't
 // optimizing. This allows us to selectively use patterns without impacting
 // SelectionDAG's behaviour.
@@ -4036,6 +4037,10 @@ multiclass LoadInsertPatterns;
 }
+// Accept i8 scalar argument in GlobalISel.
+defm : LoadInsertPatterns;
 defm : LoadInsertPatterns;
@@ -7307,12 +7312,12 @@ multiclass Neon_INS_elt_pattern;
+                 (VT128 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub)), imm:$Immn)>;
   def : Pat<(VT64 (vector_insert V64:$src,
                      (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
                      (i64 imm:$Immd))),
-            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
+            (EXTRACT_SUBREG (INS (VT128 (SUBREG_TO_REG (i64 0), V64:$src, dsub)),
                                  imm:$Immd, V128:$Rn, imm:$Immn),
                             dsub)>;
@@ -7330,6 +7335,8 @@ defm : Neon_INS_elt_pattern;
 defm : Neon_INS_elt_pattern;
+// Accept i8 scalar argument in GlobalISel.
+defm : Neon_INS_elt_pattern;
 defm : Neon_INS_elt_pattern;
 defm : Neon_INS_elt_pattern;
 defm : Neon_INS_elt_pattern;
@@ -8752,6 +8759,8 @@ class Ld1Lane128Pat;
+// Accept i8 scalar argument in GlobalISel.
+def : Ld1Lane128Pat;
 def : Ld1Lane128Pat;
 def : Ld1Lane128Pat;
 def : Ld1Lane128Pat;
@@ -8825,6 +8834,8 @@ class Ld1Lane64Pat;
+// Accept i8 scalar argument in GlobalISel.
+def : Ld1Lane64Pat;
 def : Ld1Lane64Pat;
 def : Ld1Lane64Pat;
 def : Ld1Lane64Pat;
diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
index 7a4cdd52db904..bed38b1dc5a0e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
@@ -88,24 +88,16 @@ define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
 ; CHECK-GISEL-NEXT: mov x8, sp
 ; CHECK-GISEL-NEXT: str q0, [sp]
 ; CHECK-GISEL-NEXT: and x9, x9, #0xf
-; CHECK-GISEL-NEXT: mov b2, v0.b[1]
-; CHECK-GISEL-NEXT: mov b3, v0.b[2]
 ; CHECK-GISEL-NEXT: lsl x10, x9, #1
 ; CHECK-GISEL-NEXT: sub x9, x10, x9
 ; CHECK-GISEL-NEXT: ldr b1, [x8, x9]
-; CHECK-GISEL-NEXT: mov v1.b[0], v1.b[0]
-; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0]
-; CHECK-GISEL-NEXT: mov b2, v0.b[3]
-; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0]
-; CHECK-GISEL-NEXT: mov b3, v0.b[4]
-; CHECK-GISEL-NEXT: mov v1.b[3], v2.b[0]
-; CHECK-GISEL-NEXT: mov b2, v0.b[5]
-; CHECK-GISEL-NEXT: mov v1.b[4], v3.b[0]
-; CHECK-GISEL-NEXT: mov b3, v0.b[6]
-; CHECK-GISEL-NEXT: mov b0, v0.b[7]
-; CHECK-GISEL-NEXT: mov v1.b[5], v2.b[0]
-; CHECK-GISEL-NEXT: mov v1.b[6], v3.b[0]
-; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[0]
+; CHECK-GISEL-NEXT: mov v1.b[1], v0.b[1]
+; CHECK-GISEL-NEXT: mov v1.b[2], v0.b[2]
+; CHECK-GISEL-NEXT: mov v1.b[3], v0.b[3]
+; CHECK-GISEL-NEXT: mov v1.b[4], v0.b[4]
+; CHECK-GISEL-NEXT: mov v1.b[5], v0.b[5]
+; CHECK-GISEL-NEXT: mov v1.b[6], v0.b[6]
+; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[7]
 ; CHECK-GISEL-NEXT: fmov d0, d1
 ; CHECK-GISEL-NEXT: add sp, sp, #16
 ; CHECK-GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 0412aef7545e9..4d0603722c3ae 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13326,10 +13326,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane:
 ; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr b1, [x0]
+; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
 ; CHECK-GI-NEXT: add x8, x0, x2
 ; CHECK-GI-NEXT: str x8, [x1]
-; CHECK-GI-NEXT: mov.b v0[1], v1[0]
 ; CHECK-GI-NEXT: ret
 %tmp1 = load i8, ptr %bar
 %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -13373,11 +13372,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
 ; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr b1, [x0]
 ; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT: add x8, x0, x2
+; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
 ; CHECK-GI-NEXT: str x8, [x1]
-; CHECK-GI-NEXT: mov.b v0[1], v1[0]
 ; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT: ret
 %tmp1 = load i8, ptr %bar
@@ -13891,43 +13889,20 @@ define void @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half>
 }

 define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) {
-; CHECK-SD-LABEL: test_ld1lane_build_i8:
-; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: ldr b1, [x0]
-; CHECK-SD-NEXT: ldr x8, [sp]
-; CHECK-SD-NEXT: ld1.b { v1 }[1], [x1]
-; CHECK-SD-NEXT: ld1.b { v1 }[2], [x2]
-; CHECK-SD-NEXT: ld1.b { v1 }[3], [x3]
-; CHECK-SD-NEXT: ld1.b { v1 }[4], [x4]
-; CHECK-SD-NEXT: ld1.b { v1 }[5], [x5]
-; CHECK-SD-NEXT: ld1.b { v1 }[6], [x6]
-; CHECK-SD-NEXT: ld1.b { v1 }[7], [x7]
-; CHECK-SD-NEXT: sub.8b v0, v1, v0
-; CHECK-SD-NEXT: str d0, [x8]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_ld1lane_build_i8:
-; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: ldr b1, [x0]
-; CHECK-GI-NEXT: ldr b2, [x1]
-; CHECK-GI-NEXT: ldr x8, [sp]
-; CHECK-GI-NEXT: mov.b v1[0], v1[0]
-; CHECK-GI-NEXT: mov.b v1[1], v2[0]
-; CHECK-GI-NEXT: ldr b2, [x2]
-; CHECK-GI-NEXT: mov.b v1[2], v2[0]
-; CHECK-GI-NEXT: ldr b2, [x3]
-; CHECK-GI-NEXT: mov.b v1[3], v2[0]
-; CHECK-GI-NEXT: ldr b2, [x4]
-; CHECK-GI-NEXT: mov.b v1[4], v2[0]
-; CHECK-GI-NEXT: ldr b2, [x5]
-; CHECK-GI-NEXT: mov.b v1[5], v2[0]
-; CHECK-GI-NEXT: ldr b2, [x6]
-; CHECK-GI-NEXT: mov.b v1[6], v2[0]
-; CHECK-GI-NEXT: ldr b2, [x7]
-; CHECK-GI-NEXT: mov.b v1[7], v2[0]
-; CHECK-GI-NEXT: sub.8b v0, v1, v0
-; CHECK-GI-NEXT: str d0, [x8]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_ld1lane_build_i8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: ldr b1, [x0]
+; CHECK-NEXT: ldr x8, [sp]
+; CHECK-NEXT: ld1.b { v1 }[1], [x1]
+; CHECK-NEXT: ld1.b { v1 }[2], [x2]
+; CHECK-NEXT: ld1.b { v1 }[3], [x3]
+; CHECK-NEXT: ld1.b { v1 }[4], [x4]
+; CHECK-NEXT: ld1.b { v1 }[5], [x5]
+; CHECK-NEXT: ld1.b { v1 }[6], [x6]
+; CHECK-NEXT: ld1.b { v1 }[7], [x7]
+; CHECK-NEXT: sub.8b v0, v1, v0
+; CHECK-NEXT: str d0, [x8]
+; CHECK-NEXT: ret
 %ld.a = load i8, ptr %a
 %ld.b = load i8, ptr %b
 %ld.c = load i8, ptr %c
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index eaa545473b2e0..0b22fa49cb5c1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1004,16 +1004,10 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin
 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly

 define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_16b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_16b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr b1, [x0]
-; CHECK-GI-NEXT: mov.b v0[0], v1[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1.b { v0 }[0], [x0]
+; CHECK-NEXT: ret
 ; Make sure we are using the operands defined by the ABI
 %tmp1 = load i8, ptr %bar
 %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
@@ -1086,20 +1080,12 @@ define <1 x i64> @ld1_1d(ptr %p) {
 }

 define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
-; CHECK-SD-LABEL: ld1_8b:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ld1_8b:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr b1, [x0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov.b v0[0], v1[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ld1_8b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: ld1.b { v0 }[0], [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
 ; Make sure we are using the operands defined by the ABI
 %tmp1 = load i8, ptr %bar
 %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index 2a085dc0e72bf..51f1351a5edf4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -75,18 +75,11 @@ define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
 }

 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
-; CHECK-SD-LABEL: ins16b16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v1.b[15], v0.b[2]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins16b16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.b[15], v2.b[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins16b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.b[15], v0.b[2]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
 ret <16 x i8> %tmp4
@@ -148,20 +141,12 @@ define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
 }

 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
-; CHECK-SD-LABEL: ins8b16:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v1.b[15], v0.b[2]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins8b16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.b[15], v2.b[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins8b16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v1.b[15], v0.b[2]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
 ret <16 x i8> %tmp4
@@ -239,20 +224,12 @@ define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1)
 }

 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
-; CHECK-SD-LABEL: ins16b8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v1.b[7], v0.b[2]
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins16b8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: fmov d0, d1
-; CHECK-GI-NEXT: mov v0.b[7], v2.b[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins16b8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v1.b[7], v0.b[2]
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
 %tmp3 = extractelement <16 x i8> %tmp1, i32 2
 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
 ret <8 x i8> %tmp4
@@ -321,22 +298,13 @@ define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
 }

 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
-; CHECK-SD-LABEL: ins8b8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v1.b[4], v0.b[2]
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins8b8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: fmov d0, d1
-; CHECK-GI-NEXT: mov v0.b[4], v2.b[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins8b8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v1.b[4], v0.b[2]
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
 %tmp3 = extractelement <8 x i8> %tmp1, i32 2
 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
 ret <8 x i8> %tmp4
@@ -617,37 +585,22 @@ define i64 @smovx2s(<2 x i32> %tmp1) {
 }

 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
-; CHECK-SD-LABEL: test_vcopy_lane_s8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v0.b[5], v1.b[3]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vcopy_lane_s8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b1, v1.b[3]
-; CHECK-GI-NEXT: mov v0.b[5], v1.b[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vcopy_lane_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.b[5], v1.b[3]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32>
 ret <8 x i8> %vset_lane
 }

 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
-; CHECK-SD-LABEL: test_vcopyq_laneq_s8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v0.b[14], v1.b[6]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vcopyq_laneq_s8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov b1, v1.b[6]
-; CHECK-GI-NEXT: mov v0.b[14], v1.b[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vcopyq_laneq_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v0.b[14], v1.b[6]
+; CHECK-NEXT: ret
 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32>
 ret <16 x i8> %vset_lane
 }
@@ -665,18 +618,11 @@ define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
 }

 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
-; CHECK-SD-LABEL: test_vcopyq_laneq_swap_s8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v1.b[0], v0.b[15]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vcopyq_laneq_swap_s8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov b2, v0.b[15]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.b[0], v2.b[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.b[0], v0.b[15]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32>
 ret <16 x i8> %vset_lane
 }
@@ -1358,21 +1304,14 @@ define <8 x i8> @getl(<16 x i8> %x) #0 {
 ;
 ; CHECK-GI-LABEL: getl:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov b2, v0.b[1]
 ; CHECK-GI-NEXT: mov v1.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov v1.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[3]
-; CHECK-GI-NEXT: mov v1.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[4]
-; CHECK-GI-NEXT: mov v1.b[3], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[5]
-; CHECK-GI-NEXT: mov v1.b[4], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[6]
-; CHECK-GI-NEXT: mov b0, v0.b[7]
-; CHECK-GI-NEXT: mov v1.b[5], v2.b[0]
-; CHECK-GI-NEXT: mov v1.b[6], v3.b[0]
-; CHECK-GI-NEXT: mov v1.b[7], v0.b[0]
+; CHECK-GI-NEXT: mov v1.b[1], v0.b[1]
+; CHECK-GI-NEXT: mov v1.b[2], v0.b[2]
+; CHECK-GI-NEXT: mov v1.b[3], v0.b[3]
+; CHECK-GI-NEXT: mov v1.b[4], v0.b[4]
+; CHECK-GI-NEXT: mov v1.b[5], v0.b[5]
+; CHECK-GI-NEXT: mov v1.b[6], v0.b[6]
+; CHECK-GI-NEXT: mov v1.b[7], v0.b[7]
 ; CHECK-GI-NEXT: fmov d0, d1
 ; CHECK-GI-NEXT: ret
 %vecext = extractelement <16 x i8> %x, i32 0
@@ -1804,22 +1743,15 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: mov v2.16b, v1.16b
 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b3, v0.b[1]
 ; CHECK-GI-NEXT: adrp x8, .LCPI127_0
 ; CHECK-GI-NEXT: mov v1.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov v1.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov v1.b[3], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[5]
-; CHECK-GI-NEXT: mov v1.b[4], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[6]
-; CHECK-GI-NEXT: mov b0, v0.b[7]
-; CHECK-GI-NEXT: mov v1.b[5], v3.b[0]
-; CHECK-GI-NEXT: mov v1.b[6], v4.b[0]
-; CHECK-GI-NEXT: mov v1.b[7], v0.b[0]
+; CHECK-GI-NEXT: mov v1.b[1], v0.b[1]
+; CHECK-GI-NEXT: mov v1.b[2], v0.b[2]
+; CHECK-GI-NEXT: mov v1.b[3], v0.b[3]
+; CHECK-GI-NEXT: mov v1.b[4], v0.b[4]
+; CHECK-GI-NEXT: mov v1.b[5], v0.b[5]
+; CHECK-GI-NEXT: mov v1.b[6], v0.b[6]
+; CHECK-GI-NEXT: mov v1.b[7], v0.b[7]
 ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI127_0]
 ; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECK-GI-NEXT: ret
@@ -1853,37 +1785,23 @@ define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
 ;
 ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v8i8:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov b3, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-GI-NEXT: mov b2, v0.b[0]
 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov v2.b[1], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov v2.b[3], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[5]
-; CHECK-GI-NEXT: mov v2.b[4], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[6]
-; CHECK-GI-NEXT: mov b0, v0.b[7]
-; CHECK-GI-NEXT: mov v2.b[5], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[2]
-; CHECK-GI-NEXT: mov v2.b[6], v4.b[0]
-; CHECK-GI-NEXT: mov v2.b[7], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[1]
+; CHECK-GI-NEXT: mov v2.b[1], v0.b[1]
+; CHECK-GI-NEXT: mov v2.b[2], v0.b[2]
+; CHECK-GI-NEXT: mov v2.b[3], v0.b[3]
+; CHECK-GI-NEXT: mov v2.b[4], v0.b[4]
+; CHECK-GI-NEXT: mov v2.b[5], v0.b[5]
+; CHECK-GI-NEXT: mov v2.b[6], v0.b[6]
+; CHECK-GI-NEXT: mov v2.b[7], v0.b[7]
 ; CHECK-GI-NEXT: mov v2.b[8], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[9], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[3]
-; CHECK-GI-NEXT: mov v2.b[10], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[4]
-; CHECK-GI-NEXT: mov v2.b[11], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[5]
-; CHECK-GI-NEXT: mov v2.b[12], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[6]
-; CHECK-GI-NEXT: mov v2.b[13], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[7]
-; CHECK-GI-NEXT: mov v2.b[14], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[15], v0.b[0]
+; CHECK-GI-NEXT: mov v2.b[9], v1.b[1]
+; CHECK-GI-NEXT: mov v2.b[10], v1.b[2]
+; CHECK-GI-NEXT: mov v2.b[11], v1.b[3]
+; CHECK-GI-NEXT: mov v2.b[12], v1.b[4]
+; CHECK-GI-NEXT: mov v2.b[13], v1.b[5]
+; CHECK-GI-NEXT: mov v2.b[14], v1.b[6]
+; CHECK-GI-NEXT: mov v2.b[15], v1.b[7]
 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
 ; CHECK-GI-NEXT: ret
 entry:
@@ -1933,37 +1851,23 @@ define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
 ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8:
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b3, v0.b[1]
 ; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov v2.b[1], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov v2.b[3], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[5]
-; CHECK-GI-NEXT: mov v2.b[4], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v0.b[6]
-; CHECK-GI-NEXT: mov b0, v0.b[7]
-; CHECK-GI-NEXT: mov v2.b[5], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[2]
-; CHECK-GI-NEXT: mov v2.b[6], v4.b[0]
-; CHECK-GI-NEXT: mov v2.b[7], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[1]
+; CHECK-GI-NEXT: mov v2.b[1], v0.b[1]
+; CHECK-GI-NEXT: mov v2.b[2], v0.b[2]
+; CHECK-GI-NEXT: mov v2.b[3], v0.b[3]
+; CHECK-GI-NEXT: mov v2.b[4], v0.b[4]
+; CHECK-GI-NEXT: mov v2.b[5], v0.b[5]
+; CHECK-GI-NEXT: mov v2.b[6], v0.b[6]
+; CHECK-GI-NEXT: mov v2.b[7], v0.b[7]
 ; CHECK-GI-NEXT: mov v2.b[8], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[9], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[3]
-; CHECK-GI-NEXT: mov v2.b[10], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[4]
-; CHECK-GI-NEXT: mov v2.b[11], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[5]
-; CHECK-GI-NEXT: mov v2.b[12], v3.b[0]
-; CHECK-GI-NEXT: mov b3, v1.b[6]
-; CHECK-GI-NEXT: mov v2.b[13], v0.b[0]
-; CHECK-GI-NEXT: mov b0, v1.b[7]
-; CHECK-GI-NEXT: mov v2.b[14], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[15], v0.b[0]
+; CHECK-GI-NEXT: mov v2.b[9], v1.b[1]
+; CHECK-GI-NEXT: mov v2.b[10], v1.b[2]
+; CHECK-GI-NEXT: mov v2.b[11], v1.b[3]
+; CHECK-GI-NEXT: mov v2.b[12], v1.b[4]
+; CHECK-GI-NEXT: mov v2.b[13], v1.b[5]
+; CHECK-GI-NEXT: mov v2.b[14], v1.b[6]
+; CHECK-GI-NEXT: mov v2.b[15], v1.b[7]
 ; CHECK-GI-NEXT: mov v0.16b, v2.16b
 ; CHECK-GI-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
index 7721616be436c..f47c06e1ba4cb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
@@ -329,16 +329,10 @@ define <1 x double> @testDUP.v1f64(ptr %a, ptr %b) #0 {
 }

 define <16 x i8> @test_vld1q_lane_s8(ptr %a, <16 x i8> %b) {
-; CHECK-GI-LABEL: test_vld1q_lane_s8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ld1 { v0.b }[15], [x0]
-; CHECK-GI-NEXT: ret
-;
-; CHECK-SD-LABEL: test_vld1q_lane_s8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldr b1, [x0]
-; CHECK-SD-NEXT: mov v0.b[15], v1.b[0]
-; CHECK-SD-NEXT: ret
+; CHECK-LABEL: test_vld1q_lane_s8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ld1 { v0.b }[15], [x0]
+; CHECK-NEXT: ret
 entry:
 %0 = load i8, ptr %a, align 1
 %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
@@ -401,20 +395,12 @@ entry:
 }

 define <8 x i8> @test_vld1_lane_s8(ptr %a, <8 x i8> %b) {
-; CHECK-GI-LABEL: test_vld1_lane_s8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: ld1 { v0.b }[7], [x0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
-;
-; CHECK-SD-LABEL: test_vld1_lane_s8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: ldr b1, [x0]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v0.b[7], v1.b[0]
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: ret
+; CHECK-LABEL: test_vld1_lane_s8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: ld1 { v0.b }[7], [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
 entry:
 %0 = load i8, ptr %a, align 1
 %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll
index fcd1fa2983420..a4863d1f74200 100644
--- a/llvm/test/CodeGen/AArch64/ctlz.ll
+++ b/llvm/test/CodeGen/AArch64/ctlz.ll
@@ -21,10 +21,8 @@ define void @v2i8(ptr %p1) {
 ; CHECK-GI-LABEL: v2i8:
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: ldr b1, [x0, #1]
 ; CHECK-GI-NEXT: add x8, x0, #1
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
 ; CHECK-GI-NEXT: clz v0.8b, v0.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
@@ -59,13 +57,10 @@ define void @v3i8(ptr %p1) {
 ; CHECK-GI-LABEL: v3i8:
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: ldr b1, [x0, #1]
 ; CHECK-GI-NEXT: add x8, x0, #1
 ; CHECK-GI-NEXT: add x9, x0, #2
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
-; CHECK-GI-NEXT: ldr b1, [x0, #2]
-; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9]
 ; CHECK-GI-NEXT: clz v0.8b, v0.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index 10ec1d0c1982a..55f75b6bc3f27 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -21,10 +21,8 @@ define void @v2i8(ptr %p1) {
 ; CHECK-GI-LABEL: v2i8:
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: ldr b1, [x0, #1]
 ; CHECK-GI-NEXT: add x8, x0, #1
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
 ; CHECK-GI-NEXT: cnt v0.8b, v0.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
@@ -58,13 +56,10 @@ define void @v3i8(ptr %p1) {
 ; CHECK-GI-LABEL: v3i8:
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: ldr b1, [x0, #1]
 ; CHECK-GI-NEXT: add x8, x0, #1
 ; CHECK-GI-NEXT: add x9, x0, #2
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
-; CHECK-GI-NEXT: ldr b1, [x0, #2]
-; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9]
 ; CHECK-GI-NEXT: cnt v0.8b, v0.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
index 3fa5d64a210e1..6b26ae98a4ed8 100644
--- a/llvm/test/CodeGen/AArch64/load.ll
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -353,19 +353,19 @@ define <7 x i8> @load_v7i8(ptr %ptr) {
 ; CHECK-GI-LABEL: load_v7i8:
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr b0, [x0]
-; CHECK-GI-NEXT: ldr b1, [x0, #1]
+; CHECK-GI-NEXT: add x8, x0, #1
 ; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
-; CHECK-GI-NEXT: ldr b1, [x0, #2]
-; CHECK-GI-NEXT: mov v0.b[2], v1.b[0]
-; CHECK-GI-NEXT: ldr b1, [x0, #3]
-; CHECK-GI-NEXT: mov v0.b[3], v1.b[0]
-; CHECK-GI-NEXT: ldr b1, [x0, #4]
-; CHECK-GI-NEXT: mov v0.b[4], v1.b[0]
-; CHECK-GI-NEXT: ldr b1, [x0, #5]
-; CHECK-GI-NEXT: mov v0.b[5], v1.b[0]
-; CHECK-GI-NEXT: ldr b1, [x0, #6]
-; CHECK-GI-NEXT: mov v0.b[6], v1.b[0]
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: add x8, x0, #2
+; CHECK-GI-NEXT: ld1 { v0.b }[2], [x8]
+; CHECK-GI-NEXT: add x8, x0, #3
+; CHECK-GI-NEXT: ld1 { v0.b }[3], [x8]
+; CHECK-GI-NEXT: add x8, x0, #4
+; CHECK-GI-NEXT: ld1 { v0.b }[4], [x8]
+; CHECK-GI-NEXT: add x8, x0, #5
+; CHECK-GI-NEXT: ld1 { v0.b }[5], [x8]
+; CHECK-GI-NEXT: add x8, x0, #6
+; CHECK-GI-NEXT: ld1 { v0.b }[6], [x8]
 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT: ret
 %a = load <7 x i8>, ptr %ptr
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index d54dde3c86364..e1018bbee7893 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -209,13 +209,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr b0, [x0]
 ; CHECK-GI-NEXT: ldr b1, [x1]
+; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: add x9, x1, #1
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9]
 ; CHECK-GI-NEXT: add x8, x2, #1
-; CHECK-GI-NEXT: ldr b2, [x0, #1]
-; CHECK-GI-NEXT: ldr b3, [x1, #1]
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v1.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
 ; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 4c8f0c9c446f5..e5c07e0c81e35 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -208,14 +208,13 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
 ;
 ; CHECK-GI-LABEL: shufflevector_v2i1:
 ; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov w8, v1.s[1]
 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov w9, v0.s[1]
-; CHECK-GI-NEXT: mov v1.b[1], w8
-; CHECK-GI-NEXT: mov v0.b[1], w9
-; CHECK-GI-NEXT: mov b1, v1.b[1]
-; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov w8, v0.s[1]
+; CHECK-GI-NEXT: mov w9, v1.s[1]
+; CHECK-GI-NEXT: mov v0.b[1], w8
+; CHECK-GI-NEXT: mov v1.b[1], w9
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[1]
 ; CHECK-GI-NEXT: umov w8, v0.b[0]
 ; CHECK-GI-NEXT: umov w9, v0.b[1]
 ; CHECK-GI-NEXT: mov v0.s[0], w8
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index dc39ad0571b14..085857c0c5428 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -210,13 +210,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr b0, [x0]
 ; CHECK-GI-NEXT: ldr b1, [x1]
+; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: add x9, x1, #1
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9]
 ; CHECK-GI-NEXT: add x8, x2, #1
-; CHECK-GI-NEXT: ldr b2, [x0, #1]
-; CHECK-GI-NEXT: ldr b3, [x1, #1]
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v1.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
 ; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index 14a578fa317d0..b0b3198fda0e6 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -207,13 +207,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr b0, [x0]
 ; CHECK-GI-NEXT: ldr b1, [x1]
+; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: add x9, x1, #1
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9]
 ; CHECK-GI-NEXT: add x8, x2, #1
-; CHECK-GI-NEXT: ldr b2, [x0, #1]
-; CHECK-GI-NEXT: ldr b3, [x1, #1]
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v1.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
 ; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index ddb3332abf5d0..54754e7fbaed6 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -206,13 +206,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-GI: // %bb.0:
 ; CHECK-GI-NEXT: ldr b0, [x0]
 ; CHECK-GI-NEXT: ldr b1, [x1]
+; CHECK-GI-NEXT: add x8, x0, #1
+; CHECK-GI-NEXT: add x9, x1, #1
+; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9]
 ; CHECK-GI-NEXT: add x8, x2, #1
-; CHECK-GI-NEXT: ldr b2, [x0, #1]
-; CHECK-GI-NEXT: ldr b3, [x1, #1]
-; CHECK-GI-NEXT: mov v0.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov v1.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
 ; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2]
 ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8]