From d1b1fbfc1391a3135c5ea59108d22413ceff3dac Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Sat, 12 Apr 2025 14:39:43 +0200 Subject: [PATCH 1/4] [AArch64][GlobalISel] Adopt some Ld* patterns to reduce codegen regressions. This is an update of #69607 after #101675 and #105686. Ld1Lane64Pat, Ld1Lane128Pat, LoadInsertPatterns, Neon_INS_elt_pattern from SelectionDAG didn't work for GlobalISel on v8i8 and v16i8 vector types, because vector_insert for v8i8, v16i8 in SelectionDAG expects i32 scalar argument type, whereas G_INSERT_VECTOR_ELT expects s8. --- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 +- .../AArch64/arm64-extract-insert-varidx.ll | 22 +- .../AArch64/arm64-indexed-vector-ldst.ll | 57 ++-- llvm/test/CodeGen/AArch64/arm64-ld1.ll | 34 +-- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 260 ++++++------------ .../AArch64/arm64-neon-simd-ldst-one.ll | 34 +-- llvm/test/CodeGen/AArch64/ctlz.ll | 11 +- llvm/test/CodeGen/AArch64/ctpop.ll | 11 +- llvm/test/CodeGen/AArch64/load.ll | 24 +- llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 10 +- llvm/test/CodeGen/AArch64/shufflevector.ll | 13 +- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 10 +- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 10 +- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 10 +- 15 files changed, 174 insertions(+), 343 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 7322212c5bb24..f72d072b6edd1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -516,3 +516,10 @@ def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; + +defm : LoadInsertPatterns; +def : Ld1Lane64Pat; +def : Ld1Lane128Pat; +defm : Neon_INS_elt_pattern; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 7f7e1d20ae604..6c30aded72d38 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7307,12 +7307,12 @@ multiclass Neon_INS_elt_pattern; + (VT128 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub)), imm:$Immn)>; def : Pat<(VT64 (vector_insert V64:$src, (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))), (i64 imm:$Immd))), - (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), + (EXTRACT_SUBREG (INS (VT128 (SUBREG_TO_REG (i64 0), V64:$src, dsub)), imm:$Immd, V128:$Rn, imm:$Immn), dsub)>; diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll index 7a4cdd52db904..bed38b1dc5a0e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll +++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll @@ -88,24 +88,16 @@ define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) { ; CHECK-GISEL-NEXT: mov x8, sp ; CHECK-GISEL-NEXT: str q0, [sp] ; CHECK-GISEL-NEXT: and x9, x9, #0xf -; CHECK-GISEL-NEXT: mov b2, v0.b[1] -; CHECK-GISEL-NEXT: mov b3, v0.b[2] ; CHECK-GISEL-NEXT: lsl x10, x9, #1 ; CHECK-GISEL-NEXT: sub x9, x10, x9 ; CHECK-GISEL-NEXT: ldr b1, [x8, x9] -; CHECK-GISEL-NEXT: mov v1.b[0], v1.b[0] -; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0] -; CHECK-GISEL-NEXT: mov b2, v0.b[3] -; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0] -; CHECK-GISEL-NEXT: mov b3, v0.b[4] -; CHECK-GISEL-NEXT: mov v1.b[3], v2.b[0] -; CHECK-GISEL-NEXT: mov b2, v0.b[5] -; CHECK-GISEL-NEXT: mov v1.b[4], v3.b[0] -; CHECK-GISEL-NEXT: mov b3, v0.b[6] -; CHECK-GISEL-NEXT: mov b0, v0.b[7] -; CHECK-GISEL-NEXT: mov v1.b[5], v2.b[0] -; CHECK-GISEL-NEXT: mov v1.b[6], v3.b[0] -; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[0] +; CHECK-GISEL-NEXT: mov v1.b[1], v0.b[1] +; CHECK-GISEL-NEXT: mov v1.b[2], v0.b[2] +; CHECK-GISEL-NEXT: mov v1.b[3], v0.b[3] +; CHECK-GISEL-NEXT: mov v1.b[4], v0.b[4] +; CHECK-GISEL-NEXT: mov v1.b[5], v0.b[5] +; CHECK-GISEL-NEXT: mov v1.b[6], v0.b[6] +; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[7] ; CHECK-GISEL-NEXT: fmov d0, d1 ; CHECK-GISEL-NEXT: add sp, sp, #16 ; CHECK-GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 0412aef7545e9..4d0603722c3ae 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -13326,10 +13326,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16 ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ldr b1, [x0] +; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0] ; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] -; CHECK-GI-NEXT: mov.b v0[1], v1[0] ; CHECK-GI-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1 @@ -13373,11 +13372,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ldr b1, [x0] ; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0] ; CHECK-GI-NEXT: str x8, [x1] -; CHECK-GI-NEXT: mov.b v0[1], v1[0] ; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %tmp1 = load i8, ptr %bar @@ -13891,43 +13889,20 @@ define void @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half> } define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) { -; CHECK-SD-LABEL: test_ld1lane_build_i8: -; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ldr b1, [x0] -; CHECK-SD-NEXT: ldr x8, [sp] -; CHECK-SD-NEXT: ld1.b { v1 }[1], [x1] -; CHECK-SD-NEXT: ld1.b { v1 }[2], [x2] -; CHECK-SD-NEXT: ld1.b { v1 }[3], [x3] -; CHECK-SD-NEXT: ld1.b { v1 }[4], [x4] -; CHECK-SD-NEXT: ld1.b { v1 }[5], [x5] -; CHECK-SD-NEXT: ld1.b { v1 }[6], [x6] -; CHECK-SD-NEXT: ld1.b { v1 }[7], [x7] -; CHECK-SD-NEXT: sub.8b v0, v1, v0 -; CHECK-SD-NEXT: str d0, [x8] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_ld1lane_build_i8: -; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ldr b1, [x0] -; CHECK-GI-NEXT: ldr b2, [x1] -; CHECK-GI-NEXT: ldr x8, [sp] -; CHECK-GI-NEXT: mov.b v1[0], v1[0] -; CHECK-GI-NEXT: mov.b v1[1], v2[0] -; CHECK-GI-NEXT: ldr b2, [x2] -; CHECK-GI-NEXT: mov.b v1[2], v2[0] -; CHECK-GI-NEXT: ldr b2, [x3] -; CHECK-GI-NEXT: mov.b v1[3], v2[0] -; CHECK-GI-NEXT: ldr b2, [x4] -; CHECK-GI-NEXT: mov.b v1[4], v2[0] -; CHECK-GI-NEXT: ldr b2, [x5] -; CHECK-GI-NEXT: mov.b v1[5], v2[0] -; CHECK-GI-NEXT: ldr b2, [x6] -; CHECK-GI-NEXT: mov.b v1[6], v2[0] -; CHECK-GI-NEXT: ldr b2, [x7] -; CHECK-GI-NEXT: mov.b v1[7], v2[0] -; CHECK-GI-NEXT: sub.8b v0, v1, v0 -; CHECK-GI-NEXT: str d0, [x8] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_ld1lane_build_i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: ldr x8, [sp] +; CHECK-NEXT: ld1.b { v1 }[1], [x1] +; CHECK-NEXT: ld1.b { v1 }[2], [x2] +; CHECK-NEXT: ld1.b { v1 }[3], [x3] +; CHECK-NEXT: ld1.b { v1 }[4], [x4] +; CHECK-NEXT: ld1.b { v1 }[5], [x5] +; CHECK-NEXT: ld1.b { v1 }[6], [x6] +; CHECK-NEXT: ld1.b { v1 }[7], [x7] +; CHECK-NEXT: sub.8b v0, v1, v0 +; CHECK-NEXT: str d0, [x8] +; CHECK-NEXT: ret %ld.a = load i8, ptr %a %ld.b = load i8, ptr %b %ld.c = load i8, ptr %c diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index eaa545473b2e0..0b22fa49cb5c1 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -1004,16 +1004,10 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr b1, [x0] -; CHECK-GI-NEXT: mov.b v0[0], v1[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1.b { v0 }[0], [x0] +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0 @@ -1086,20 +1080,12 @@ define <1 x i64> @ld1_1d(ptr %p) { } define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) { -; CHECK-SD-LABEL: ld1_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld1_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: ldr b1, [x0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov.b v0[0], v1[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld1_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1.b { v0 }[0], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret ; Make sure we are using the operands defined by the ABI %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 2a085dc0e72bf..78ba7aaaa18ec 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -75,18 +75,11 @@ define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { } define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) { -; CHECK-SD-LABEL: ins16b16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov v1.b[15], v0.b[2] -; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ins16b16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov v0.16b, v1.16b -; CHECK-GI-NEXT: mov v0.b[15], v2.b[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ins16b16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v1.b[15], v0.b[2] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret %tmp3 = extractelement <16 x i8> %tmp1, i32 2 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 ret <16 x i8> %tmp4 @@ -148,20 +141,12 @@ define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { } define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { -; CHECK-SD-LABEL: ins8b16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: mov v1.b[15], v0.b[2] -; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ins8b16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: mov v0.16b, v1.16b -; CHECK-GI-NEXT: mov v0.b[15], v2.b[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ins8b16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov v1.b[15], v0.b[2] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret %tmp3 = extractelement <8 x i8> %tmp1, i32 2 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 ret <16 x i8> %tmp4 @@ -239,20 +224,12 @@ define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) } define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { -; CHECK-SD-LABEL: ins16b8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-SD-NEXT: mov v1.b[7], v0.b[2] -; CHECK-SD-NEXT: fmov d0, d1 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ins16b8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov d0, d1 -; CHECK-GI-NEXT: mov v0.b[7], v2.b[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ins16b8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v1.b[7], v0.b[2] +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: ret %tmp3 = extractelement <16 x i8> %tmp1, i32 2 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7 ret <8 x i8> %tmp4 @@ -321,22 +298,13 @@ define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { } define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { -; CHECK-SD-LABEL: ins8b8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: mov v1.b[4], v0.b[2] -; CHECK-SD-NEXT: fmov d0, d1 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ins8b8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov d0, d1 -; CHECK-GI-NEXT: mov v0.b[4], v2.b[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ins8b8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov v1.b[4], v0.b[2] +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: ret %tmp3 = extractelement <8 x i8> %tmp1, i32 2 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 ret <8 x i8> %tmp4 @@ -617,37 +585,22 @@ define i64 @smovx2s(<2 x i32> %tmp1) { } define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) { -; CHECK-SD-LABEL: test_vcopy_lane_s8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-SD-NEXT: mov v0.b[5], v1.b[3] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vcopy_lane_s8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b1, v1.b[3] -; CHECK-GI-NEXT: mov v0.b[5], v1.b[0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vcopy_lane_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v0.b[5], v1.b[3] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> ret <8 x i8> %vset_lane } define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) { -; CHECK-SD-LABEL: test_vcopyq_laneq_s8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov v0.b[14], v1.b[6] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vcopyq_laneq_s8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov b1, v1.b[6] -; CHECK-GI-NEXT: mov v0.b[14], v1.b[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vcopyq_laneq_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v0.b[14], v1.b[6] +; CHECK-NEXT: ret %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> ret <16 x i8> %vset_lane } @@ -665,18 +618,11 @@ define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) { } define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) { -; CHECK-SD-LABEL: test_vcopyq_laneq_swap_s8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov v1.b[0], v0.b[15] -; CHECK-SD-NEXT: mov v0.16b, v1.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_vcopyq_laneq_swap_s8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov b2, v0.b[15] -; CHECK-GI-NEXT: mov v0.16b, v1.16b -; CHECK-GI-NEXT: mov v0.b[0], v2.b[0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_vcopyq_laneq_swap_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov v1.b[0], v0.b[15] +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ret %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> ret <16 x i8> %vset_lane } @@ -1358,21 +1304,14 @@ define <8 x i8> @getl(<16 x i8> %x) #0 { ; ; CHECK-GI-LABEL: getl: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov b2, v0.b[1] ; CHECK-GI-NEXT: mov v1.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov v1.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[3] -; CHECK-GI-NEXT: mov v1.b[2], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[4] -; CHECK-GI-NEXT: mov v1.b[3], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[5] -; CHECK-GI-NEXT: mov v1.b[4], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[6] -; CHECK-GI-NEXT: mov b0, v0.b[7] -; CHECK-GI-NEXT: mov v1.b[5], v2.b[0] -; CHECK-GI-NEXT: mov v1.b[6], v3.b[0] -; CHECK-GI-NEXT: mov v1.b[7], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v0.b[1] +; CHECK-GI-NEXT: mov v1.b[2], v0.b[2] +; CHECK-GI-NEXT: mov v1.b[3], v0.b[3] +; CHECK-GI-NEXT: mov v1.b[4], v0.b[4] +; CHECK-GI-NEXT: mov v1.b[5], v0.b[5] +; CHECK-GI-NEXT: mov v1.b[6], v0.b[6] +; CHECK-GI-NEXT: mov v1.b[7], v0.b[7] ; CHECK-GI-NEXT: fmov d0, d1 ; CHECK-GI-NEXT: ret %vecext = extractelement <16 x i8> %x, i32 0 @@ -1804,22 +1743,15 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov v2.16b, v1.16b ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b3, v0.b[1] ; CHECK-GI-NEXT: adrp x8, .LCPI127_0 ; CHECK-GI-NEXT: mov v1.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[2] -; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[3] -; CHECK-GI-NEXT: mov v1.b[2], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[4] -; CHECK-GI-NEXT: mov v1.b[3], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[5] -; CHECK-GI-NEXT: mov v1.b[4], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[6] -; CHECK-GI-NEXT: mov b0, v0.b[7] -; CHECK-GI-NEXT: mov v1.b[5], v3.b[0] -; CHECK-GI-NEXT: mov v1.b[6], v4.b[0] -; CHECK-GI-NEXT: mov v1.b[7], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[1], v0.b[1] +; CHECK-GI-NEXT: mov v1.b[2], v0.b[2] +; CHECK-GI-NEXT: mov v1.b[3], v0.b[3] +; CHECK-GI-NEXT: mov v1.b[4], v0.b[4] +; CHECK-GI-NEXT: mov v1.b[5], v0.b[5] +; CHECK-GI-NEXT: mov v1.b[6], v0.b[6] +; CHECK-GI-NEXT: mov v1.b[7], v0.b[7] ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI127_0] ; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-GI-NEXT: ret @@ -1853,37 +1785,23 @@ define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v8i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov b3, v0.b[1] ; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov b4, v0.b[2] -; CHECK-GI-NEXT: mov v2.b[1], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[2], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[4] -; CHECK-GI-NEXT: mov v2.b[3], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[5] -; CHECK-GI-NEXT: mov v2.b[4], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[6] -; CHECK-GI-NEXT: mov b0, v0.b[7] -; CHECK-GI-NEXT: mov v2.b[5], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[2] -; CHECK-GI-NEXT: mov v2.b[6], v4.b[0] -; CHECK-GI-NEXT: mov v2.b[7], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[1] +; CHECK-GI-NEXT: mov v2.b[1], v0.b[1] +; CHECK-GI-NEXT: mov v2.b[2], v0.b[2] +; CHECK-GI-NEXT: mov v2.b[3], v0.b[3] +; CHECK-GI-NEXT: mov v2.b[4], v0.b[4] +; CHECK-GI-NEXT: mov v2.b[5], v0.b[5] +; CHECK-GI-NEXT: mov v2.b[6], v0.b[6] +; CHECK-GI-NEXT: mov v2.b[7], v0.b[7] ; CHECK-GI-NEXT: mov v2.b[8], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[9], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[3] -; CHECK-GI-NEXT: mov v2.b[10], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[4] -; CHECK-GI-NEXT: mov v2.b[11], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[5] -; CHECK-GI-NEXT: mov v2.b[12], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[6] -; CHECK-GI-NEXT: mov v2.b[13], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[7] -; CHECK-GI-NEXT: mov v2.b[14], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[15], v0.b[0] +; CHECK-GI-NEXT: mov v2.b[9], v1.b[1] +; CHECK-GI-NEXT: mov v2.b[10], v1.b[2] +; CHECK-GI-NEXT: mov v2.b[11], v1.b[3] +; CHECK-GI-NEXT: mov v2.b[12], v1.b[4] +; CHECK-GI-NEXT: mov v2.b[13], v1.b[5] +; CHECK-GI-NEXT: mov v2.b[14], v1.b[6] +; CHECK-GI-NEXT: mov v2.b[15], v1.b[7] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -1933,37 +1851,23 @@ define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b3, v0.b[1] ; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov b4, v0.b[2] -; CHECK-GI-NEXT: mov v2.b[1], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[2], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[4] -; CHECK-GI-NEXT: mov v2.b[3], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[5] -; CHECK-GI-NEXT: mov v2.b[4], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[6] -; CHECK-GI-NEXT: mov b0, v0.b[7] -; CHECK-GI-NEXT: mov v2.b[5], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[2] -; CHECK-GI-NEXT: mov v2.b[6], v4.b[0] -; CHECK-GI-NEXT: mov v2.b[7], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[1] +; CHECK-GI-NEXT: mov v2.b[1], v0.b[1] +; CHECK-GI-NEXT: mov v2.b[2], v0.b[2] +; CHECK-GI-NEXT: mov v2.b[3], v0.b[3] +; CHECK-GI-NEXT: mov v2.b[4], v0.b[4] +; CHECK-GI-NEXT: mov v2.b[5], v0.b[5] +; CHECK-GI-NEXT: mov v2.b[6], v0.b[6] +; CHECK-GI-NEXT: mov v2.b[7], v0.b[7] ; CHECK-GI-NEXT: mov v2.b[8], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[9], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[3] -; CHECK-GI-NEXT: mov v2.b[10], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[4] -; CHECK-GI-NEXT: mov v2.b[11], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[5] -; CHECK-GI-NEXT: mov v2.b[12], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[6] -; CHECK-GI-NEXT: mov v2.b[13], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[7] -; CHECK-GI-NEXT: mov v2.b[14], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[15], v0.b[0] +; CHECK-GI-NEXT: mov v2.b[9], v1.b[1] +; CHECK-GI-NEXT: mov v2.b[10], v1.b[2] +; CHECK-GI-NEXT: mov v2.b[11], v1.b[3] +; CHECK-GI-NEXT: mov v2.b[12], v1.b[4] +; CHECK-GI-NEXT: mov v2.b[13], v1.b[5] +; CHECK-GI-NEXT: mov v2.b[14], v1.b[6] +; CHECK-GI-NEXT: mov v2.b[15], v1.b[7] ; CHECK-GI-NEXT: mov v0.16b, v2.16b ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll index 7721616be436c..f47c06e1ba4cb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll @@ -329,16 +329,10 @@ define <1 x double> @testDUP.v1f64(ptr %a, ptr %b) #0 { } define <16 x i8> @test_vld1q_lane_s8(ptr %a, <16 x i8> %b) { -; CHECK-GI-LABEL: test_vld1q_lane_s8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: ld1 { v0.b }[15], [x0] -; CHECK-GI-NEXT: ret -; -; CHECK-SD-LABEL: test_vld1q_lane_s8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldr b1, [x0] -; CHECK-SD-NEXT: mov v0.b[15], v1.b[0] -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_vld1q_lane_s8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ld1 { v0.b }[15], [x0] +; CHECK-NEXT: ret entry: %0 = load i8, ptr %a, align 1 %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15 @@ -401,20 +395,12 @@ entry: } define <8 x i8> @test_vld1_lane_s8(ptr %a, <8 x i8> %b) { -; CHECK-GI-LABEL: test_vld1_lane_s8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: ld1 { v0.b }[7], [x0] -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-GI-NEXT: ret -; -; CHECK-SD-LABEL: test_vld1_lane_s8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldr b1, [x0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: mov v0.b[7], v1.b[0] -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_vld1_lane_s8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: ld1 { v0.b }[7], [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret entry: %0 = load i8, ptr %a, align 1 %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7 diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll index fcd1fa2983420..a4863d1f74200 100644 --- a/llvm/test/CodeGen/AArch64/ctlz.ll +++ b/llvm/test/CodeGen/AArch64/ctlz.ll @@ -21,10 +21,8 @@ define void @v2i8(ptr %p1) { ; CHECK-GI-LABEL: v2i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ldr b0, [x0] -; CHECK-GI-NEXT: ldr b1, [x0, #1] ; CHECK-GI-NEXT: add x8, x0, #1 -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] ; CHECK-GI-NEXT: clz v0.8b, v0.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] @@ -59,13 +57,10 @@ define void @v3i8(ptr %p1) { ; CHECK-GI-LABEL: v3i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ldr b0, [x0] -; CHECK-GI-NEXT: ldr b1, [x0, #1] ; CHECK-GI-NEXT: add x8, x0, #1 ; CHECK-GI-NEXT: add x9, x0, #2 -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] -; CHECK-GI-NEXT: ldr b1, [x0, #2] -; CHECK-GI-NEXT: mov v0.b[2], v1.b[0] +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9] ; CHECK-GI-NEXT: clz v0.8b, v0.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll index 10ec1d0c1982a..55f75b6bc3f27 100644 --- a/llvm/test/CodeGen/AArch64/ctpop.ll +++ b/llvm/test/CodeGen/AArch64/ctpop.ll @@ -21,10 +21,8 @@ define void @v2i8(ptr %p1) { ; CHECK-GI-LABEL: v2i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ldr b0, [x0] -; CHECK-GI-NEXT: ldr b1, [x0, #1] ; CHECK-GI-NEXT: add x8, x0, #1 -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] ; CHECK-GI-NEXT: cnt v0.8b, v0.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] @@ -58,13 +56,10 @@ define void @v3i8(ptr %p1) { ; CHECK-GI-LABEL: v3i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ldr b0, [x0] -; CHECK-GI-NEXT: ldr b1, [x0, #1] ; CHECK-GI-NEXT: add x8, x0, #1 ; CHECK-GI-NEXT: add x9, x0, #2 -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] -; CHECK-GI-NEXT: ldr b1, [x0, #2] -; CHECK-GI-NEXT: mov v0.b[2], v1.b[0] +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v0.b }[2], [x9] ; CHECK-GI-NEXT: cnt v0.8b, v0.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x0] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll index 3fa5d64a210e1..6b26ae98a4ed8 100644 --- a/llvm/test/CodeGen/AArch64/load.ll +++ b/llvm/test/CodeGen/AArch64/load.ll @@ -353,19 +353,19 @@ define <7 x i8> @load_v7i8(ptr %ptr) { ; CHECK-GI-LABEL: load_v7i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr b0, [x0] -; CHECK-GI-NEXT: ldr b1, [x0, #1] +; CHECK-GI-NEXT: add x8, x0, #1 ; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] -; CHECK-GI-NEXT: ldr b1, [x0, #2] -; CHECK-GI-NEXT: mov v0.b[2], v1.b[0] -; CHECK-GI-NEXT: ldr b1, [x0, #3] -; CHECK-GI-NEXT: mov v0.b[3], v1.b[0] -; CHECK-GI-NEXT: ldr b1, [x0, #4] -; CHECK-GI-NEXT: mov v0.b[4], v1.b[0] -; CHECK-GI-NEXT: ldr b1, [x0, #5] -; CHECK-GI-NEXT: mov v0.b[5], v1.b[0] -; CHECK-GI-NEXT: ldr b1, [x0, #6] -; CHECK-GI-NEXT: mov v0.b[6], v1.b[0] +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: ld1 { v0.b }[2], [x8] +; CHECK-GI-NEXT: add x8, x0, #3 +; CHECK-GI-NEXT: ld1 { v0.b }[3], [x8] +; CHECK-GI-NEXT: add x8, x0, #4 +; CHECK-GI-NEXT: ld1 { v0.b }[4], [x8] +; CHECK-GI-NEXT: add x8, x0, #5 +; CHECK-GI-NEXT: ld1 { v0.b }[5], [x8] +; CHECK-GI-NEXT: add x8, x0, #6 +; CHECK-GI-NEXT: ld1 { v0.b }[6], [x8] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret %a = load <7 x i8>, ptr %ptr diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index d54dde3c86364..e1018bbee7893 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -209,13 +209,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr b0, [x0] ; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] ; CHECK-GI-NEXT: add x8, x2, #1 -; CHECK-GI-NEXT: ldr b2, [x0, #1] -; CHECK-GI-NEXT: ldr b3, [x1, #1] -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] -; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] ; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index 4c8f0c9c446f5..e5c07e0c81e35 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -208,14 +208,13 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){ ; ; CHECK-GI-LABEL: shufflevector_v2i1: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov w8, v1.s[1] ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov w9, v0.s[1] -; CHECK-GI-NEXT: mov v1.b[1], w8 -; CHECK-GI-NEXT: mov v0.b[1], w9 -; CHECK-GI-NEXT: mov b1, v1.b[1] -; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov w8, v0.s[1] +; CHECK-GI-NEXT: mov w9, v1.s[1] +; CHECK-GI-NEXT: mov v0.b[1], w8 +; CHECK-GI-NEXT: mov v1.b[1], w9 +; CHECK-GI-NEXT: mov v0.b[1], v1.b[1] ; CHECK-GI-NEXT: umov w8, v0.b[0] ; CHECK-GI-NEXT: umov w9, v0.b[1] ; CHECK-GI-NEXT: mov v0.s[0], w8 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index dc39ad0571b14..085857c0c5428 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -210,13 +210,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr b0, [x0] ; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] ; CHECK-GI-NEXT: add x8, x2, #1 -; CHECK-GI-NEXT: ldr b2, [x0, #1] -; CHECK-GI-NEXT: ldr b3, [x1, #1] -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] -; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] ; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index 14a578fa317d0..b0b3198fda0e6 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -207,13 +207,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr b0, [x0] ; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] ; CHECK-GI-NEXT: add x8, x2, #1 -; CHECK-GI-NEXT: ldr b2, [x0, #1] -; CHECK-GI-NEXT: ldr b3, [x1, #1] -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] -; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] ; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index ddb3332abf5d0..54754e7fbaed6 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -206,13 +206,11 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: ldr b0, [x0] ; CHECK-GI-NEXT: ldr b1, [x1] +; CHECK-GI-NEXT: add x8, x0, #1 +; CHECK-GI-NEXT: add x9, x1, #1 +; CHECK-GI-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-GI-NEXT: ld1 { v1.b }[1], [x9] ; CHECK-GI-NEXT: add x8, x2, #1 -; CHECK-GI-NEXT: ldr b2, [x0, #1] -; CHECK-GI-NEXT: ldr b3, [x1, #1] -; CHECK-GI-NEXT: mov v0.b[0], v0.b[0] -; CHECK-GI-NEXT: mov v1.b[0], v1.b[0] -; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] -; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] ; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b ; CHECK-GI-NEXT: st1 { v0.b }[0], [x2] ; CHECK-GI-NEXT: st1 { v0.b }[1], [x8] From 05041c11e6f8f48cfd6f3c733f483f9584d8c932 Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Thu, 24 Apr 2025 17:18:17 +0200 Subject: [PATCH 2/4] Move patterns to AArch64InstrInfo.td; add OnlyGISel; rebase --- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 ------- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 19 ++++++++++++++++++- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index f72d072b6edd1..7322212c5bb24 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -516,10 +516,3 @@ def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv2d GPR64sp:$Rn)>; def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), (LD1Rv1d GPR64sp:$Rn)>; - -defm : LoadInsertPatterns; -def : Ld1Lane64Pat; -def : Ld1Lane128Pat; -defm : Neon_INS_elt_pattern; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6c30aded72d38..25f41c41c7fab 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1114,12 +1114,15 @@ let RecomputePerFunction = 1 in { def SLSBLRMitigation : Predicate<[{ MF->getSubtarget().hardenSlsBlr() }]>; def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>; + + defvar GISelLegalizedCheck = "(!MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) && MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized))"; + def OnlyGISel : Predicate; // Toggles patterns which aren't beneficial in GlobalISel when we aren't // optimizing. This allows us to selectively use patterns without impacting // SelectionDAG's behaviour. // FIXME: One day there will probably be a nicer way to check for this, but // today is not that day. - def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; + def OptimizedGISelOrOtherSelector : PredicategetFunction().hasOptNone() || !", GISelLegalizedCheck)>; } include "AArch64InstrFormats.td" @@ -4036,6 +4039,11 @@ multiclass LoadInsertPatterns; } +let Predicates = [OnlyGISel] in { +defm : LoadInsertPatterns; +} defm : LoadInsertPatterns; @@ -7330,6 +7338,9 @@ defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; +let Predicates = [OnlyGISel] in { +defm : Neon_INS_elt_pattern; +} defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; @@ -8752,6 +8763,9 @@ class Ld1Lane128Pat; +let Predicates = [OnlyGISel] in { +def : Ld1Lane128Pat; +} def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; @@ -8825,6 +8839,9 @@ class Ld1Lane64Pat; +let Predicates = [OnlyGISel] in { +def : Ld1Lane64Pat; +} def : Ld1Lane64Pat; def : Ld1Lane64Pat; def : Ld1Lane64Pat; diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 78ba7aaaa18ec..51f1351a5edf4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1785,7 +1785,7 @@ define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v8i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[0] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: mov v2.b[1], v0.b[1] ; CHECK-GI-NEXT: mov v2.b[2], v0.b[2] From 14425edc6ab4fbdf5354b49c039fde63a9d9c51c Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Mon, 5 May 2025 19:52:23 +0200 Subject: [PATCH 3/4] OnlyGISel comment --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 25f41c41c7fab..90ff828695d67 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1116,6 +1116,8 @@ let RecomputePerFunction = 1 in { def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>; defvar GISelLegalizedCheck = "(!MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) && MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized))"; + // Toggles patterns which are not used by SelectionDAG to reduce its + // match table/so as not to interfere with it. def OnlyGISel : Predicate; // Toggles patterns which aren't beneficial in GlobalISel when we aren't // optimizing. This allows us to selectively use patterns without impacting From 4ffec2ebeb38f49c555101dfe873e22b820dfdbe Mon Sep 17 00:00:00 2001 From: Vladislav Dzhidzhoev Date: Mon, 12 May 2025 13:02:32 +0200 Subject: [PATCH 4/4] Removed GlobalISel; added comments --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 90ff828695d67..9bcee256aad1e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1115,16 +1115,12 @@ let RecomputePerFunction = 1 in { def SLSBLRMitigation : Predicate<[{ MF->getSubtarget().hardenSlsBlr() }]>; def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget().hardenSlsBlr() }]>; - defvar GISelLegalizedCheck = "(!MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) && MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized))"; - // Toggles patterns which are not used by SelectionDAG to reduce its - // match table/so as not to interfere with it. - def OnlyGISel : Predicate; // Toggles patterns which aren't beneficial in GlobalISel when we aren't // optimizing. This allows us to selectively use patterns without impacting // SelectionDAG's behaviour. // FIXME: One day there will probably be a nicer way to check for this, but // today is not that day. - def OptimizedGISelOrOtherSelector : PredicategetFunction().hasOptNone() || !", GISelLegalizedCheck)>; + def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">; } include "AArch64InstrFormats.td" @@ -4041,11 +4037,10 @@ multiclass LoadInsertPatterns; } -let Predicates = [OnlyGISel] in { +// Accept i8 scalar argument in GlobalISel. defm : LoadInsertPatterns; -} defm : LoadInsertPatterns; @@ -7340,9 +7335,8 @@ defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; -let Predicates = [OnlyGISel] in { +// Accept i8 scalar argument in GlobalISel. defm : Neon_INS_elt_pattern; -} defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; defm : Neon_INS_elt_pattern; @@ -8765,9 +8759,8 @@ class Ld1Lane128Pat; -let Predicates = [OnlyGISel] in { +// Accept i8 scalar argument in GlobalISel. def : Ld1Lane128Pat; -} def : Ld1Lane128Pat; def : Ld1Lane128Pat; def : Ld1Lane128Pat; @@ -8841,9 +8834,8 @@ class Ld1Lane64Pat; -let Predicates = [OnlyGISel] in { +// Accept i8 scalar argument in GlobalISel. def : Ld1Lane64Pat; -} def : Ld1Lane64Pat; def : Ld1Lane64Pat; def : Ld1Lane64Pat;