Skip to content

Commit d1b1fbf

Browse files
committed
[AArch64][GlobalISel] Adopt some Ld* patterns to reduce codegen regressions.
This is an update of #69607 after #101675 and #105686. The Ld1Lane64Pat, Ld1Lane128Pat, LoadInsertPatterns, and Neon_INS_elt_pattern patterns from SelectionDAG did not work for GlobalISel on the v8i8 and v16i8 vector types, because vector_insert for v8i8 and v16i8 in SelectionDAG expects an i32 scalar argument type, whereas G_INSERT_VECTOR_ELT expects s8. This change instantiates those patterns with an i8 scalar type in AArch64InstrGISel.td so that GlobalISel can select them.
1 parent d859cb6 commit d1b1fbf

15 files changed

+174
-343
lines changed

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,3 +516,10 @@ def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
516516
(LD1Rv2d GPR64sp:$Rn)>;
517517
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
518518
(LD1Rv1d GPR64sp:$Rn)>;
519+
520+
defm : LoadInsertPatterns<load, v16i8, v8i8, nxv16i8, i8,
521+
LDRBui, LDURBi, LDRBroW, LDRBroX,
522+
ro8, am_indexed8, am_unscaled8, uimm12s1, bsub>;
523+
def : Ld1Lane64Pat<load, VectorIndexB, v8i8, i8, LD1i8>;
524+
def : Ld1Lane128Pat<load, VectorIndexB, v16i8, i8, LD1i8>;
525+
defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i8, VectorIndexB, INSvi8lane>;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7307,12 +7307,12 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
73077307
(VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
73087308
(i64 imm:$Immd))),
73097309
(INS V128:$src, imm:$Immd,
7310-
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
7310+
(VT128 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub)), imm:$Immn)>;
73117311

73127312
def : Pat<(VT64 (vector_insert V64:$src,
73137313
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
73147314
(i64 imm:$Immd))),
7315-
(EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
7315+
(EXTRACT_SUBREG (INS (VT128 (SUBREG_TO_REG (i64 0), V64:$src, dsub)),
73167316
imm:$Immd, V128:$Rn, imm:$Immn),
73177317
dsub)>;
73187318

llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -88,24 +88,16 @@ define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
8888
; CHECK-GISEL-NEXT: mov x8, sp
8989
; CHECK-GISEL-NEXT: str q0, [sp]
9090
; CHECK-GISEL-NEXT: and x9, x9, #0xf
91-
; CHECK-GISEL-NEXT: mov b2, v0.b[1]
92-
; CHECK-GISEL-NEXT: mov b3, v0.b[2]
9391
; CHECK-GISEL-NEXT: lsl x10, x9, #1
9492
; CHECK-GISEL-NEXT: sub x9, x10, x9
9593
; CHECK-GISEL-NEXT: ldr b1, [x8, x9]
96-
; CHECK-GISEL-NEXT: mov v1.b[0], v1.b[0]
97-
; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0]
98-
; CHECK-GISEL-NEXT: mov b2, v0.b[3]
99-
; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0]
100-
; CHECK-GISEL-NEXT: mov b3, v0.b[4]
101-
; CHECK-GISEL-NEXT: mov v1.b[3], v2.b[0]
102-
; CHECK-GISEL-NEXT: mov b2, v0.b[5]
103-
; CHECK-GISEL-NEXT: mov v1.b[4], v3.b[0]
104-
; CHECK-GISEL-NEXT: mov b3, v0.b[6]
105-
; CHECK-GISEL-NEXT: mov b0, v0.b[7]
106-
; CHECK-GISEL-NEXT: mov v1.b[5], v2.b[0]
107-
; CHECK-GISEL-NEXT: mov v1.b[6], v3.b[0]
108-
; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[0]
94+
; CHECK-GISEL-NEXT: mov v1.b[1], v0.b[1]
95+
; CHECK-GISEL-NEXT: mov v1.b[2], v0.b[2]
96+
; CHECK-GISEL-NEXT: mov v1.b[3], v0.b[3]
97+
; CHECK-GISEL-NEXT: mov v1.b[4], v0.b[4]
98+
; CHECK-GISEL-NEXT: mov v1.b[5], v0.b[5]
99+
; CHECK-GISEL-NEXT: mov v1.b[6], v0.b[6]
100+
; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[7]
109101
; CHECK-GISEL-NEXT: fmov d0, d1
110102
; CHECK-GISEL-NEXT: add sp, sp, #16
111103
; CHECK-GISEL-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

Lines changed: 16 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13326,10 +13326,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
1332613326
;
1332713327
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane:
1332813328
; CHECK-GI: ; %bb.0:
13329-
; CHECK-GI-NEXT: ldr b1, [x0]
13329+
; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
1333013330
; CHECK-GI-NEXT: add x8, x0, x2
1333113331
; CHECK-GI-NEXT: str x8, [x1]
13332-
; CHECK-GI-NEXT: mov.b v0[1], v1[0]
1333313332
; CHECK-GI-NEXT: ret
1333413333
%tmp1 = load i8, ptr %bar
1333513334
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
@@ -13373,11 +13372,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i
1337313372
;
1337413373
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
1337513374
; CHECK-GI: ; %bb.0:
13376-
; CHECK-GI-NEXT: ldr b1, [x0]
1337713375
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
1337813376
; CHECK-GI-NEXT: add x8, x0, x2
13377+
; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
1337913378
; CHECK-GI-NEXT: str x8, [x1]
13380-
; CHECK-GI-NEXT: mov.b v0[1], v1[0]
1338113379
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
1338213380
; CHECK-GI-NEXT: ret
1338313381
%tmp1 = load i8, ptr %bar
@@ -13891,43 +13889,20 @@ define void @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half>
1389113889
}
1389213890

1389313891
define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) {
13894-
; CHECK-SD-LABEL: test_ld1lane_build_i8:
13895-
; CHECK-SD: ; %bb.0:
13896-
; CHECK-SD-NEXT: ldr b1, [x0]
13897-
; CHECK-SD-NEXT: ldr x8, [sp]
13898-
; CHECK-SD-NEXT: ld1.b { v1 }[1], [x1]
13899-
; CHECK-SD-NEXT: ld1.b { v1 }[2], [x2]
13900-
; CHECK-SD-NEXT: ld1.b { v1 }[3], [x3]
13901-
; CHECK-SD-NEXT: ld1.b { v1 }[4], [x4]
13902-
; CHECK-SD-NEXT: ld1.b { v1 }[5], [x5]
13903-
; CHECK-SD-NEXT: ld1.b { v1 }[6], [x6]
13904-
; CHECK-SD-NEXT: ld1.b { v1 }[7], [x7]
13905-
; CHECK-SD-NEXT: sub.8b v0, v1, v0
13906-
; CHECK-SD-NEXT: str d0, [x8]
13907-
; CHECK-SD-NEXT: ret
13908-
;
13909-
; CHECK-GI-LABEL: test_ld1lane_build_i8:
13910-
; CHECK-GI: ; %bb.0:
13911-
; CHECK-GI-NEXT: ldr b1, [x0]
13912-
; CHECK-GI-NEXT: ldr b2, [x1]
13913-
; CHECK-GI-NEXT: ldr x8, [sp]
13914-
; CHECK-GI-NEXT: mov.b v1[0], v1[0]
13915-
; CHECK-GI-NEXT: mov.b v1[1], v2[0]
13916-
; CHECK-GI-NEXT: ldr b2, [x2]
13917-
; CHECK-GI-NEXT: mov.b v1[2], v2[0]
13918-
; CHECK-GI-NEXT: ldr b2, [x3]
13919-
; CHECK-GI-NEXT: mov.b v1[3], v2[0]
13920-
; CHECK-GI-NEXT: ldr b2, [x4]
13921-
; CHECK-GI-NEXT: mov.b v1[4], v2[0]
13922-
; CHECK-GI-NEXT: ldr b2, [x5]
13923-
; CHECK-GI-NEXT: mov.b v1[5], v2[0]
13924-
; CHECK-GI-NEXT: ldr b2, [x6]
13925-
; CHECK-GI-NEXT: mov.b v1[6], v2[0]
13926-
; CHECK-GI-NEXT: ldr b2, [x7]
13927-
; CHECK-GI-NEXT: mov.b v1[7], v2[0]
13928-
; CHECK-GI-NEXT: sub.8b v0, v1, v0
13929-
; CHECK-GI-NEXT: str d0, [x8]
13930-
; CHECK-GI-NEXT: ret
13892+
; CHECK-LABEL: test_ld1lane_build_i8:
13893+
; CHECK: ; %bb.0:
13894+
; CHECK-NEXT: ldr b1, [x0]
13895+
; CHECK-NEXT: ldr x8, [sp]
13896+
; CHECK-NEXT: ld1.b { v1 }[1], [x1]
13897+
; CHECK-NEXT: ld1.b { v1 }[2], [x2]
13898+
; CHECK-NEXT: ld1.b { v1 }[3], [x3]
13899+
; CHECK-NEXT: ld1.b { v1 }[4], [x4]
13900+
; CHECK-NEXT: ld1.b { v1 }[5], [x5]
13901+
; CHECK-NEXT: ld1.b { v1 }[6], [x6]
13902+
; CHECK-NEXT: ld1.b { v1 }[7], [x7]
13903+
; CHECK-NEXT: sub.8b v0, v1, v0
13904+
; CHECK-NEXT: str d0, [x8]
13905+
; CHECK-NEXT: ret
1393113906
%ld.a = load i8, ptr %a
1393213907
%ld.b = load i8, ptr %b
1393313908
%ld.c = load i8, ptr %c

llvm/test/CodeGen/AArch64/arm64-ld1.ll

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,16 +1004,10 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin
10041004
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
10051005

10061006
define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
1007-
; CHECK-SD-LABEL: ld1_16b:
1008-
; CHECK-SD: // %bb.0:
1009-
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
1010-
; CHECK-SD-NEXT: ret
1011-
;
1012-
; CHECK-GI-LABEL: ld1_16b:
1013-
; CHECK-GI: // %bb.0:
1014-
; CHECK-GI-NEXT: ldr b1, [x0]
1015-
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
1016-
; CHECK-GI-NEXT: ret
1007+
; CHECK-LABEL: ld1_16b:
1008+
; CHECK: // %bb.0:
1009+
; CHECK-NEXT: ld1.b { v0 }[0], [x0]
1010+
; CHECK-NEXT: ret
10171011
; Make sure we are using the operands defined by the ABI
10181012
%tmp1 = load i8, ptr %bar
10191013
%tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
@@ -1086,20 +1080,12 @@ define <1 x i64> @ld1_1d(ptr %p) {
10861080
}
10871081

10881082
define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
1089-
; CHECK-SD-LABEL: ld1_8b:
1090-
; CHECK-SD: // %bb.0:
1091-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
1092-
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
1093-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
1094-
; CHECK-SD-NEXT: ret
1095-
;
1096-
; CHECK-GI-LABEL: ld1_8b:
1097-
; CHECK-GI: // %bb.0:
1098-
; CHECK-GI-NEXT: ldr b1, [x0]
1099-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1100-
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
1101-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1102-
; CHECK-GI-NEXT: ret
1083+
; CHECK-LABEL: ld1_8b:
1084+
; CHECK: // %bb.0:
1085+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1086+
; CHECK-NEXT: ld1.b { v0 }[0], [x0]
1087+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
1088+
; CHECK-NEXT: ret
11031089
; Make sure we are using the operands defined by the ABI
11041090
%tmp1 = load i8, ptr %bar
11051091
%tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0

0 commit comments

Comments
 (0)