Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1114,12 +1114,15 @@ let RecomputePerFunction = 1 in {

// Complementary predicates keyed on the subtarget's hardenSlsBlr() setting:
// SLSBLRMitigation holds when it returns true, NoSLSBLRMitigation when it
// returns false.
def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;

// C++ condition that is true when the MachineFunction carries the Legalized
// property and has NOT been marked FailedISel. The expression is wrapped in
// parentheses so that it can be negated or spliced into a larger condition
// with !strconcat without changing its meaning.
defvar GISelLegalizedCheck = "(!MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) && MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized))";
// Predicate for patterns that should only be used when the check above holds.
// NOTE(review): presumably this identifies functions being selected by
// GlobalISel (legalized, no fallback) -- confirm against the selector setup.
def OnlyGISel : Predicate<GISelLegalizedCheck>;
// Toggles patterns which aren't beneficial in GlobalISel when we aren't
// optimizing. This allows us to selectively use patterns without impacting
// SelectionDAG's behaviour.
// FIXME: One day there will probably be a nicer way to check for this, but
// today is not that day.
def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
def OptimizedGISelOrOtherSelector : Predicate<!strconcat("!MF->getFunction().hasOptNone() || !", GISelLegalizedCheck)>;
}

include "AArch64InstrFormats.td"
Expand Down Expand Up @@ -4036,6 +4039,11 @@ multiclass LoadInsertPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType
ROXLoadInst, ro, Addr, UnscaledAddr, AddrImm, SubReg>;
}

// Variant guarded by OnlyGISel: matches a plain `load` producing an i8 scalar
// that is inserted into a v16i8/v8i8 vector. It uses the same instructions and
// addressing operands as the unpredicated `extloadi8`/i32 instantiation that
// follows.
// NOTE(review): presumably needed because i8 scalars are only seen on the
// GlobalISel path -- confirm.
let Predicates = [OnlyGISel] in {
defm : LoadInsertPatterns<load, v16i8, v8i8, nxv16i8, i8,
LDRBui, LDURBi, LDRBroW, LDRBroX,
ro8, am_indexed8, am_unscaled8, uimm12s1, bsub>;
}
defm : LoadInsertPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32,
LDRBui, LDURBi, LDRBroW, LDRBroX,
ro8, am_indexed8, am_unscaled8, uimm12s1, bsub>;
Expand Down Expand Up @@ -7307,12 +7315,12 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
(VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
(i64 imm:$Immd))),
(INS V128:$src, imm:$Immd,
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
(VT128 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub)), imm:$Immn)>;

def : Pat<(VT64 (vector_insert V64:$src,
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
(i64 imm:$Immd))),
(EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
(EXTRACT_SUBREG (INS (VT128 (SUBREG_TO_REG (i64 0), V64:$src, dsub)),
imm:$Immd, V128:$Rn, imm:$Immn),
dsub)>;

Expand All @@ -7330,6 +7338,9 @@ defm : Neon_INS_elt_pattern<v8bf16, v4bf16, nxv8bf16, bf16, VectorIndexH, INSvi1
defm : Neon_INS_elt_pattern<v4f32, v2f32, nxv4f32, f32, VectorIndexS, INSvi32lane, DUPi32, ssub>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, nxv2f64, f64, VectorIndexD, INSvi64lane, DUPi64, dsub>;

// Variant guarded by OnlyGISel: byte-lane insert/extract patterns with an i8
// scalar type. It is otherwise identical to the unpredicated v16i8
// instantiation, which uses i32 as the scalar type.
// NOTE(review): presumably i8 scalars only appear on the GlobalISel path --
// confirm.
let Predicates = [OnlyGISel] in {
defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i8, VectorIndexB, INSvi8lane, DUPi8, bsub>;
}
defm : Neon_INS_elt_pattern<v16i8, v8i8, nxv16i8, i32, VectorIndexB, INSvi8lane, DUPi8, bsub>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, nxv8i16, i32, VectorIndexH, INSvi16lane, DUPi16, hsub>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, nxv4i32, i32, VectorIndexS, INSvi32lane, DUPi32, ssub>;
Expand Down Expand Up @@ -8752,6 +8763,9 @@ class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
(STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
(LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;

// Variant guarded by OnlyGISel: selects LD1i8 for a plain i8 `load` inserted
// into a v16i8 lane. The unpredicated byte pattern uses `extloadi8` with an
// i32 scalar instead.
// NOTE(review): presumably i8 scalars only appear on the GlobalISel path --
// confirm.
let Predicates = [OnlyGISel] in {
def : Ld1Lane128Pat<load, VectorIndexB, v16i8, i8, LD1i8>;
}
def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
Expand Down Expand Up @@ -8825,6 +8839,9 @@ class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
VecIndex:$idx, GPR64sp:$Rn),
dsub)>;

// Variant guarded by OnlyGISel: selects LD1i8 for a plain i8 `load` inserted
// into a v8i8 lane. The unpredicated byte pattern uses `extloadi8` with an
// i32 scalar instead.
// NOTE(review): presumably i8 scalars only appear on the GlobalISel path --
// confirm.
let Predicates = [OnlyGISel] in {
def : Ld1Lane64Pat<load, VectorIndexB, v8i8, i8, LD1i8>;
}
def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
Expand Down
22 changes: 7 additions & 15 deletions llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
Original file line number Diff line number Diff line change
Expand Up @@ -88,24 +88,16 @@ define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: and x9, x9, #0xf
; CHECK-GISEL-NEXT: mov b2, v0.b[1]
; CHECK-GISEL-NEXT: mov b3, v0.b[2]
; CHECK-GISEL-NEXT: lsl x10, x9, #1
; CHECK-GISEL-NEXT: sub x9, x10, x9
; CHECK-GISEL-NEXT: ldr b1, [x8, x9]
; CHECK-GISEL-NEXT: mov v1.b[0], v1.b[0]
; CHECK-GISEL-NEXT: mov v1.b[1], v2.b[0]
; CHECK-GISEL-NEXT: mov b2, v0.b[3]
; CHECK-GISEL-NEXT: mov v1.b[2], v3.b[0]
; CHECK-GISEL-NEXT: mov b3, v0.b[4]
; CHECK-GISEL-NEXT: mov v1.b[3], v2.b[0]
; CHECK-GISEL-NEXT: mov b2, v0.b[5]
; CHECK-GISEL-NEXT: mov v1.b[4], v3.b[0]
; CHECK-GISEL-NEXT: mov b3, v0.b[6]
; CHECK-GISEL-NEXT: mov b0, v0.b[7]
; CHECK-GISEL-NEXT: mov v1.b[5], v2.b[0]
; CHECK-GISEL-NEXT: mov v1.b[6], v3.b[0]
; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[0]
; CHECK-GISEL-NEXT: mov v1.b[1], v0.b[1]
; CHECK-GISEL-NEXT: mov v1.b[2], v0.b[2]
; CHECK-GISEL-NEXT: mov v1.b[3], v0.b[3]
; CHECK-GISEL-NEXT: mov v1.b[4], v0.b[4]
; CHECK-GISEL-NEXT: mov v1.b[5], v0.b[5]
; CHECK-GISEL-NEXT: mov v1.b[6], v0.b[6]
; CHECK-GISEL-NEXT: mov v1.b[7], v0.b[7]
; CHECK-GISEL-NEXT: fmov d0, d1
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
Expand Down
57 changes: 16 additions & 41 deletions llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13326,10 +13326,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: ldr b1, [x0]
; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
; CHECK-GI-NEXT: add x8, x0, x2
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: mov.b v0[1], v1[0]
; CHECK-GI-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
Expand Down Expand Up @@ -13373,11 +13372,10 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: ldr b1, [x0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: add x8, x0, x2
; CHECK-GI-NEXT: ld1.b { v0 }[1], [x0]
; CHECK-GI-NEXT: str x8, [x1]
; CHECK-GI-NEXT: mov.b v0[1], v1[0]
; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%tmp1 = load i8, ptr %bar
Expand Down Expand Up @@ -13891,43 +13889,20 @@ define void @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half>
}

define void @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) {
; CHECK-SD-LABEL: test_ld1lane_build_i8:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: ldr b1, [x0]
; CHECK-SD-NEXT: ldr x8, [sp]
; CHECK-SD-NEXT: ld1.b { v1 }[1], [x1]
; CHECK-SD-NEXT: ld1.b { v1 }[2], [x2]
; CHECK-SD-NEXT: ld1.b { v1 }[3], [x3]
; CHECK-SD-NEXT: ld1.b { v1 }[4], [x4]
; CHECK-SD-NEXT: ld1.b { v1 }[5], [x5]
; CHECK-SD-NEXT: ld1.b { v1 }[6], [x6]
; CHECK-SD-NEXT: ld1.b { v1 }[7], [x7]
; CHECK-SD-NEXT: sub.8b v0, v1, v0
; CHECK-SD-NEXT: str d0, [x8]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_ld1lane_build_i8:
; CHECK-GI: ; %bb.0:
; CHECK-GI-NEXT: ldr b1, [x0]
; CHECK-GI-NEXT: ldr b2, [x1]
; CHECK-GI-NEXT: ldr x8, [sp]
; CHECK-GI-NEXT: mov.b v1[0], v1[0]
; CHECK-GI-NEXT: mov.b v1[1], v2[0]
; CHECK-GI-NEXT: ldr b2, [x2]
; CHECK-GI-NEXT: mov.b v1[2], v2[0]
; CHECK-GI-NEXT: ldr b2, [x3]
; CHECK-GI-NEXT: mov.b v1[3], v2[0]
; CHECK-GI-NEXT: ldr b2, [x4]
; CHECK-GI-NEXT: mov.b v1[4], v2[0]
; CHECK-GI-NEXT: ldr b2, [x5]
; CHECK-GI-NEXT: mov.b v1[5], v2[0]
; CHECK-GI-NEXT: ldr b2, [x6]
; CHECK-GI-NEXT: mov.b v1[6], v2[0]
; CHECK-GI-NEXT: ldr b2, [x7]
; CHECK-GI-NEXT: mov.b v1[7], v2[0]
; CHECK-GI-NEXT: sub.8b v0, v1, v0
; CHECK-GI-NEXT: str d0, [x8]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: test_ld1lane_build_i8:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr b1, [x0]
; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ld1.b { v1 }[1], [x1]
; CHECK-NEXT: ld1.b { v1 }[2], [x2]
; CHECK-NEXT: ld1.b { v1 }[3], [x3]
; CHECK-NEXT: ld1.b { v1 }[4], [x4]
; CHECK-NEXT: ld1.b { v1 }[5], [x5]
; CHECK-NEXT: ld1.b { v1 }[6], [x6]
; CHECK-NEXT: ld1.b { v1 }[7], [x7]
; CHECK-NEXT: sub.8b v0, v1, v0
; CHECK-NEXT: str d0, [x8]
; CHECK-NEXT: ret
%ld.a = load i8, ptr %a
%ld.b = load i8, ptr %b
%ld.c = load i8, ptr %c
Expand Down
34 changes: 10 additions & 24 deletions llvm/test/CodeGen/AArch64/arm64-ld1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1004,16 +1004,10 @@ declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwin
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly

define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
; CHECK-SD-LABEL: ld1_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ld1_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr b1, [x0]
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: ld1_16b:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
Expand Down Expand Up @@ -1086,20 +1080,12 @@ define <1 x i64> @ld1_1d(ptr %p) {
}

define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
; CHECK-SD-LABEL: ld1_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ld1_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr b1, [x0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: ld1_8b:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
Expand Down
Loading
Loading