Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4554,6 +4554,26 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues({Sum, OutFlag}, DL);
}

/// Re-express a scalar integer NEON intrinsic as the target node \p Opcode
/// operating on floating-point typed values.
///
/// Every operand of \p Op except operand 0 (presumably the intrinsic ID --
/// confirm against the caller) is bitcast from i32/i64 to the same-width FP
/// type (f32/f64). The new node is created with the FP type matching the
/// original integer result type, and its result is bitcast back so the value
/// returned has the same type as \p Op.
///
/// \param Op     The intrinsic node being lowered; its result and the operands
///               from index 1 onwards must be i32 or i64 (asserted).
/// \param Opcode The opcode of the node to create.
/// \param DAG    The SelectionDAG used to build the new nodes.
/// \returns The result of the new node, bitcast to the original result type.
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
                                     SelectionDAG &DAG) {
  SDLoc DL(Op);

  // Map an integer scalar type onto the floating-point type of equal width.
  auto toFloatVT = [](EVT IntVT) {
    assert((IntVT == MVT::i32 || IntVT == MVT::i64) && "Unexpected VT");
    return IntVT == MVT::i32 ? MVT::f32 : MVT::f64;
  };

  const unsigned NumOperands = Op.getNumOperands();
  SmallVector<SDValue, 2> FloatOperands;
  FloatOperands.reserve(NumOperands - 1);

  // Operand 0 is skipped; all remaining operands are reinterpreted as FP.
  for (unsigned Idx = 1; Idx < NumOperands; ++Idx) {
    SDValue IntOperand = Op.getOperand(Idx);
    FloatOperands.push_back(
        DAG.getBitcast(toFloatVT(IntOperand.getValueType()), IntOperand));
  }

  // Build the node on the FP type, then restore the original integer type.
  EVT ResultVT = Op.getValueType();
  SDValue FloatResult =
      DAG.getNode(Opcode, DL, toFloatVT(ResultVT), FloatOperands);
  return DAG.getBitcast(ResultVT, FloatResult);
}

static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
Expand Down Expand Up @@ -6400,26 +6420,46 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
case Intrinsic::aarch64_neon_sqrshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHL, DAG);
case Intrinsic::aarch64_neon_sqshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHL, DAG);
case Intrinsic::aarch64_neon_uqrshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHL, DAG);
case Intrinsic::aarch64_neon_uqshl:
if (Op.getValueType().isVector())
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHL, DAG);
case Intrinsic::aarch64_neon_sqadd:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::SADDSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQADD, DAG);

case Intrinsic::aarch64_neon_sqsub:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::SSUBSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSUB, DAG);

case Intrinsic::aarch64_neon_uqadd:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::UADDSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQADD, DAG);
case Intrinsic::aarch64_neon_uqsub:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::USUBSAT, DL, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSUB, DAG);
case Intrinsic::aarch64_neon_sqdmulls_scalar:
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQDMULL, DAG);
case Intrinsic::aarch64_sve_whilelt:
return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
/*IsEqual=*/false);
Expand Down
56 changes: 43 additions & 13 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -7700,16 +7700,21 @@ multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
}

multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode, SDPatternOperator SatOp> {
SDPatternOperator OpNode, SDPatternOperator G_OpNode, SDPatternOperator SatOp> {
def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (SatOp (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;

def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
def : Pat<(i64 (G_OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
def : Pat<(i32 (G_OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
(!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;

def : Pat<(f64 (OpNode FPR64:$Rn, FPR64:$Rm)),
(!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(f32 (OpNode FPR32:$Rn, FPR32:$Rm)),
(!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
}

Expand Down Expand Up @@ -7795,7 +7800,7 @@ multiclass SIMDThreeScalarMixedHS<bit U, bits<5> opc, string asm,
def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc,
(outs FPR64:$Rd),
(ins FPR32:$Rn, FPR32:$Rm), asm, "",
[(set (i64 FPR64:$Rd), (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>;
[(set (f64 FPR64:$Rd), (OpNode FPR32:$Rn, FPR32:$Rm))]>;
}

let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
Expand Down Expand Up @@ -9800,7 +9805,8 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,

multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
SDPatternOperator VecAcc,
SDPatternOperator ScalAcc> {
SDPatternOperator ScalAcc,
SDPatternOperator G_ScalAcc> {
def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
V128, V64,
V128_lo, VectorIndexH,
Expand Down Expand Up @@ -9869,7 +9875,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
let Inst{20} = idx{0};
}

def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
def : Pat<(i32 (G_ScalAcc (i32 FPR32Op:$Rd),
(i32 (vector_extract
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
Expand All @@ -9881,7 +9887,19 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
(i64 0))>;

def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
def : Pat<(f32 (ScalAcc FPR32Op:$Rd,
(bitconvert (i32 (vector_extract
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
(v4i16 V64:$Rm))),
(i64 0)))))),
(!cast<Instruction>(NAME # v1i32_indexed)
FPR32Op:$Rd,
(f16 (EXTRACT_SUBREG V64:$Rn, hsub)),
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
(i64 0))>;

def : Pat<(i32 (G_ScalAcc (i32 FPR32Op:$Rd),
(i32 (vector_extract
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
Expand All @@ -9894,15 +9912,27 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
V128_lo:$Rm,
VectorIndexH:$idx)>;

def : Pat<(f32 (ScalAcc FPR32Op:$Rd,
(bitconvert (i32 (vector_extract
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
(dup_v8i16 (v8i16 V128_lo:$Rm),
VectorIndexH:$idx))),
(i64 0)))))),
(!cast<Instruction>(NAME # v1i32_indexed)
FPR32Op:$Rd,
(f16 (EXTRACT_SUBREG V64:$Rn, hsub)),
V128_lo:$Rm,
VectorIndexH:$idx)>;

def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
FPR64Op, FPR32Op, V128, VectorIndexS,
asm, ".s", "", "", ".s",
[(set (i64 FPR64Op:$dst),
(ScalAcc (i64 FPR64Op:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar
(i32 FPR32Op:$Rn),
(i32 (vector_extract (v4i32 V128:$Rm),
VectorIndexS:$idx))))))]> {
[(set (f64 FPR64Op:$dst),
(ScalAcc FPR64Op:$Rd,
(AArch64sqdmull FPR32Op:$Rn,
(bitconvert (i32 (vector_extract (v4i32 V128:$Rm),
VectorIndexS:$idx))))))]> {

bits<2> idx;
let Inst{11} = idx{1};
Expand Down
45 changes: 28 additions & 17 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,18 @@ def AArch64fcvtnu_half : SDNode<"AArch64ISD::FCVTNU_HALF", SDTFPExtendOp>;
def AArch64fcvtps_half : SDNode<"AArch64ISD::FCVTPS_HALF", SDTFPExtendOp>;
def AArch64fcvtpu_half : SDNode<"AArch64ISD::FCVTPU_HALF", SDTFPExtendOp>;

// SelectionDAG node definitions for the AArch64ISD saturating add/sub/shift
// opcodes. Each is declared with the SDTFPBinOp type profile, so selection
// patterns reference them with floating-point typed (f32/f64) values.
def AArch64sqadd: SDNode<"AArch64ISD::SQADD", SDTFPBinOp>;
def AArch64sqrshl: SDNode<"AArch64ISD::SQRSHL", SDTFPBinOp>;
def AArch64sqshl: SDNode<"AArch64ISD::SQSHL", SDTFPBinOp>;
def AArch64sqsub: SDNode<"AArch64ISD::SQSUB", SDTFPBinOp>;
def AArch64uqadd: SDNode<"AArch64ISD::UQADD", SDTFPBinOp>;
def AArch64uqrshl: SDNode<"AArch64ISD::UQRSHL", SDTFPBinOp>;
def AArch64uqshl: SDNode<"AArch64ISD::UQSHL", SDTFPBinOp>;
def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
// SQDMULL uses its own profile: one result and two operands, where the two
// operands must have the same type and both the result and the first operand
// are constrained to floating-point types. The result type is not tied to the
// operand type by this profile.
def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL",
SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisFP<1>]>>;

//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;

// Vector immediate ops
Expand Down Expand Up @@ -6433,19 +6445,19 @@ defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd, saddsat>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", AArch64sqadd, int_aarch64_neon_sqadd, saddsat>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub, ssubsat>;
defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", AArch64sqrshl, int_aarch64_neon_sqrshl, int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", AArch64sqshl, int_aarch64_neon_sqshl, int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", AArch64sqsub, int_aarch64_neon_sqsub, ssubsat>;
defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd, uaddsat>;
defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub, usubsat>;
defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", AArch64uqadd, int_aarch64_neon_uqadd, uaddsat>;
defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", AArch64uqrshl, int_aarch64_neon_uqrshl, int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", AArch64uqshl, int_aarch64_neon_uqshl, int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", AArch64uqsub, int_aarch64_neon_uqsub, usubsat>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
Expand Down Expand Up @@ -6496,17 +6508,16 @@ def : InstAlias<"faclt $dst, $src1, $src2",
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
int_aarch64_neon_sqdmulls_scalar>;
AArch64sqdmull>;
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),
def : Pat<(f64 (AArch64sqadd FPR64:$Rd,
(AArch64sqdmull FPR32:$Rn, FPR32:$Rm))),
(SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),

def : Pat<(f64 (AArch64sqsub FPR64:$Rd,
(AArch64sqdmull FPR32:$Rn, FPR32:$Rm))),
(SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -8734,9 +8745,9 @@ defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", saddsat,
int_aarch64_neon_sqadd>;
AArch64sqadd, int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ssubsat,
int_aarch64_neon_sqsub>;
AArch64sqsub, int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
Expand Down
Loading
Loading