Skip to content

Commit 857c65f

Browse files
SpencerAbson authored and folkertdev committed
[AArch64] Lower saturating add/sub intrinsics to generic ISD nodes
1 parent 4504e77 commit 857c65f

File tree

6 files changed

+282
-151
lines changed

6 files changed

+282
-151
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6235,6 +6235,26 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
62356235
DAG.getNode(
62366236
AArch64ISD::URSHR_I, dl, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2)));
62376237
return SDValue();
6238+
case Intrinsic::aarch64_neon_sqadd:
6239+
if (Op.getValueType().isVector())
6240+
return DAG.getNode(ISD::SADDSAT, dl, Op.getValueType(), Op.getOperand(1),
6241+
Op.getOperand(2));
6242+
return SDValue();
6243+
case Intrinsic::aarch64_neon_sqsub:
6244+
if (Op.getValueType().isVector())
6245+
return DAG.getNode(ISD::SSUBSAT, dl, Op.getValueType(), Op.getOperand(1),
6246+
Op.getOperand(2));
6247+
return SDValue();
6248+
case Intrinsic::aarch64_neon_uqadd:
6249+
if (Op.getValueType().isVector())
6250+
return DAG.getNode(ISD::UADDSAT, dl, Op.getValueType(), Op.getOperand(1),
6251+
Op.getOperand(2));
6252+
return SDValue();
6253+
case Intrinsic::aarch64_neon_uqsub:
6254+
if (Op.getValueType().isVector())
6255+
return DAG.getNode(ISD::USUBSAT, dl, Op.getValueType(), Op.getOperand(1),
6256+
Op.getOperand(2));
6257+
return SDValue();
62386258
case Intrinsic::aarch64_sve_whilelt:
62396259
return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
62406260
/*IsEqual=*/false);

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6256,24 +6256,6 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
62566256
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
62576257
}
62586258

6259-
multiclass SIMDThreeSameVectorExtraPatterns<string inst, SDPatternOperator OpNode> {
6260-
def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)),
6261-
(!cast<Instruction>(inst#"v8i8") V64:$LHS, V64:$RHS)>;
6262-
def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
6263-
(!cast<Instruction>(inst#"v4i16") V64:$LHS, V64:$RHS)>;
6264-
def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
6265-
(!cast<Instruction>(inst#"v2i32") V64:$LHS, V64:$RHS)>;
6266-
6267-
def : Pat<(v16i8 (OpNode V128:$LHS, V128:$RHS)),
6268-
(!cast<Instruction>(inst#"v16i8") V128:$LHS, V128:$RHS)>;
6269-
def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
6270-
(!cast<Instruction>(inst#"v8i16") V128:$LHS, V128:$RHS)>;
6271-
def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
6272-
(!cast<Instruction>(inst#"v4i32") V128:$LHS, V128:$RHS)>;
6273-
def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
6274-
(!cast<Instruction>(inst#"v2i64") V128:$LHS, V128:$RHS)>;
6275-
}
6276-
62776259
// As above, but D sized elements unsupported.
62786260
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
62796261
SDPatternOperator OpNode> {
@@ -9861,14 +9843,15 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
98619843
}
98629844

98639845
multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
9864-
SDPatternOperator Accum> {
9846+
SDPatternOperator VecAcc,
9847+
SDPatternOperator ScalAcc> {
98659848
def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
98669849
V128, V64,
98679850
V128_lo, VectorIndexH,
98689851
asm, ".4s", ".4s", ".4h", ".h",
98699852
[(set (v4i32 V128:$dst),
9870-
(Accum (v4i32 V128:$Rd),
9871-
(v4i32 (int_aarch64_neon_sqdmull
9853+
(VecAcc (v4i32 V128:$Rd),
9854+
(v4i32 (int_aarch64_neon_sqdmull
98729855
(v4i16 V64:$Rn),
98739856
(dup_v8i16 (v8i16 V128_lo:$Rm),
98749857
VectorIndexH:$idx)))))]> {
@@ -9883,8 +9866,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
98839866
V128_lo, VectorIndexH,
98849867
asm#"2", ".4s", ".4s", ".8h", ".h",
98859868
[(set (v4i32 V128:$dst),
9886-
(Accum (v4i32 V128:$Rd),
9887-
(v4i32 (int_aarch64_neon_sqdmull
9869+
(VecAcc (v4i32 V128:$Rd),
9870+
(v4i32 (int_aarch64_neon_sqdmull
98889871
(extract_high_v8i16 (v8i16 V128:$Rn)),
98899872
(extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))))]> {
98909873
bits<3> idx;
@@ -9898,8 +9881,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
98989881
V128, VectorIndexS,
98999882
asm, ".2d", ".2d", ".2s", ".s",
99009883
[(set (v2i64 V128:$dst),
9901-
(Accum (v2i64 V128:$Rd),
9902-
(v2i64 (int_aarch64_neon_sqdmull
9884+
(VecAcc (v2i64 V128:$Rd),
9885+
(v2i64 (int_aarch64_neon_sqdmull
99039886
(v2i32 V64:$Rn),
99049887
(dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
99059888
bits<2> idx;
@@ -9912,8 +9895,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99129895
V128, VectorIndexS,
99139896
asm#"2", ".2d", ".2d", ".4s", ".s",
99149897
[(set (v2i64 V128:$dst),
9915-
(Accum (v2i64 V128:$Rd),
9916-
(v2i64 (int_aarch64_neon_sqdmull
9898+
(VecAcc (v2i64 V128:$Rd),
9899+
(v2i64 (int_aarch64_neon_sqdmull
99179900
(extract_high_v4i32 (v4i32 V128:$Rn)),
99189901
(extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
99199902
bits<2> idx;
@@ -9930,8 +9913,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99309913
let Inst{20} = idx{0};
99319914
}
99329915

9933-
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
9934-
(i32 (vector_extract
9916+
def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
9917+
(i32 (vector_extract
99359918
(v4i32 (int_aarch64_neon_sqdmull
99369919
(v4i16 V64:$Rn),
99379920
(v4i16 V64:$Rm))),
@@ -9942,8 +9925,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99429925
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
99439926
(i64 0))>;
99449927

9945-
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
9946-
(i32 (vector_extract
9928+
def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
9929+
(i32 (vector_extract
99479930
(v4i32 (int_aarch64_neon_sqdmull
99489931
(v4i16 V64:$Rn),
99499932
(dup_v8i16 (v8i16 V128_lo:$Rm),
@@ -9959,8 +9942,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99599942
FPR64Op, FPR32Op, V128, VectorIndexS,
99609943
asm, ".s", "", "", ".s",
99619944
[(set (i64 FPR64Op:$dst),
9962-
(Accum (i64 FPR64Op:$Rd),
9963-
(i64 (int_aarch64_neon_sqdmulls_scalar
9945+
(ScalAcc (i64 FPR64Op:$Rd),
9946+
(i64 (int_aarch64_neon_sqdmulls_scalar
99649947
(i32 FPR32Op:$Rn),
99659948
(i32 (vector_extract (v4i32 V128:$Rm),
99669949
VectorIndexS:$idx))))))]> {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5811,12 +5811,12 @@ defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp
58115811
defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
58125812
defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
58135813
defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
5814-
defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
5814+
defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", saddsat>;
58155815
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
58165816
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
58175817
defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
58185818
defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
5819-
defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
5819+
defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", ssubsat>;
58205820
defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
58215821
defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
58225822
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
@@ -5830,10 +5830,10 @@ defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp
58305830
defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
58315831
defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
58325832
defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
5833-
defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
5833+
defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", uaddsat>;
58345834
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
58355835
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
5836-
defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5836+
defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", usubsat>;
58375837
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
58385838
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
58395839
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
@@ -5842,12 +5842,6 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
58425842
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
58435843
int_aarch64_neon_sqrdmlsh>;
58445844

5845-
// Extra saturate patterns, other than the intrinsics matches above
5846-
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
5847-
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
5848-
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
5849-
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
5850-
58515845
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
58525846
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
58535847
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
@@ -6563,10 +6557,8 @@ defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
65636557
defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
65646558
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
65656559
defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
6566-
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
6567-
int_aarch64_neon_sqadd>;
6568-
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
6569-
int_aarch64_neon_sqsub>;
6560+
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", saddsat>;
6561+
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", ssubsat>;
65706562
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
65716563
int_aarch64_neon_sqdmull>;
65726564
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
@@ -8125,9 +8117,9 @@ defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
81258117
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
81268118
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
81278119
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
8128-
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
8120+
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", saddsat,
81298121
int_aarch64_neon_sqadd>;
8130-
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
8122+
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ssubsat,
81318123
int_aarch64_neon_sqsub>;
81328124
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
81338125
int_aarch64_neon_sqrdmlah>;

0 commit comments

Comments
 (0)