Skip to content

Commit 11cf807

Browse files
committed
[AArch64][CodeGen] Always use SVE (when enabled) to lower integer divides
This patch adds custom lowering support for ISD::SDIV and ISD::UDIV when SVE is enabled, regardless of the minimum SVE vector length. We do this because NEON simply does not have vector integer divide support, so we want to take advantage of the divide instructions that SVE provides.

As part of this patch I've also simplified LowerToPredicatedOp to avoid re-asking the same question about whether we should be using SVE for fixed-length vectors. Once we've made the decision to call LowerToPredicatedOp, we should simply assert that SVE can be used.

I've updated the 128-bit min SVE vector bits tests here:
CodeGen/AArch64/sve-fixed-length-int-div.ll
CodeGen/AArch64/sve-fixed-length-int-rem.ll

Differential Revision: https://reviews.llvm.org/D117871
1 parent 1c9f154 commit 11cf807

File tree

4 files changed

+428
-49
lines changed

4 files changed

+428
-49
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 24 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,6 +1331,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13311331
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
13321332
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
13331333

1334+
// NEON doesn't support integer divides, but SVE does
1335+
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
1336+
MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1337+
setOperationAction(ISD::SDIV, VT, Custom);
1338+
setOperationAction(ISD::UDIV, VT, Custom);
1339+
}
1340+
13341341
// NOTE: Currently this has to happen after computeRegisterProperties rather
13351342
// than the preferred option of combining it with the addRegisterClass call.
13361343
if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1363,26 +1370,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13631370
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
13641371
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
13651372
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1366-
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1367-
setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1368-
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1369-
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1370-
setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1371-
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1372-
setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1373-
setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
13741373
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
13751374
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
13761375
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
13771376
setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1378-
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1379-
setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1380-
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1381-
setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1382-
setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1383-
setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1384-
setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1385-
setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
13861377
setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
13871378
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
13881379
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
@@ -3956,7 +3947,7 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
39563947
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
39573948

39583949
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3959-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3950+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
39603951

39613952
// Multiplications are only custom-lowered for 128-bit vectors so that
39623953
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
@@ -5157,11 +5148,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
51575148
case ISD::MUL:
51585149
return LowerMUL(Op, DAG);
51595150
case ISD::MULHS:
5160-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
5161-
/*OverrideNEON=*/true);
5151+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
51625152
case ISD::MULHU:
5163-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
5164-
/*OverrideNEON=*/true);
5153+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
51655154
case ISD::INTRINSIC_W_CHAIN:
51665155
return LowerINTRINSIC_W_CHAIN(Op, DAG);
51675156
case ISD::INTRINSIC_WO_CHAIN:
@@ -5252,8 +5241,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
52525241
case ISD::BSWAP:
52535242
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
52545243
case ISD::CTLZ:
5255-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
5256-
/*OverrideNEON=*/true);
5244+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
52575245
case ISD::CTTZ:
52585246
return LowerCTTZ(Op, DAG);
52595247
case ISD::VECTOR_SPLICE:
@@ -7514,17 +7502,13 @@ SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
75147502
default:
75157503
llvm_unreachable("Wrong instruction");
75167504
case ISD::SMAX:
7517-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
7518-
/*OverrideNEON=*/true);
7505+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
75197506
case ISD::SMIN:
7520-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
7521-
/*OverrideNEON=*/true);
7507+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
75227508
case ISD::UMAX:
7523-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
7524-
/*OverrideNEON=*/true);
7509+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
75257510
case ISD::UMIN:
7526-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
7527-
/*OverrideNEON=*/true);
7511+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
75287512
}
75297513
}
75307514

@@ -7540,8 +7524,7 @@ SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
75407524

75417525
if (VT.isScalableVector() ||
75427526
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
7543-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
7544-
true);
7527+
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
75457528

75467529
SDLoc DL(Op);
75477530
SDValue REVB;
@@ -11189,7 +11172,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
1118911172
EVT VT = Op.getValueType();
1119011173
SDLoc dl(Op);
1119111174

11192-
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
11175+
if (VT.isFixedLengthVector() && Subtarget->hasSVE())
1119311176
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
1119411177

1119511178
assert(VT.isScalableVector() && "Expected a scalable vector.");
@@ -19224,7 +19207,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
1922419207

1922519208
// Scalable vector i32/i64 DIV is supported.
1922619209
if (EltVT == MVT::i32 || EltVT == MVT::i64)
19227-
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
19210+
return LowerToPredicatedOp(Op, DAG, PredOpcode);
1922819211

1922919212
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
1923019213
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
@@ -19379,13 +19362,14 @@ SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
1937919362
// NOTE: The results for inactive lanes are undefined.
1938019363
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
1938119364
SelectionDAG &DAG,
19382-
unsigned NewOp,
19383-
bool OverrideNEON) const {
19365+
unsigned NewOp) const {
1938419366
EVT VT = Op.getValueType();
1938519367
SDLoc DL(Op);
1938619368
auto Pg = getPredicateForVector(DAG, DL, VT);
1938719369

19388-
if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
19370+
if (VT.isFixedLengthVector()) {
19371+
assert(VT.getFixedSizeInBits() <= Subtarget->getMinSVEVectorSizeInBits() &&
19372+
"Cannot use SVE to lower fixed length predicated op!");
1938919373
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
1939019374

1939119375
// Create list of operands by converting existing ones to scalable types.
@@ -19403,7 +19387,8 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
1940319387
continue;
1940419388
}
1940519389

19406-
assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
19390+
assert(V.getValueType().getFixedSizeInBits() <=
19391+
Subtarget->getMinSVEVectorSizeInBits() &&
1940719392
"Only fixed length vectors are supported!");
1940819393
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
1940919394
}

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -980,8 +980,8 @@ class AArch64TargetLowering : public TargetLowering {
980980
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
981981
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
982982
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
983-
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
984-
bool OverrideNEON = false) const;
983+
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
984+
unsigned NewOp) const;
985985
SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
986986
SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
987987
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;

0 commit comments

Comments
 (0)