Skip to content

Commit 5e7631e

Browse files
authored
[LoongArch][DAGCombiner] Combine vand (vnot ..) to vandn (#161037)
After this commit, DAGCombiner will have more opportunities to perform vector folding. This patch includes several foldings, as follows: - VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)) - VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
1 parent f287abd commit 5e7631e

File tree

5 files changed

+223
-123
lines changed

5 files changed

+223
-123
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,59 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
615615
return SDValue();
616616
}
617617

618+
// Helper to attempt to return a cheaper, bit-inverted version of \p V.
// Bitcasts on \p V are looked through first, so the value returned may have a
// different type than the value passed in; that is why the recursive uses
// below bitcast the result before reusing it. Returns an empty SDValue when no
// pattern matches. Some patterns create new nodes in \p DAG.
619+
static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
620+
// TODO: don't always ignore oneuse constraints.
621+
V = peekThroughBitcasts(V);
622+
EVT VT = V.getValueType();
623+
624+
// Match not(xor X, -1) -> X.
// The all-ones operand may be either a build_vector or a scalar constant.
625+
if (V.getOpcode() == ISD::XOR &&
626+
(ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
627+
isAllOnesConstant(V.getOperand(1))))
628+
return V.getOperand(0);
629+
630+
// Match not(extract_subvector(not(X))) -> extract_subvector(X).
// Only attempted when operand 1 of the extract is the zero constant or the
// source vector has a single use.
631+
if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
632+
(isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
633+
if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
634+
Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
635+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
636+
V.getOperand(1));
637+
}
638+
}
639+
640+
// Match not(SplatVector(not(X))) -> SplatVector(X).
641+
if (V.getOpcode() == ISD::BUILD_VECTOR) {
642+
if (SDValue SplatValue =
643+
cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
// Give up unless V is the only user of the splatted value.
644+
if (!V->isOnlyUserOf(SplatValue.getNode()))
645+
return SDValue();
646+
647+
if (SDValue Not = isNOT(SplatValue, DAG)) {
648+
Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
649+
return DAG.getSplat(VT, SDLoc(Not), Not);
650+
}
651+
}
652+
}
653+
654+
// Match not(or(not(X),not(Y))) -> and(X, Y).
// Requires VT to be a legal type and both OR operands to have a single use.
655+
if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
656+
V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
657+
// TODO: Handle cases with single NOT operand -> VANDN
658+
if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
659+
if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
660+
return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
661+
DAG.getBitcast(VT, Op1));
662+
}
663+
664+
// TODO: Add more matching patterns. Such as,
665+
// not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
666+
// not(slt(C, X)) -> slt(X - 1, C)
667+
668+
// No pattern matched: signal failure with an empty SDValue.
return SDValue();
669+
}
670+
618671
SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
619672
SelectionDAG &DAG) const {
620673
EVT VT = Op.getValueType();
@@ -5057,6 +5110,33 @@ void LoongArchTargetLowering::ReplaceNodeResults(
50575110
}
50585111
}
50595112

5113+
/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
///
/// Only applies when the result type of \p N is a 128-bit or 256-bit vector.
/// Either AND operand may be the inverted one; operand 0 is tried first.
/// On success both operands are bitcast to the result type and a
/// LoongArchISD::VANDN node is returned; otherwise an empty SDValue is
/// returned.
5114+
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5115+
SelectionDAG &DAG) {
5116+
assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5117+
5118+
MVT VT = N->getSimpleValueType(0);
5119+
if (!VT.is128BitVector() && !VT.is256BitVector())
5120+
return SDValue();
5121+
5122+
// X receives the un-inverted form of the NOT operand, Y the other operand.
SDValue X, Y;
5123+
SDValue N0 = N->getOperand(0);
5124+
SDValue N1 = N->getOperand(1);
5125+
5126+
if (SDValue Not = isNOT(N0, DAG)) {
5127+
X = Not;
5128+
Y = N1;
5129+
} else if (SDValue Not = isNOT(N1, DAG)) {
5130+
X = Not;
5131+
Y = N0;
5132+
} else
5133+
return SDValue();
5134+
5135+
// isNOT may return a value of a different type, so normalize both to VT.
X = DAG.getBitcast(VT, X);
5136+
Y = DAG.getBitcast(VT, Y);
5137+
return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5138+
}
5139+
50605140
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
50615141
TargetLowering::DAGCombinerInfo &DCI,
50625142
const LoongArchSubtarget &Subtarget) {
@@ -5074,6 +5154,9 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
50745154
SDValue NewOperand;
50755155
MVT GRLenVT = Subtarget.getGRLenVT();
50765156

5157+
if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5158+
return R;
5159+
50775160
// BSTRPICK requires the 32S feature.
50785161
if (!Subtarget.has32S())
50795162
return SDValue();
@@ -6751,6 +6834,69 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
67516834
return SDValue();
67526835
}
67536836

6837+
/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
///
/// Per the folds below, VANDN(x, y) is treated as AND(NOT(x), y). Handles
/// undef / all-zeros / all-ones operands, turns VANDN back into AND when the
/// first operand is itself inverted, and rewrites to NOT(OR(..)) when the
/// second operand is an inverted value or (for v16i8/v32i8 only) a constant
/// splat. Returns an empty SDValue when no fold applies.
6838+
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
6839+
TargetLowering::DAGCombinerInfo &DCI,
6840+
const LoongArchSubtarget &Subtarget) {
6841+
SDValue N0 = N->getOperand(0);
6842+
SDValue N1 = N->getOperand(1);
6843+
MVT VT = N->getSimpleValueType(0);
6844+
SDLoc DL(N);
6845+
6846+
// VANDN(undef, x) -> 0
6847+
// VANDN(x, undef) -> 0
6848+
if (N0.isUndef() || N1.isUndef())
6849+
return DAG.getConstant(0, DL, VT);
6850+
6851+
// VANDN(0, x) -> x
6852+
if (ISD::isBuildVectorAllZeros(N0.getNode()))
6853+
return N1;
6854+
6855+
// VANDN(x, 0) -> 0
6856+
if (ISD::isBuildVectorAllZeros(N1.getNode()))
6857+
return DAG.getConstant(0, DL, VT);
6858+
6859+
// VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6860+
if (ISD::isBuildVectorAllOnes(N1.getNode()))
6861+
return DAG.getNOT(DL, N0, VT);
6862+
6863+
// Turn VANDN back to AND if input is inverted.
6864+
if (SDValue Not = isNOT(N0, DAG))
6865+
return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6866+
6867+
// Folds for better commutativity:
// These are only attempted when N1 has a single use.
6868+
if (N1->hasOneUse()) {
6869+
// VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6870+
if (SDValue Not = isNOT(N1, DAG))
6871+
return DAG.getNOT(
6872+
DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6873+
6874+
// VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6875+
// -> NOT(OR(x, SplatVector(~Imm)))
6876+
// Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6877+
// gain benefits.
6878+
if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6879+
N1.getOpcode() == ISD::BUILD_VECTOR) {
6880+
if (SDValue SplatValue =
6881+
cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
// Give up unless N1 is the only user of the splatted value.
6882+
if (!N1->isOnlyUserOf(SplatValue.getNode()))
6883+
return SDValue();
6884+
6885+
if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
// Bitwise-complement the splat constant and keep only its low 8 bits.
6886+
uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6887+
SDValue Not =
6888+
DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6889+
return DAG.getNOT(
6890+
DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6891+
VT);
6892+
}
6893+
}
6894+
}
6895+
}
6896+
6897+
// No fold applied.
return SDValue();
6898+
}
6899+
67546900
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
67556901
DAGCombinerInfo &DCI) const {
67566902
SelectionDAG &DAG = DCI.DAG;
@@ -6786,6 +6932,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
67866932
return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
67876933
case ISD::EXTRACT_VECTOR_ELT:
67886934
return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6935+
case LoongArchISD::VANDN:
6936+
return performVANDNCombine(N, DAG, DCI, Subtarget);
67896937
}
67906938
return SDValue();
67916939
}

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,7 +1400,7 @@ def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))),
14001400
(XVNOR_V LASX256:$xj, LASX256:$xk)>;
14011401
// XVANDN_V
14021402
foreach vt = [v32i8, v16i16, v8i32, v4i64] in
1403-
def : Pat<(and (vt (vnot LASX256:$xj)), (vt LASX256:$xk)),
1403+
def : Pat<(loongarch_vandn (vt LASX256:$xj), (vt LASX256:$xk)),
14041404
(XVANDN_V LASX256:$xj, LASX256:$xk)>;
14051405
// XVORN_V
14061406
foreach vt = [v32i8, v16i16, v8i32, v4i64] in
@@ -1454,25 +1454,25 @@ defm : PatXr<ctlz, "XVCLZ">;
14541454
defm : PatXr<ctpop, "XVPCNT">;
14551455

14561456
// XVBITCLR_{B/H/W/D}
1457-
def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))),
1457+
def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1, v32i8:$xk)), v32i8:$xj),
14581458
(v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
1459-
def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))),
1459+
def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1, v16i16:$xk)), v16i16:$xj),
14601460
(v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
1461-
def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))),
1461+
def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1, v8i32:$xk)), v8i32:$xj),
14621462
(v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
1463-
def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))),
1463+
def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1, v4i64:$xk)), v4i64:$xj),
14641464
(v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
1465-
def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1,
1466-
(vsplati8imm7 v32i8:$xk)))),
1465+
def : Pat<(loongarch_vandn (v32i8 (shl vsplat_imm_eq_1,
1466+
(vsplati8imm7 v32i8:$xk))), v32i8:$xj),
14671467
(v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
1468-
def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1,
1469-
(vsplati16imm15 v16i16:$xk)))),
1468+
def : Pat<(loongarch_vandn (v16i16 (shl vsplat_imm_eq_1,
1469+
(vsplati16imm15 v16i16:$xk))), v16i16:$xj),
14701470
(v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
1471-
def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1,
1472-
(vsplati32imm31 v8i32:$xk)))),
1471+
def : Pat<(loongarch_vandn (v8i32 (shl vsplat_imm_eq_1,
1472+
(vsplati32imm31 v8i32:$xk))), v8i32:$xj),
14731473
(v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
1474-
def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1,
1475-
(vsplati64imm63 v4i64:$xk)))),
1474+
def : Pat<(loongarch_vandn (v4i64 (shl vsplat_imm_eq_1,
1475+
(vsplati64imm63 v4i64:$xk))), v4i64:$xj),
14761476
(v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
14771477

14781478
// XVBITCLRI_{B/H/W/D}

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
6262
def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
6363
def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
6464
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
65+
def loongarch_vandn: SDNode<"LoongArchISD::VANDN", SDT_LoongArchV2R>;
6566

6667
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
6768
def loongarch_vshuf4i_d : SDNode<"LoongArchISD::VSHUF4I_D", SDT_LoongArchV2RUimm>;
@@ -1609,7 +1610,7 @@ def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))),
16091610
(VNOR_V LSX128:$vj, LSX128:$vk)>;
16101611
// VANDN_V
16111612
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
1612-
def : Pat<(and (vt (vnot LSX128:$vj)), (vt LSX128:$vk)),
1613+
def : Pat<(loongarch_vandn (vt LSX128:$vj), (vt LSX128:$vk)),
16131614
(VANDN_V LSX128:$vj, LSX128:$vk)>;
16141615
// VORN_V
16151616
foreach vt = [v16i8, v8i16, v4i32, v2i64] in
@@ -1663,25 +1664,25 @@ defm : PatVr<ctlz, "VCLZ">;
16631664
defm : PatVr<ctpop, "VPCNT">;
16641665

16651666
// VBITCLR_{B/H/W/D}
1666-
def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))),
1667+
def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1, v16i8:$vk)), v16i8:$vj),
16671668
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
1668-
def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))),
1669+
def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1, v8i16:$vk)), v8i16:$vj),
16691670
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
1670-
def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))),
1671+
def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1, v4i32:$vk)), v4i32:$vj),
16711672
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
1672-
def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))),
1673+
def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1, v2i64:$vk)), v2i64:$vj),
16731674
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
1674-
def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1,
1675-
(vsplati8imm7 v16i8:$vk)))),
1675+
def : Pat<(loongarch_vandn (v16i8 (shl vsplat_imm_eq_1,
1676+
(vsplati8imm7 v16i8:$vk))), v16i8:$vj),
16761677
(v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>;
1677-
def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1,
1678-
(vsplati16imm15 v8i16:$vk)))),
1678+
def : Pat<(loongarch_vandn (v8i16 (shl vsplat_imm_eq_1,
1679+
(vsplati16imm15 v8i16:$vk))), v8i16:$vj),
16791680
(v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>;
1680-
def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1,
1681-
(vsplati32imm31 v4i32:$vk)))),
1681+
def : Pat<(loongarch_vandn (v4i32 (shl vsplat_imm_eq_1,
1682+
(vsplati32imm31 v4i32:$vk))), v4i32:$vj),
16821683
(v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>;
1683-
def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1,
1684-
(vsplati64imm63 v2i64:$vk)))),
1684+
def : Pat<(loongarch_vandn (v2i64 (shl vsplat_imm_eq_1,
1685+
(vsplati64imm63 v2i64:$vk))), v2i64:$vj),
16851686
(v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>;
16861687

16871688
// VBITCLRI_{B/H/W/D}

0 commit comments

Comments
 (0)