Skip to content

Commit 3831082

Browse files
committed
[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.6-beta.1 [skip ci]
1 parent d3daa3c commit 3831082

File tree

5 files changed

+41185
-3
lines changed

5 files changed

+41185
-3
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14474,17 +14474,78 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
1447414474
return true;
1447514475
}
1447614476

14477+
/// Try to map an integer comparison with size > XLEN to vector instructions
14478+
/// before type legalization splits it up into chunks.
///
/// Both operands are bitcast to a fixed vector of i8, compared element-wise
/// with SETNE, and the resulting i1 vector is OR-reduced to an XLenVT scalar.
/// The returned node is a setcc of that scalar against zero using \p CC, with
/// result type \p VT. An empty SDValue is returned whenever any of the
/// preconditions checked below does not hold.
14479+
static SDValue
14480+
combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
14481+
const SDLoc &DL, SelectionDAG &DAG,
14482+
const RISCVSubtarget &Subtarget) {
14483+
assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
14484+
14485+
// Nothing to do without vector instructions.
if (!Subtarget.hasVInstructions())
14486+
return SDValue();
14487+
14488+
MVT XLenVT = Subtarget.getXLenVT();
14489+
EVT OpVT = X.getValueType();
14490+
// We're looking for an oversized integer equality comparison.
14491+
if (OpVT.isScalableVT() || !OpVT.isScalarInteger())
14492+
return SDValue();
14493+
14494+
unsigned OpSize = OpVT.getSizeInBits();
14495+
// TODO: Support non-power-of-2 types.
14496+
if (!isPowerOf2_32(OpSize))
14497+
return SDValue();
14498+
14499+
// The size should be larger than XLen and no larger than the maximum vector
14500+
// size (RealMinVLen * the maximum LMUL for fixed-length vectors).
14501+
if (OpSize <= Subtarget.getXLen() ||
14502+
OpSize > Subtarget.getRealMinVLen() *
14503+
Subtarget.getMaxLMULForFixedLengthVectors())
14504+
return SDValue();
14505+
14506+
// Don't perform this combine if constructing the vector will be expensive.
// After looking through bitcasts, an operand counts as cheap when it is a
// constant, already has a vector type, or is a load.
14507+
auto IsVectorBitCastCheap = [](SDValue X) {
14508+
X = peekThroughBitcasts(X);
14509+
return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
14510+
X.getOpcode() == ISD::LOAD;
14511+
};
14512+
if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
14513+
return SDValue();
14514+
14515+
// Bail out for functions carrying the noimplicitfloat attribute.
// NOTE(review): presumably because such functions must not have vector
// instructions introduced implicitly - confirm.
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
14516+
Attribute::NoImplicitFloat))
14517+
return SDValue();
14518+
14519+
// One i8 element per byte of the scalar operand (OpSize is in bits).
unsigned VecSize = OpSize / 8;
14520+
EVT VecVT = MVT::getVectorVT(MVT::i8, VecSize);
14521+
EVT CmpVT = MVT::getVectorVT(MVT::i1, VecSize);
14522+
14523+
SDValue VecX = DAG.getBitcast(VecVT, X);
14524+
SDValue VecY = DAG.getBitcast(VecVT, Y);
14525+
// Any differing byte sets a lane in Cmp, so the OR-reduction is non-zero
// exactly when some byte differs; comparing it with zero under the original
// predicate CC yields the requested equality/inequality result.
SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
14526+
return DAG.getSetCC(DL, VT, DAG.getNode(ISD::VECREDUCE_OR, DL, XLenVT, Cmp),
14527+
DAG.getConstant(0, DL, XLenVT), CC);
14528+
}
14529+
1447714530
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
1447814531
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
1447914532
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
1448014533
// can become a sext.w instead of a shift pair.
1448114534
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
1448214535
const RISCVSubtarget &Subtarget) {
14536+
SDLoc dl(N);
1448314537
SDValue N0 = N->getOperand(0);
1448414538
SDValue N1 = N->getOperand(1);
1448514539
EVT VT = N->getValueType(0);
1448614540
EVT OpVT = N0.getValueType();
1448714541

14542+
// Looking for an equality compare.
14543+
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14544+
if (ISD::isIntEqualitySetCC(Cond))
14545+
if (SDValue V = combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG,
14546+
Subtarget))
14547+
return V;
14548+
1448814549
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
1448914550
return SDValue();
1449014551

@@ -14499,8 +14560,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
1449914560
N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
1450014561
return SDValue();
1450114562

14502-
// Looking for an equality compare.
14503-
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
1450414563
if (!isIntEqualitySetCC(Cond))
1450514564
return SDValue();
1450614565

@@ -14512,7 +14571,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
1451214571

1451314572
const APInt &C1 = N1C->getAPIntValue();
1451414573

14515-
SDLoc dl(N);
1451614574
// If the constant is larger than 2^32 - 1 it is impossible for both sides
1451714575
// to be equal.
1451814576
if (C1.getActiveBits() > 32)

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,3 +2491,36 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
24912491
}
24922492
return true;
24932493
}
2494+
2495+
/// Build the memcmp expansion options for this subtarget.
///
/// \p OptSize is forwarded to TLI->getMaxExpandSizeMemcmp() to obtain the
/// load budget. \p IsZeroCmp relaxes the overlapping-load requirement and,
/// together with vector instruction support, enables the vector-sized load
/// sizes added at the end of this function.
RISCVTTIImpl::TTI::MemCmpExpansionOptions
2496+
RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
2497+
TTI::MemCmpExpansionOptions Options;
2498+
// Here we assume that a core that has implemented unaligned vector access
2499+
// should also have implemented unaligned scalar access.
// Overlapping loads additionally require Zbb or Zbkb unless only an
// equality result is needed (IsZeroCmp).
2500+
Options.AllowOverlappingLoads =
2501+
(ST->enableUnalignedScalarMem() || ST->enableUnalignedVectorMem()) &&
2502+
(ST->hasStdExtZbb() || ST->hasStdExtZbkb() || IsZeroCmp);
2503+
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
2504+
Options.NumLoadsPerBlock = Options.MaxNumLoads;
2505+
// Scalar load sizes, largest first, capped at 8 on 64-bit targets and 4
// otherwise.
if (ST->is64Bit())
2506+
Options.LoadSizes = {8, 4, 2, 1};
2507+
else
2508+
Options.LoadSizes = {4, 2, 1};
2509+
// Vector-sized loads are only offered for equality-only comparisons.
if (IsZeroCmp && ST->hasVInstructions()) {
2510+
unsigned RealMinVLen = ST->getRealMinVLen();
2511+
// Support Fractional LMULs if the lengths are larger than XLen.
2512+
// TODO: Support non-power-of-2 types.
// Lengths are visited in increasing order (RealMinVLen / 8, / 4, / 2) and
// each is inserted at the front, so LoadSizes stays sorted largest-first.
// Len / 8 is the size that gets recorded; presumably Len is in bits and
// LoadSizes is in bytes - confirm.
2513+
for (unsigned LMUL = 8; LMUL >= 2; LMUL /= 2) {
2514+
unsigned Len = RealMinVLen / LMUL;
2515+
if (Len > ST->getXLen())
2516+
Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
2517+
}
2518+
// Integral LMULs (1, 2, 4, ...) up to the maximum allowed for fixed-length
// vectors, again inserted at the front in increasing order.
for (unsigned LMUL = 1; LMUL <= ST->getMaxLMULForFixedLengthVectors();
2519+
LMUL *= 2) {
2520+
unsigned Len = RealMinVLen * LMUL;
2521+
if (Len > ST->getXLen())
2522+
Options.LoadSizes.insert(Options.LoadSizes.begin(), Len / 8);
2523+
}
2524+
}
2525+
return Options;
2526+
}

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
427427

428428
bool isProfitableToSinkOperands(Instruction *I,
429429
SmallVectorImpl<Use *> &Ops) const;
430+
431+
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
432+
bool IsZeroCmp) const;
430433
};
431434

432435
} // end namespace llvm

0 commit comments

Comments
 (0)