Skip to content

Commit 13fcd84

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.6-beta.1
2 parents d3daa3c + 3831082 commit 13fcd84

File tree

5 files changed

+38051
-3
lines changed

5 files changed

+38051
-3
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14474,17 +14474,74 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
1447414474
return true;
1447514475
}
1447614476

14477+
/// Try to map an integer comparison with size > XLEN to vector instructions
14478+
/// before type legalization splits it up into chunks.
14479+
static SDValue
14480+
combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
14481+
const SDLoc &DL, SelectionDAG &DAG,
14482+
const RISCVSubtarget &Subtarget) {
14483+
assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
14484+
14485+
if (!Subtarget.hasVInstructions())
14486+
return SDValue();
14487+
14488+
MVT XLenVT = Subtarget.getXLenVT();
14489+
EVT OpVT = X.getValueType();
14490+
// We're looking for an oversized integer equality comparison.
14491+
if (OpVT.isScalableVT() || !OpVT.isScalarInteger())
14492+
return SDValue();
14493+
14494+
unsigned OpSize = OpVT.getSizeInBits();
14495+
// The size should be larger than XLen and smaller than the maximum vector
14496+
// size.
14497+
if (OpSize <= Subtarget.getXLen() ||
14498+
OpSize > Subtarget.getRealMinVLen() *
14499+
Subtarget.getMaxLMULForFixedLengthVectors())
14500+
return SDValue();
14501+
14502+
// Don't perform this combine if constructing the vector will be expensive.
14503+
auto IsVectorBitCastCheap = [](SDValue X) {
14504+
X = peekThroughBitcasts(X);
14505+
return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
14506+
X.getOpcode() == ISD::LOAD;
14507+
};
14508+
if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
14509+
return SDValue();
14510+
14511+
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
14512+
Attribute::NoImplicitFloat))
14513+
return SDValue();
14514+
14515+
unsigned VecSize = OpSize / 8;
14516+
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
14517+
EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
14518+
14519+
SDValue VecX = DAG.getBitcast(VecVT, X);
14520+
SDValue VecY = DAG.getBitcast(VecVT, Y);
14521+
SDValue Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
14522+
return DAG.getSetCC(DL, VT, DAG.getNode(ISD::VECREDUCE_OR, DL, XLenVT, Cmp),
14523+
DAG.getConstant(0, DL, XLenVT), CC);
14524+
}
14525+
1447714526
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
1447814527
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
1447914528
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
1448014529
// can become a sext.w instead of a shift pair.
1448114530
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
1448214531
const RISCVSubtarget &Subtarget) {
14532+
SDLoc dl(N);
1448314533
SDValue N0 = N->getOperand(0);
1448414534
SDValue N1 = N->getOperand(1);
1448514535
EVT VT = N->getValueType(0);
1448614536
EVT OpVT = N0.getValueType();
1448714537

14538+
// Looking for an equality compare.
14539+
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14540+
if (ISD::isIntEqualitySetCC(Cond))
14541+
if (SDValue V = combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG,
14542+
Subtarget))
14543+
return V;
14544+
1448814545
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
1448914546
return SDValue();
1449014547

@@ -14499,8 +14556,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
1449914556
N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
1450014557
return SDValue();
1450114558

14502-
// Looking for an equality compare.
14503-
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
1450414559
if (!isIntEqualitySetCC(Cond))
1450514560
return SDValue();
1450614561

@@ -14512,7 +14567,6 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
1451214567

1451314568
const APInt &C1 = N1C->getAPIntValue();
1451414569

14515-
SDLoc dl(N);
1451614570
// If the constant is larger than 2^32 - 1 it is impossible for both sides
1451714571
// to be equal.
1451814572
if (C1.getActiveBits() > 32)

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,3 +2491,26 @@ bool RISCVTTIImpl::isProfitableToSinkOperands(
24912491
}
24922492
return true;
24932493
}
2494+
2495+
/// Build the memcmp expansion options for this subtarget.
///
/// \param OptSize   Forwarded to TLI->getMaxExpandSizeMemcmp() to obtain the
///                  load budget.
/// \param IsZeroCmp When true, overlapping loads no longer require Zbb/Zbkb,
///                  and (if vector instructions are available) load sizes
///                  above XLEN bytes are offered as well.
/// \returns the populated options.
RISCVTTIImpl::TTI::MemCmpExpansionOptions
RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;

  // Here we assume that a core that has implemented unaligned vector access
  // should also have implemented unaligned scalar access.
  const bool HasUnalignedMem =
      ST->enableUnalignedScalarMem() || ST->enableUnalignedVectorMem();
  const bool HasBitManipOrZeroCmp =
      ST->hasStdExtZbb() || ST->hasStdExtZbkb() || IsZeroCmp;
  Options.AllowOverlappingLoads = HasUnalignedMem && HasBitManipOrZeroCmp;

  // The same budget is used for the total and for the per-block load count.
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.NumLoadsPerBlock = Options.MaxNumLoads;

  // Scalar load sizes in bytes, widest first.
  if (!ST->is64Bit())
    Options.LoadSizes = {4, 2, 1};
  else
    Options.LoadSizes = {8, 4, 2, 1};

  // Sizes above XLEN are only offered for zero-compares with V available.
  if (!IsZeroCmp || !ST->hasVInstructions())
    return Options;

  // Offer every byte count from just above XLEN up to VLenB * MaxLMUL. Each
  // size is prepended, so the final list stays sorted from widest to
  // narrowest.
  const unsigned VLenB = ST->getRealMinVLen() / 8;
  const unsigned MinVecBytes = ST->getXLen() / 8 + 1;
  const unsigned MaxVecBytes = VLenB * ST->getMaxLMULForFixedLengthVectors();
  for (unsigned Bytes = MinVecBytes; Bytes <= MaxVecBytes; ++Bytes)
    Options.LoadSizes.insert(Options.LoadSizes.begin(), Bytes);

  return Options;
}

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
427427

428428
bool isProfitableToSinkOperands(Instruction *I,
429429
SmallVectorImpl<Use *> &Ops) const;
430+
431+
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
432+
bool IsZeroCmp) const;
430433
};
431434

432435
} // end namespace llvm

0 commit comments

Comments
 (0)