Skip to content

Commit cfbf70c

Browse files
committed
Implement memcmp expansion for a known, fixed-length size.
1 parent 4bf0001 commit cfbf70c

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15556,6 +15556,66 @@ SDValue PPCTargetLowering::combineSetCC(SDNode *N,
1555615556
SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
1555715557
return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
1555815558
}
15559+
15560+
if (LHS.getOpcode() == ISD::LOAD && RHS.getOpcode() == ISD::LOAD &&
15561+
LHS.hasOneUse() && RHS.hasOneUse() && LHS.getValueType() == MVT::i128 &&
15562+
RHS.getValueType() == MVT::i128) {
15563+
SDLoc DL(N);
15564+
SelectionDAG &DAG = DCI.DAG;
15565+
auto *LA = dyn_cast<LoadSDNode>(LHS);
15566+
auto *LB = dyn_cast<LoadSDNode>(RHS);
15567+
if (!LA || !LB)
15568+
return SDValue();
15569+
15570+
// If either memory operation (LA or LB) is volatile, do not perform any
15571+
// optimization or transformation. Volatile operations must be preserved
15572+
// as written to ensure correct program behavior, so we return an empty
15573+
// SDValue to indicate no action.
15574+
if (LA->isVolatile() || LB->isVolatile())
15575+
return SDValue();
15576+
15577+
// Only combine loads if both use the unindexed addressing mode.
15578+
// PowerPC AltiVec/VMX does not support vector loads or stores with
15579+
// pre/post-increment addressing. Indexed modes may imply implicit pointer
15580+
// updates, which are not compatible with AltiVec vector instructions.
15581+
if (LA->getAddressingMode() != ISD::UNINDEXED ||
15582+
LB->getAddressingMode() != ISD::UNINDEXED)
15583+
return SDValue();
15584+
15585+
// Only combine loads if both are non-extending loads (ISD::NON_EXTLOAD).
15586+
// Extending loads (such as ISD::ZEXTLOAD or ISD::SEXTLOAD) perform zero
15587+
// or sign extension, which may change the loaded value's semantics and
15588+
// are not compatible with vector loads.
15589+
if (LA->getExtensionType() != ISD::NON_EXTLOAD ||
15590+
LB->getExtensionType() != ISD::NON_EXTLOAD)
15591+
return SDValue();
15592+
// Build new v16i8 loads using the SAME chain/base/MMO (no extra memory
15593+
// op).
15594+
SDValue LHSVec = DAG.getLoad(MVT::v16i8, DL, LA->getChain(),
15595+
LA->getBasePtr(), LA->getMemOperand());
15596+
SDValue RHSVec = DAG.getLoad(MVT::v16i8, DL, LB->getChain(),
15597+
LB->getBasePtr(), LB->getMemOperand());
15598+
15599+
// Replace old loads' results (value and chain) so the old nodes die.
15600+
// DAG.DeleteNode(LHS.getNode());
15601+
// DAG.DeleteNode(RHS.getNode());
15602+
15603+
// SDValue LHSVec = DAG.getBitcast(MVT::v16i8, LHS);
15604+
// SDValue RHSVec = DAG.getBitcast(MVT::v16i8, RHS);
15605+
SDValue IntrID =
15606+
DAG.getTargetConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL,
15607+
Subtarget.isPPC64() ? MVT::i64 : MVT::i32);
15608+
SDValue CRSel =
15609+
DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15610+
SDValue Ops[] = {IntrID, CRSel, LHSVec, RHSVec};
15611+
SDValue PredResult =
15612+
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, Ops);
15613+
// ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15614+
// so we need to invert the CC opcode.
15615+
return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15616+
DAG.getConstant(0, DL, MVT::i32),
15617+
CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15618+
}
1555915619
}
1556015620

1556115621
return DAGCombineTruncBoolExt(N, DCI);

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) const {
439439
PPCTTIImpl::TTI::MemCmpExpansionOptions
440440
PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
441441
TTI::MemCmpExpansionOptions Options;
442-
Options.LoadSizes = {8, 4, 2, 1};
442+
Options.LoadSizes = {16, 8, 4, 2, 1};
443443
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
444444
return Options;
445445
}

0 commit comments

Comments
 (0)