@@ -32315,6 +32315,54 @@ bool X86TargetLowering::isInlineAsmTargetBranch(
3231532315 return Inst.equals_insensitive("call") || Inst.equals_insensitive("jmp");
3231632316}
3231732317
32318+ static SDValue getFlagsOfCmpZeroFori1(SelectionDAG &DAG, const SDLoc &DL,
32319+ SDValue Mask) {
32320+ EVT Ty = MVT::i8;
32321+ auto V = DAG.getBitcast(MVT::i1, Mask);
32322+ auto VE = DAG.getZExtOrTrunc(V, DL, Ty);
32323+ auto Zero = DAG.getConstant(0, DL, Ty);
32324+ SDVTList X86SubVTs = DAG.getVTList(Ty, MVT::i32);
32325+ auto CmpZero = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, VE);
32326+ return SDValue(CmpZero.getNode(), 1);
32327+ }
32328+
32329+ SDValue X86TargetLowering::visitMaskedLoad(
32330+ SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, MachineMemOperand *MMO,
32331+ SDValue &NewLoad, SDValue Ptr, SDValue PassThru, SDValue Mask) const {
32332+ // @llvm.masked.load.v1*(ptr, alignment, mask, passthru)
32333+ // ->
32334+ // _, flags = SUB 0, mask
32335+ // res, chain = CLOAD inchain, ptr, (bit_cast_to_scalar passthru), cond, flags
32336+ // bit_cast_to_vector<res>
32337+ EVT VTy = PassThru.getValueType();
32338+ EVT Ty = VTy.getVectorElementType();
32339+ SDVTList Tys = DAG.getVTList(Ty, MVT::Other);
32340+ auto ScalarPassThru = PassThru.isUndef() ? DAG.getConstant(0, DL, Ty)
32341+ : DAG.getBitcast(Ty, PassThru);
32342+ auto Flags = getFlagsOfCmpZeroFori1(DAG, DL, Mask);
32343+ auto COND_NE = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8);
32344+ SDValue Ops[] = {Chain, Ptr, ScalarPassThru, COND_NE, Flags};
32345+ NewLoad = DAG.getMemIntrinsicNode(X86ISD::CLOAD, DL, Tys, Ops, Ty, MMO);
32346+ return DAG.getBitcast(VTy, NewLoad);
32347+ }
32348+
32349+ SDValue X86TargetLowering::visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL,
32350+ SDValue Chain,
32351+ MachineMemOperand *MMO, SDValue Ptr,
32352+ SDValue Val, SDValue Mask) const {
32353+ // llvm.masked.store.v1*(Src0, Ptr, alignment, Mask)
32354+ // ->
32355+ // _, flags = SUB 0, mask
32356+ // chain = CSTORE inchain, (bit_cast_to_scalar val), ptr, cond, flags
32357+ EVT Ty = Val.getValueType().getVectorElementType();
32358+ SDVTList Tys = DAG.getVTList(MVT::Other);
32359+ auto ScalarVal = DAG.getBitcast(Ty, Val);
32360+ auto Flags = getFlagsOfCmpZeroFori1(DAG, DL, Mask);
32361+ auto COND_NE = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8);
32362+ SDValue Ops[] = {Chain, ScalarVal, Ptr, COND_NE, Flags};
32363+ return DAG.getMemIntrinsicNode(X86ISD::CSTORE, DL, Tys, Ops, Ty, MMO);
32364+ }
32365+
3231832366/// Provide custom lowering hooks for some operations.
3231932367SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3232032368 switch (Op.getOpcode()) {
@@ -34031,6 +34079,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3403134079 NODE_NAME_CASE(STRICT_FP80_ADD)
3403234080 NODE_NAME_CASE(CCMP)
3403334081 NODE_NAME_CASE(CTEST)
34082+ NODE_NAME_CASE(CLOAD)
34083+ NODE_NAME_CASE(CSTORE)
3403434084 }
3403534085 return nullptr;
3403634086#undef NODE_NAME_CASE
@@ -55636,6 +55686,32 @@ static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {
5563655686 return SDValue();
5563755687}
5563855688
55689+ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
55690+ // res, flags2 = sub 0, (setcc cc, flag)
55691+ // cload/cstore ..., cond_ne, flag2
55692+ // ->
55693+ // cload/cstore cc, flag
55694+ if (N->getConstantOperandVal(3) != X86::COND_NE)
55695+ return SDValue();
55696+
55697+ SDValue Sub = N->getOperand(4);
55698+ if (Sub.getOpcode() != X86ISD::SUB)
55699+ return SDValue();
55700+
55701+ SDValue SetCC = Sub.getOperand(1);
55702+
55703+ if (!X86::isZeroNode(Sub.getOperand(0)) || SetCC.getOpcode() != X86ISD::SETCC)
55704+ return SDValue();
55705+
55706+ SmallVector<SDValue, 5> Ops(N->op_values());
55707+ Ops[3] = SetCC.getOperand(0);
55708+ Ops[4] = SetCC.getOperand(1);
55709+
55710+ return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops,
55711+ cast<MemSDNode>(N)->getMemoryVT(),
55712+ cast<MemSDNode>(N)->getMemOperand());
55713+ }
55714+
5563955715static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
5564055716 TargetLowering::DAGCombinerInfo &DCI,
5564155717 const X86Subtarget &Subtarget) {
@@ -57345,6 +57421,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5734557421 case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
5734657422 case X86ISD::ADD:
5734757423 case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget);
57424+ case X86ISD::CLOAD:
57425+ case X86ISD::CSTORE: return combineX86CloadCstore(N, DAG);
5734857426 case X86ISD::SBB: return combineSBB(N, DAG);
5734957427 case X86ISD::ADC: return combineADC(N, DAG, DCI);
5735057428 case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
0 commit comments