diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a4381b99dbae0..eec596d6d44ab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57803,10 +57803,6 @@ static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {
 }
 
 static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
-  // res, flags2 = sub 0, (setcc cc, flag)
-  // cload/cstore ..., cond_ne, flag2
-  // ->
-  // cload/cstore cc, flag
   if (N->getConstantOperandVal(3) != X86::COND_NE)
     return SDValue();
 
@@ -57814,16 +57810,34 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
   if (Sub.getOpcode() != X86ISD::SUB)
     return SDValue();
 
-  SDValue SetCC = Sub.getOperand(1);
+  SDValue Op1 = Sub.getOperand(1);
 
-  if (!X86::isZeroNode(Sub.getOperand(0)) || SetCC.getOpcode() != X86ISD::SETCC)
+  if (!X86::isZeroNode(Sub.getOperand(0)))
     return SDValue();
 
+  SDLoc DL(N);
   SmallVector<SDValue, 5> Ops(N->op_values());
-  Ops[3] = SetCC.getOperand(0);
-  Ops[4] = SetCC.getOperand(1);
+  if (Op1.getOpcode() == X86ISD::SETCC) {
+    // res, flags2 = sub 0, (setcc cc, flag)
+    // cload/cstore ..., cond_ne, flag2
+    // ->
+    // cload/cstore cc, flag
+    Ops[3] = Op1.getOperand(0);
+    Ops[4] = Op1.getOperand(1);
+  } else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
+    // res, flags2 = sub 0, (and X, Y)
+    // cload/cstore ..., cond_ne, flag2
+    // ->
+    // res, flags2 = and X, Y
+    // cload/cstore ..., cond_ne, flag2
+    Ops[4] = DAG.getNode(X86ISD::AND, DL, Sub->getVTList(), Op1.getOperand(0),
+                         Op1.getOperand(1))
+                 .getValue(1);
+  } else {
+    return SDValue();
+  }
 
-  return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops,
+  return DAG.getMemIntrinsicNode(N->getOpcode(), DL, N->getVTList(), Ops,
                                  cast<MemIntrinsicSDNode>(N)->getMemoryVT(),
                                  cast<MemIntrinsicSDNode>(N)->getMemOperand());
 }
diff --git a/llvm/test/CodeGen/X86/apx/cf.ll b/llvm/test/CodeGen/X86/apx/cf.ll
index 8d104e5f3ced2..1e4ac3f419314 100644
--- a/llvm/test/CodeGen/X86/apx/cf.ll
+++ b/llvm/test/CodeGen/X86/apx/cf.ll
@@ -158,3 +158,39 @@ entry:
   tail call void @llvm.masked.store.v1i16.p0(<1 x i16> %5, ptr %p, i32 2, <1 x i1> %1)
   ret void
 }
+
+define void @load_zext(i1 %cond, ptr %b, ptr %p) {
+; CHECK-LABEL: load_zext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $1, %dil
+; CHECK-NEXT:    cfcmovnew (%rsi), %ax
+; CHECK-NEXT:    movzwl %ax, %eax
+; CHECK-NEXT:    cfcmovnel %eax, (%rdx)
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast i1 %cond to <1 x i1>
+  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i16> poison)
+  %2 = bitcast <1 x i16> %1 to i16
+  %zext = zext i16 %2 to i32
+  %3 = bitcast i32 %zext to <1 x i32>
+  call void @llvm.masked.store.v1i32.p0(<1 x i32> %3, ptr %p, i32 4, <1 x i1> %0)
+  ret void
+}
+
+define void @load_sext(i1 %cond, ptr %b, ptr %p) {
+; CHECK-LABEL: load_sext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andb $1, %dil
+; CHECK-NEXT:    cfcmovnel (%rsi), %eax
+; CHECK-NEXT:    cltq
+; CHECK-NEXT:    cfcmovneq %rax, (%rdx)
+; CHECK-NEXT:    retq
+entry:
+  %0 = bitcast i1 %cond to <1 x i1>
+  %1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 2, <1 x i1> %0, <1 x i32> poison)
+  %2 = bitcast <1 x i32> %1 to i32
+  %zext = sext i32 %2 to i64
+  %3 = bitcast i64 %zext to <1 x i64>
+  call void @llvm.masked.store.v1i64.p0(<1 x i64> %3, ptr %p, i32 4, <1 x i1> %0)
+  ret void
+}
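
Note (illustrative, not part of the patch): a condensed, standalone sketch of the store half of the new tests, showing a minimal shape that reaches combineX86CloadCstore. The function name and RUN invocation are assumptions, not taken from cf.ll. Per the CHECK lines in the tests above, the <1 x i1> mask bit is clamped with `andb $1`, and with this combine the cfcmovne consumes EFLAGS directly from that `and` rather than from a `sub 0, (and X, Y)` materializing the mask.

  ; RUN line assumed, mirroring cf.ll's APX/CF configuration:
  ;   llc -mtriple=x86_64 -mattr=+cf < %s
  define void @store_masked(i1 %cond, i32 %v, ptr %p) {
  entry:
    ; Scalar mask bit; per the tests above this becomes an `andb $1`
    ; whose flags the conditional-faulting store can now test directly.
    %m = bitcast i1 %cond to <1 x i1>
    %vv = bitcast i32 %v to <1 x i32>
    call void @llvm.masked.store.v1i32.p0(<1 x i32> %vv, ptr %p, i32 4, <1 x i1> %m)
    ret void
  }

  declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32 immarg, <1 x i1>)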