Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48624,6 +48624,45 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
return SDValue();
}

/// Try to rewrite a PTEST of a sign-masked AND as a TESTPS/TESTPD node.
///
/// \p Val is the ISD::AND value that feeds both operands of the PTEST (the
/// caller is expected to have peeked through bitcasts already), \p DL is the
/// debug location for the new node and \p PTestVT is the result type of the
/// PTEST that is being replaced.
///
/// Two shapes are recognised, where `signmask` is a constant splat of the
/// minimum signed value for a 32-bit or 64-bit element:
///   (ptest (and X, signmask), (and X, signmask))        -> (testp X, X)
///   (ptest (and (and X, signmask), Y), ...)             -> (testp X, Y)
/// The second shape is matched with the masked AND on either side of the
/// outer AND.
///
/// NOTE(review): (testp X, Y) only reproduces the ZF result of the original
/// PTEST for the second shape; this presumably relies on the caller only
/// consuming ZF (COND_E / COND_NE) - confirm against combinePTESTCC.
///
/// \returns the new X86ISD::TESTP node, or an empty SDValue if no fold applies.
static SDValue canFoldToTESTP(SDValue Val, const SDLoc &DL, const EVT PTestVT,
                              SelectionDAG &DAG,
                              const X86Subtarget &Subtarget) {
  // TESTPS/TESTPD are only emitted when AVX is available.
  if (!Subtarget.hasAVX())
    return SDValue();

  // Both folds look at the two operands of an AND; bail out on anything else
  // instead of indexing operands that may not exist.
  if (Val.getOpcode() != ISD::AND)
    return SDValue();

  // The float vector type built below is only meaningful for 128/256-bit
  // vectors of 32-bit or 64-bit elements.
  EVT VT = Val.getValueType();
  if (!VT.is128BitVector() && !VT.is256BitVector())
    return SDValue();

  unsigned EltBits = VT.getScalarSizeInBits();
  if (EltBits != 32 && EltBits != 64)
    return SDValue();

  MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
  MVT FloatVT = MVT::getVectorVT(FloatSVT, VT.getVectorNumElements());

  // True if V is a constant splat whose every EltBits-wide element has only
  // the sign bit set.
  auto IsSignMaskSplat = [EltBits](SDValue V) {
    APInt Splat;
    return ISD::isConstantSplatVector(V.getNode(), Splat) &&
           Splat.getBitWidth() == EltBits && Splat.isMinSignedValue();
  };

  SDValue Op0 = Val.getOperand(0);
  SDValue Op1 = Val.getOperand(1);

  // (ptest (and Op0, splat(minSignedVal)), (and Op0, splat(minSignedVal))) ->
  // (testp Op0, Op0)
  if (IsSignMaskSplat(Op1)) {
    SDValue FpOp0 = DAG.getBitcast(FloatVT, Op0);
    return DAG.getNode(X86ISD::TESTP, DL, PTestVT, FpOp0, FpOp0);
  }

  // (ptest (and (and X, splat(minSignedVal)), Y), ...) -> (testp X, Y)
  // The masked AND may appear as either operand of the outer AND, so try
  // both positions (operand 0 first, matching the original behaviour).
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    SDValue Masked = Val.getOperand(Idx);
    SDValue Other = Val.getOperand(1 - Idx);
    if (Masked.getOpcode() != ISD::AND ||
        !IsSignMaskSplat(Masked.getOperand(1)))
      continue;

    SDValue FpX = DAG.getBitcast(FloatVT, Masked.getOperand(0));
    SDValue FpY = DAG.getBitcast(FloatVT, Other);
    return DAG.getNode(X86ISD::TESTP, DL, PTestVT, FpX, FpY);
  }

  return SDValue();
}

/// If we are inverting a PTEST/TESTP operand, attempt to adjust the CC
/// to avoid the inversion.
static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
Expand Down Expand Up @@ -48718,6 +48757,10 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
SDValue BC = peekThroughBitcasts(Op0);
EVT BCVT = BC.getValueType();

if (EFLAGS.getOpcode() == X86ISD::PTEST && BC.getOpcode() == ISD::AND)
if (SDValue V = canFoldToTESTP(BC, SDLoc(EFLAGS), VT, DAG, Subtarget))
return V;

// TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y)
if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
Expand Down
56 changes: 12 additions & 44 deletions llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -875,28 +875,12 @@ define i1 @mask_v8i32(<8 x i32> %a0) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: mask_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: mask_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: mask_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-LABEL: mask_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vtestps %ymm0, %ymm0
; AVX-NEXT: sete %al
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
%2 = and i32 %1, 2147483648
%3 = icmp eq i32 %2, 0
Expand Down Expand Up @@ -965,28 +949,12 @@ define i1 @signtest_v8i32(<8 x i32> %a0) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: signtest_v8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vptest {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: signtest_v8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
; AVX2-NEXT: vptest %ymm1, %ymm0
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: signtest_v8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372039002259456,9223372039002259456,9223372039002259456,9223372039002259456]
; AVX512-NEXT: vptest %ymm1, %ymm0
; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-LABEL: signtest_v8i32:
; AVX: # %bb.0:
; AVX-NEXT: vtestps %ymm0, %ymm0
; AVX-NEXT: sete %al
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%1 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a0)
%2 = icmp sgt i32 %1, -1
ret i1 %2
Expand Down
Loading