[DAG] combineVSelectWithAllOnesOrZeros - fold select Cond, 0, x -> and not(Cond), x #147472
Conversation
@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-selectiondag

Author: woruyu (woruyu)

Changes

Summary
This patch extends the work from #145298 by removing the now-unnecessary X86-specific combineVSelectWithLastZeros logic. That combine is now handled correctly, and more generally, in the target-independent combineVSelectWithAllOnesOrZeros. This simplifies the X86 DAG combine logic and avoids duplication.

Fixes: #144513

Full diff: https://github.com/llvm/llvm-project/pull/147472.diff

4 Files Affected:
- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
- llvm/lib/Target/X86/X86ISelLowering.cpp
- llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
- llvm/test/CodeGen/AArch64/vselect-constants.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e7f1fdf10719a..4f06e45cdd0c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9902,11 +9902,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue Combined = visitADDLike(N))
return Combined;
- // fold !(x cc y) -> (x !cc y)
+ // fold xor (setcc x y cc) -1 -> setcc x y !cc
+ // Avoid breaking: and (xor (setcc x y cc) -1) z -> andn for vec
unsigned N0Opcode = N0.getOpcode();
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1) &&
- isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
+ isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true) &&
+ !(N->hasOneUse() && TLI.hasAndNot(SDValue(N, 0)) &&
+ N->use_begin()->getUser()->getOpcode() == ISD::AND && VT.isVector())) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
LHS.getValueType());
if (!LegalOperations ||
@@ -13165,6 +13168,15 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
return DAG.getBitcast(VT, And);
}
+ // select Cond, 0, x -> and not(Cond), x
+ if (IsTAllZero &&
+ (isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
+ SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue And =
+ DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
+ return DAG.getBitcast(VT, And);
+ }
+
return SDValue();
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fae80f25f71d1..fd617f7062313 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47260,57 +47260,6 @@ static SDValue combineToExtendBoolVectorInReg(
DAG.getConstant(EltSizeInBits - 1, DL, VT));
}
-/// If a vector select has an left operand that is 0, try to simplify the
-/// select to a bitwise logic operation.
-/// TODO: Move to DAGCombiner.combineVSelectWithAllOnesOrZeros, possibly using
-/// TargetLowering::hasAndNot()?
-static SDValue combineVSelectWithLastZeros(SDNode *N, SelectionDAG &DAG,
- const SDLoc &DL,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- SDValue Cond = N->getOperand(0);
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
- EVT VT = LHS.getValueType();
- EVT CondVT = Cond.getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
- if (N->getOpcode() != ISD::VSELECT)
- return SDValue();
-
- assert(CondVT.isVector() && "Vector select expects a vector selector!");
-
- // To use the condition operand as a bitwise mask, it must have elements that
- // are the same size as the select elements. Ie, the condition operand must
- // have already been promoted from the IR select condition type <N x i1>.
- // Don't check if the types themselves are equal because that excludes
- // vector floating-point selects.
- if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
- return SDValue();
-
- // Cond value must be 'sign splat' to be converted to a logical op.
- if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
- return SDValue();
-
- if (!TLI.isTypeLegal(CondVT))
- return SDValue();
-
- // vselect Cond, 000..., X -> andn Cond, X
- if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
- SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
- SDValue AndN;
- // The canonical form differs for i1 vectors - x86andnp is not used
- if (CondVT.getScalarType() == MVT::i1)
- AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
- CastRHS);
- else
- AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
- return DAG.getBitcast(VT, AndN);
- }
-
- return SDValue();
-}
-
/// If both arms of a vector select are concatenated vectors, split the select,
/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
/// vselect Cond, (concat T0, T1), (concat F0, F1) -->
@@ -48052,9 +48001,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget))
return SDValue();
- if (SDValue V = combineVSelectWithLastZeros(N, DAG, DL, DCI, Subtarget))
- return V;
-
if (SDValue V = combineVSelectToBLENDV(N, DAG, DL, DCI, Subtarget))
return V;
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index d916f26f9b26b..c48ee3939bd2e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -30,7 +30,9 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: // %bb.1: // %vector.body
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr z4, [x0]
+; CHECK-NEXT: ldr z5, [x0, #2, mul vl]
+; CHECK-NEXT: ldr z6, [x0, #3, mul vl]
; CHECK-NEXT: umov w8, v0.b[8]
; CHECK-NEXT: mov v1.b[1], v0.b[1]
; CHECK-NEXT: fmov s2, w8
@@ -60,31 +62,20 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: asr z1.s, z1.s, #31
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: lsl z0.s, z0.s, #31
-; CHECK-NEXT: and z1.s, z1.s, #0x1
+; CHECK-NEXT: bic z1.d, z4.d, z1.d
; CHECK-NEXT: lsl z2.s, z2.s, #31
+; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
-; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: str z1, [x0]
; CHECK-NEXT: lsl z3.s, z3.s, #31
; CHECK-NEXT: asr z2.s, z2.s, #31
-; CHECK-NEXT: and z0.s, z0.s, #0x1
+; CHECK-NEXT: bic z0.d, z5.d, z0.d
; CHECK-NEXT: asr z3.s, z3.s, #31
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
-; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
-; CHECK-NEXT: ldr z0, [x0, #2, mul vl]
-; CHECK-NEXT: and z3.s, z3.s, #0x1
-; CHECK-NEXT: str z1, [x0]
-; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
-; CHECK-NEXT: ldr z3, [x0, #3, mul vl]
-; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
-; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
-; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT: bic z1.d, z4.d, z2.d
; CHECK-NEXT: str z0, [x0, #2, mul vl]
+; CHECK-NEXT: bic z3.d, z6.d, z3.d
+; CHECK-NEXT: str z1, [x0, #1, mul vl]
; CHECK-NEXT: str z3, [x0, #3, mul vl]
-; CHECK-NEXT: str z2, [x0, #1, mul vl]
; CHECK-NEXT: .LBB1_2: // %exit
; CHECK-NEXT: ret
%broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll
index fe125c9626ea3..3c1f06e0e4ed1 100644
--- a/llvm/test/CodeGen/AArch64/vselect-constants.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll
@@ -169,11 +169,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_0_or_1_vec:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: shl v0.4s, v0.4s, #31
-; CHECK-NEXT: cmge v0.4s, v0.4s, #0
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %add
RKSimon left a comment:
Cheers!
Hi @RKSimon,
Summary
This patch extends the work from #145298 by removing the now-unnecessary X86-specific combineVSelectWithLastZeros logic. That combine is now correctly and more generally handled in the target-independent combineVSelectWithAllOnesOrZeros.
This simplifies the X86 DAG combine logic and avoids duplication.
Fixes: #144513
Related for reference: #146831
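For readers skimming the fold itself, the sketch below is a minimal standalone illustration (plain C++, not LLVM code; one vector lane modelled as a single uint32_t sign-splat mask) of the identity the new combine relies on: when each lane of Cond is all-ones or all-zeros, select Cond, 0, x is equivalent to and(not(Cond), x), which targets with an and-not instruction (x86 ANDNP, AArch64 BIC) can lower to a single operation.

#include <cassert>
#include <cstdint>

// One vector lane, with the select condition modelled as a sign-splat mask:
// every lane is either all-ones (0xFFFFFFFF) or all-zeros (0x00000000).
static uint32_t select_zero_or_x(uint32_t cond, uint32_t x) {
  return cond ? 0u : x; // select Cond, 0, x
}

static uint32_t andnot_fold(uint32_t cond, uint32_t x) {
  return ~cond & x; // and not(Cond), x
}

int main() {
  const uint32_t x = 0xDEADBEEFu;
  // The two assertions cover both values a sign-splat lane can take.
  assert(select_zero_or_x(0x00000000u, x) == andnot_fold(0x00000000u, x)); // cond false -> x
  assert(select_zero_or_x(0xFFFFFFFFu, x) == andnot_fold(0xFFFFFFFFu, x)); // cond true  -> 0
  return 0;
}

The actual combine additionally guards on isBitwiseNot(peekThroughBitcasts(Cond)) or TLI.hasAndNot(Cond) before forming the AND, as shown in the DAGCombiner hunk above, so the fold only fires where the and-not form is available or already cheap.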