-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[X86] Remove restriction to pre-type legalization for combine in scalarizeExtEltFP
#117682
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Remove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where v8i1 is a legal type but i1 itself is not.
|
@llvm/pr-subscribers-backend-x86 Author: None (abhishek-kaushik22) ChangesRemove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where Full diff: https://github.com/llvm/llvm-project/pull/117682.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e4533570f75086..0a0c54492c2995 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45842,7 +45842,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
/// Extracting a scalar FP value from vector element 0 is free, so extract each
/// operand first, then perform the math as a scalar op.
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+ const X86Subtarget &Subtarget,
+ TargetLowering::DAGCombinerInfo &DCI) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
@@ -45877,23 +45878,30 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
// Vector FP selects don't fit the pattern of FP math ops (because the
// condition has a different type and we have to change the opcode), so deal
// with those here.
- // FIXME: This is restricted to pre type legalization by ensuring the setcc
- // has i1 elements. If we loosen this we need to convert vector bool to a
- // scalar bool.
if (Vec.getOpcode() == ISD::VSELECT &&
Vec.getOperand(0).getOpcode() == ISD::SETCC &&
Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
// ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
SDLoc DL(ExtElt);
- SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- Vec.getOperand(0).getValueType().getScalarType(),
- Vec.getOperand(0), Index);
- SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(1), Index);
- SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(2), Index);
- return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+ bool AfterLegalize = DCI.getDAGCombineLevel() >= llvm::AfterLegalizeTypes;
+ EVT VecOperandType = Vec.getOperand(0).getValueType();
+ EVT LegalType = DAG.getTargetLoweringInfo().getTypeToTransformTo(
+ *DAG.getContext(), VecOperandType.getScalarType());
+ // The second condition checks that we don't create an invalid extract e.g
+ // 32 bit extract from a v*i64. This will cause a crash on 32-bit machines.
+ if (!AfterLegalize ||
+ LegalType.getSizeInBits() >= VecOperandType.getScalarSizeInBits()) {
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ AfterLegalize ? LegalType
+ : VecOperandType.getScalarType(),
+ Vec.getOperand(0), Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ Vec.getOperand(1), Index);
+ SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ Vec.getOperand(2), Index);
+ return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+ }
}
// TODO: This switch could include FNEG and the x86-specific FP logic ops
@@ -46242,7 +46250,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
return V;
- if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
+ if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget, DCI))
return V;
if (CIdx)
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 944f6bbfd0bfbe..967493d3ac608b 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -320,10 +320,18 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
; X64-LABEL: extvselectsetcc_crash:
; X64: # %bb.0:
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
-; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
+; X64-NEXT: vmovq %xmm1, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: jne .{{\.?LBB[0-9]+_[0-9]+}}
+; X64-NEXT: # %bb.2:
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: jmp .{{\.?LBB[0-9]+_[0-9]+}}
+; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
+; X64-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
+; X64-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm2 # xmm2 = xmm0[1,0]
+; X64-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT: retq
;
; X86-LABEL: extvselectsetcc_crash:
|
| ; X64-NEXT: vmovq %xmm1, %rax | ||
| ; X64-NEXT: testq %rax, %rax | ||
| ; X64-NEXT: jne .{{\.?LBB[0-9]+_[0-9]+}} | ||
| ; X64-NEXT: # %bb.2: | ||
| ; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 | ||
| ; X64-NEXT: jmp .{{\.?LBB[0-9]+_[0-9]+}} | ||
| ; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}: | ||
| ; X64-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] | ||
| ; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}: | ||
| ; X64-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm2 # xmm2 = xmm0[1,0] | ||
| ; X64-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0] | ||
| ; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks regression to me.
|
Please can you regenerate extractelement-fp.ll as there looks to be other changes |
|
(sorry I had missed that this had been replaced with #117681) |
Remove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where
v8i1is a legal type buti1itself is not.Closes #117684