Skip to content

Commit 230f5c6

Browse files
[X86] Remove restriction to pre-type legalization for scalarizeExtEltFP
Remove the pre-type-legalization restriction from this combine, since it was already not being enforced on AVX512, where v8i1 is a legal type but i1 itself is not.
1 parent 5e3f615 commit 230f5c6

File tree

2 files changed

+33
-17
lines changed

2 files changed

+33
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45842,7 +45842,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
4584245842
/// Extracting a scalar FP value from vector element 0 is free, so extract each
4584345843
/// operand first, then perform the math as a scalar op.
4584445844
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
45845-
const X86Subtarget &Subtarget) {
45845+
const X86Subtarget &Subtarget,
45846+
TargetLowering::DAGCombinerInfo &DCI) {
4584645847
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
4584745848
SDValue Vec = ExtElt->getOperand(0);
4584845849
SDValue Index = ExtElt->getOperand(1);
@@ -45877,23 +45878,30 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
4587745878
// Vector FP selects don't fit the pattern of FP math ops (because the
4587845879
// condition has a different type and we have to change the opcode), so deal
4587945880
// with those here.
45880-
// FIXME: This is restricted to pre type legalization by ensuring the setcc
45881-
// has i1 elements. If we loosen this we need to convert vector bool to a
45882-
// scalar bool.
4588345881
if (Vec.getOpcode() == ISD::VSELECT &&
4588445882
Vec.getOperand(0).getOpcode() == ISD::SETCC &&
4588545883
Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
4588645884
Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
4588745885
// ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
4588845886
SDLoc DL(ExtElt);
45889-
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
45890-
Vec.getOperand(0).getValueType().getScalarType(),
45891-
Vec.getOperand(0), Index);
45892-
SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
45893-
Vec.getOperand(1), Index);
45894-
SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
45895-
Vec.getOperand(2), Index);
45896-
return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
45887+
bool AfterLegalize = DCI.getDAGCombineLevel() >= llvm::AfterLegalizeTypes;
45888+
EVT VecOperandType = Vec.getOperand(0).getValueType();
45889+
EVT LegalType = DAG.getTargetLoweringInfo().getTypeToTransformTo(
45890+
*DAG.getContext(), VecOperandType.getScalarType());
45891+
// The second condition checks that we don't create an invalid extract, e.g.,
45892+
// a 32-bit extract from a v*i64. This will cause a crash on 32-bit machines.
45893+
if (!AfterLegalize ||
45894+
LegalType.getSizeInBits() >= VecOperandType.getScalarSizeInBits()) {
45895+
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
45896+
AfterLegalize ? LegalType
45897+
: VecOperandType.getScalarType(),
45898+
Vec.getOperand(0), Index);
45899+
SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
45900+
Vec.getOperand(1), Index);
45901+
SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
45902+
Vec.getOperand(2), Index);
45903+
return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
45904+
}
4589745905
}
4589845906

4589945907
// TODO: This switch could include FNEG and the x86-specific FP logic ops
@@ -46242,7 +46250,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
4624246250
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
4624346251
return V;
4624446252

46245-
if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
46253+
if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget, DCI))
4624646254
return V;
4624746255

4624846256
if (CIdx)

llvm/test/CodeGen/X86/extractelement-fp.ll

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -320,10 +320,18 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
320320
; X64-LABEL: extvselectsetcc_crash:
321321
; X64: # %bb.0:
322322
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
323-
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
324-
; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
325-
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
326-
; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
323+
; X64-NEXT: vmovq %xmm1, %rax
324+
; X64-NEXT: testq %rax, %rax
325+
; X64-NEXT: jne .{{\.?LBB[0-9]+_[0-9]+}}
326+
; X64-NEXT: # %bb.2:
327+
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
328+
; X64-NEXT: jmp .{{\.?LBB[0-9]+_[0-9]+}}
329+
; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
330+
; X64-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
331+
; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
332+
; X64-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm2 # xmm2 = xmm0[1,0]
333+
; X64-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0]
334+
; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
327335
; X64-NEXT: retq
328336
;
329337
; X86-LABEL: extvselectsetcc_crash:

0 commit comments

Comments
 (0)