Skip to content

Commit 9bdf683

Browse files
[X86] Enforce strict pre-legalization to combine in scalarizeExtEltFP (#117681)
Use the `DCI` object to check the DAG combine level directly, instead of inferring "before type legalization" from the setcc condition having `i1` elements. That inference fails on AVX512, where vector types with `i1` elements such as `v8i1` still exist after legalization. Closes #117684
1 parent e573c6b commit 9bdf683

File tree

2 files changed

+47
-7
lines changed

2 files changed

+47
-7
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45857,7 +45857,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
4585745857
/// Extracting a scalar FP value from vector element 0 is free, so extract each
4585845858
/// operand first, then perform the math as a scalar op.
4585945859
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
45860-
const X86Subtarget &Subtarget) {
45860+
const X86Subtarget &Subtarget,
45861+
TargetLowering::DAGCombinerInfo &DCI) {
4586145862
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
4586245863
SDValue Vec = ExtElt->getOperand(0);
4586345864
SDValue Index = ExtElt->getOperand(1);
@@ -45892,13 +45893,13 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
4589245893
// Vector FP selects don't fit the pattern of FP math ops (because the
4589345894
// condition has a different type and we have to change the opcode), so deal
4589445895
// with those here.
45895-
// FIXME: This is restricted to pre type legalization by ensuring the setcc
45896-
// has i1 elements. If we loosen this we need to convert vector bool to a
45897-
// scalar bool.
45898-
if (Vec.getOpcode() == ISD::VSELECT &&
45896+
// FIXME: This is restricted to pre type legalization. If we loosen this we
45897+
// need to convert vector bool to a scalar bool.
45898+
if (DCI.isBeforeLegalize() && Vec.getOpcode() == ISD::VSELECT &&
4589945899
Vec.getOperand(0).getOpcode() == ISD::SETCC &&
45900-
Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
4590145900
Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
45901+
assert(Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
45902+
"Unexpected cond type for combine");
4590245903
// ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
4590345904
SDLoc DL(ExtElt);
4590445905
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
@@ -46257,7 +46258,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
4625746258
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
4625846259
return V;
4625946260

46260-
if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
46261+
if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget, DCI))
4626146262
return V;
4626246263

4626346264
if (CIdx)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
3+
4+
define void @PR117684(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 {
5+
; CHECK-LABEL: PR117684:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
8+
; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1
9+
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
10+
; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z}
11+
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
12+
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
13+
; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
14+
; CHECK-NEXT: vbroadcastss %xmm2, %ymm2
15+
; CHECK-NEXT: testb $1, %dil
16+
; CHECK-NEXT: cmoveq %rdx, %rsi
17+
; CHECK-NEXT: vmovups %ymm2, (%rsi)
18+
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
19+
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
20+
; CHECK-NEXT: vmovups %ymm0, (%rdx)
21+
; CHECK-NEXT: vzeroupper
22+
; CHECK-NEXT: retq
23+
%cmp = fcmp olt <8 x float> %vec, zeroinitializer
24+
%sel1 = select <8 x i1> %cmp, <8 x float> zeroinitializer, <8 x float>
25+
<float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000,
26+
float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
27+
%fmul1 = fmul <8 x float> zeroinitializer, %sel1
28+
%shuffle = shufflevector <8 x float> %fmul1, <8 x float> zeroinitializer, <8 x i32> zeroinitializer
29+
%fmul2 = fmul <8 x float> %shuffle,
30+
<float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000,
31+
float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
32+
%sel2 = select i1 %cond, ptr %ptr1, ptr %ptr2
33+
store <8 x float> %fmul2, ptr %sel2, align 4
34+
%fmul3 = fmul <8 x float> %shuffle, zeroinitializer
35+
store <8 x float> %fmul3, ptr %ptr2, align 4
36+
ret void
37+
}
38+
39+
attributes #0 = { "target-cpu"="skylake-avx512" }

0 commit comments

Comments
 (0)