From 10d57c128717ba78c348875c6149f755f90fc490 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Tue, 26 Nov 2024 13:30:34 +0530 Subject: [PATCH 1/5] [X86] Enforce strict pre-legalization to combine in scalarizeExtEltFP --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e4533570f7508..365ae660ea91e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45842,7 +45842,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, /// Extracting a scalar FP value from vector element 0 is free, so extract each /// operand first, then perform the math as a scalar op. static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { + const X86Subtarget &Subtarget, + TargetLowering::DAGCombinerInfo &DCI) { assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract"); SDValue Vec = ExtElt->getOperand(0); SDValue Index = ExtElt->getOperand(1); @@ -45877,10 +45878,10 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, // Vector FP selects don't fit the pattern of FP math ops (because the // condition has a different type and we have to change the opcode), so deal // with those here. - // FIXME: This is restricted to pre type legalization by ensuring the setcc - // has i1 elements. If we loosen this we need to convert vector bool to a - // scalar bool. - if (Vec.getOpcode() == ISD::VSELECT && + // FIXME: This is restricted to pre type legalization. If we loosen this we + // need to convert vector bool to a scalar bool. + if (DCI.getDAGCombineLevel() < llvm::AfterLegalizeTypes && + Vec.getOpcode() == ISD::VSELECT && Vec.getOperand(0).getOpcode() == ISD::SETCC && Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 && Vec.getOperand(0).getOperand(0).getValueType() == VecVT) { @@ -46242,7 +46243,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineArithReduction(N, DAG, Subtarget)) return V; - if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget)) + if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget, DCI)) return V; if (CIdx) From 36686cfd0f92d57b50b9215e1f78a8e2c0224112 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Tue, 26 Nov 2024 15:42:18 +0530 Subject: [PATCH 2/5] Add assert on type --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 365ae660ea91e..29de35ce688a5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45883,8 +45883,9 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, if (DCI.getDAGCombineLevel() < llvm::AfterLegalizeTypes && Vec.getOpcode() == ISD::VSELECT && Vec.getOperand(0).getOpcode() == ISD::SETCC && - Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 && Vec.getOperand(0).getOperand(0).getValueType() == VecVT) { + assert(Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 && + "Unexpected cond type for combine"); // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0) SDLoc DL(ExtElt); SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, From 556a6a0523d6bb583de270912869bc7cef8851b3 Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Tue, 26 Nov 2024 18:25:03 +0530 Subject: [PATCH 3/5] Create extract-vselect-setcc.ll --- .../test/CodeGen/X86/extract-vselect-setcc.ll | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 llvm/test/CodeGen/X86/extract-vselect-setcc.ll diff --git a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll new file mode 100644 index 0000000000000..3c39be10b73e7 --- /dev/null +++ b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll @@ -0,0 +1,38 @@ +; RUN: llc < %s | FileCheck %s + +define void @extvselectsetcc_illegal(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 { +; CHECK-LABEL: extvselectsetcc_illegal: +; CHECK: # %bb.0: +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1 +; CHECK-NEXT: vbroadcastss .LCPI0_0(%rip), %xmm0 # xmm0 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z} +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmulss .LCPI0_0(%rip), %xmm0, %xmm2 +; CHECK-NEXT: vbroadcastss %xmm2, %ymm2 +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmoveq %rdx, %rsi +; CHECK-NEXT: vmovups %ymm2, (%rsi) +; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: vmovups %ymm0, (%rdx) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %cmp = fcmp olt <8 x float> %vec, zeroinitializer + %sel1 = select <8 x i1> %cmp, <8 x float> zeroinitializer, <8 x float> + + %fmul1 = fmul <8 x float> zeroinitializer, %sel1 + %shuffle = shufflevector <8 x float> %fmul1, <8 x float> zeroinitializer, <8 x i32> zeroinitializer + %fmul2 = fmul <8 x float> %shuffle, + + %sel2 = select i1 %cond, ptr %ptr1, ptr %ptr2 + store <8 x float> %fmul2, ptr %sel2, align 4 + %fmul3 = fmul <8 x float> %shuffle, zeroinitializer + store <8 x float> %fmul3, ptr %ptr2, align 4 + ret void +} + +attributes #0 = { "target-cpu"="skylake-avx512" } From 0f17a589750528fae8361f2fccadf11fb7e8674c Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Wed, 27 Nov 2024 12:04:14 +0530 Subject: [PATCH 4/5] Update extract-vselect-setcc.ll --- .../test/CodeGen/X86/extract-vselect-setcc.ll | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll index 3c39be10b73e7..81ab104cab283 100644 --- a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll +++ b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll @@ -1,33 +1,34 @@ -; RUN: llc < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64 | FileCheck %s -define void @extvselectsetcc_illegal(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 { -; CHECK-LABEL: extvselectsetcc_illegal: +define void @PR117684(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 { +; CHECK-LABEL: PR117684: ; CHECK: # %bb.0: -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1 -; CHECK-NEXT: vbroadcastss .LCPI0_0(%rip), %xmm0 # xmm0 = [NaN,NaN,NaN,NaN] -; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vmulss .LCPI0_0(%rip), %xmm0, %xmm2 -; CHECK-NEXT: vbroadcastss %xmm2, %ymm2 -; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: cmoveq %rdx, %rsi -; CHECK-NEXT: vmovups %ymm2, (%rsi) -; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 -; CHECK-NEXT: vmovups %ymm0, (%rdx) -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1 +; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z} +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; CHECK-NEXT: vbroadcastss %xmm2, %ymm2 +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: cmoveq %rdx, %rsi +; CHECK-NEXT: vmovups %ymm2, (%rsi) +; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 +; CHECK-NEXT: vmovups %ymm0, (%rdx) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq %cmp = fcmp olt <8 x float> %vec, zeroinitializer - %sel1 = select <8 x i1> %cmp, <8 x float> zeroinitializer, <8 x float> - %cmp, <8 x float> zeroinitializer, <8 x float> + %fmul1 = fmul <8 x float> zeroinitializer, %sel1 %shuffle = shufflevector <8 x float> %fmul1, <8 x float> zeroinitializer, <8 x i32> zeroinitializer - %fmul2 = fmul <8 x float> %shuffle, - + %fmul2 = fmul <8 x float> %shuffle, + %sel2 = select i1 %cond, ptr %ptr1, ptr %ptr2 store <8 x float> %fmul2, ptr %sel2, align 4 %fmul3 = fmul <8 x float> %shuffle, zeroinitializer From 6c22bb3a661621de34b90dd3889d89efd904ddcd Mon Sep 17 00:00:00 2001 From: abhishek-kaushik22 Date: Wed, 27 Nov 2024 12:06:54 +0530 Subject: [PATCH 5/5] Update X86ISelLowering.cpp --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 29de35ce688a5..50efb0a7e9ef5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45880,8 +45880,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG, // with those here. // FIXME: This is restricted to pre type legalization. If we loosen this we // need to convert vector bool to a scalar bool. - if (DCI.getDAGCombineLevel() < llvm::AfterLegalizeTypes && - Vec.getOpcode() == ISD::VSELECT && + if (DCI.isBeforeLegalize() && Vec.getOpcode() == ISD::VSELECT && Vec.getOperand(0).getOpcode() == ISD::SETCC && Vec.getOperand(0).getOperand(0).getValueType() == VecVT) { assert(Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&