Skip to content

Conversation

@abhishek-kaushik22
Copy link
Contributor

@abhishek-kaushik22 abhishek-kaushik22 commented Nov 26, 2024

Remove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where v8i1 is a legal type but i1 itself is not.

Closes #117684

Remove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where v8i1 is a legal type but i1 itself is not.
@llvmbot
Copy link
Member

llvmbot commented Nov 26, 2024

@llvm/pr-subscribers-backend-x86

Author: None (abhishek-kaushik22)

Changes

Remove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where v8i1 is a legal type but i1 itself is not.


Full diff: https://github.com/llvm/llvm-project/pull/117682.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+21-13)
  • (modified) llvm/test/CodeGen/X86/extractelement-fp.ll (+12-4)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e4533570f75086..0a0c54492c2995 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45842,7 +45842,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
 /// Extracting a scalar FP value from vector element 0 is free, so extract each
 /// operand first, then perform the math as a scalar op.
 static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
-                                 const X86Subtarget &Subtarget) {
+                                 const X86Subtarget &Subtarget,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
   assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
   SDValue Vec = ExtElt->getOperand(0);
   SDValue Index = ExtElt->getOperand(1);
@@ -45877,23 +45878,30 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
   // Vector FP selects don't fit the pattern of FP math ops (because the
   // condition has a different type and we have to change the opcode), so deal
   // with those here.
-  // FIXME: This is restricted to pre type legalization by ensuring the setcc
-  // has i1 elements. If we loosen this we need to convert vector bool to a
-  // scalar bool.
   if (Vec.getOpcode() == ISD::VSELECT &&
       Vec.getOperand(0).getOpcode() == ISD::SETCC &&
       Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
       Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
     // ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
     SDLoc DL(ExtElt);
-    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
-                               Vec.getOperand(0).getValueType().getScalarType(),
-                               Vec.getOperand(0), Index);
-    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
-                               Vec.getOperand(1), Index);
-    SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
-                               Vec.getOperand(2), Index);
-    return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+    bool AfterLegalize = DCI.getDAGCombineLevel() >= llvm::AfterLegalizeTypes;
+    EVT VecOperandType = Vec.getOperand(0).getValueType();
+    EVT LegalType = DAG.getTargetLoweringInfo().getTypeToTransformTo(
+        *DAG.getContext(), VecOperandType.getScalarType());
+    // The second condition checks that we don't create an invalid extract e.g
+    // 32 bit extract from a v*i64. This will cause a crash on 32-bit machines.
+    if (!AfterLegalize ||
+        LegalType.getSizeInBits() >= VecOperandType.getScalarSizeInBits()) {
+      SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                                 AfterLegalize ? LegalType
+                                               : VecOperandType.getScalarType(),
+                                 Vec.getOperand(0), Index);
+      SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                                 Vec.getOperand(1), Index);
+      SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                                 Vec.getOperand(2), Index);
+      return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+    }
   }
 
   // TODO: This switch could include FNEG and the x86-specific FP logic ops
@@ -46242,7 +46250,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineArithReduction(N, DAG, Subtarget))
     return V;
 
-  if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
+  if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget, DCI))
     return V;
 
   if (CIdx)
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 944f6bbfd0bfbe..967493d3ac608b 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -320,10 +320,18 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
 ; X64-LABEL: extvselectsetcc_crash:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; X64-NEXT:    vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
-; X64-NEXT:    vandpd %xmm2, %xmm1, %xmm1
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
+; X64-NEXT:    vmovq   %xmm1, %rax
+; X64-NEXT:    testq   %rax, %rax
+; X64-NEXT:    jne     .{{\.?LBB[0-9]+_[0-9]+}}
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    vxorpd  %xmm1, %xmm1, %xmm1
+; X64-NEXT:    jmp     .{{\.?LBB[0-9]+_[0-9]+}}
+; X64-NEXT:  .{{\.?LBB[0-9]+_[0-9]+}}:
+; X64-NEXT:    vmovsd  {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; X64-NEXT:  .{{\.?LBB[0-9]+_[0-9]+}}:
+; X64-NEXT:    vshufpd $1, %xmm0, %xmm0, %xmm2 # xmm2 = xmm0[1,0]
+; X64-NEXT:    vunpcklpd       %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: extvselectsetcc_crash:

Comment on lines +323 to +334
; X64-NEXT: vmovq %xmm1, %rax
; X64-NEXT: testq %rax, %rax
; X64-NEXT: jne .{{\.?LBB[0-9]+_[0-9]+}}
; X64-NEXT: # %bb.2:
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: jmp .{{\.?LBB[0-9]+_[0-9]+}}
; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
; X64-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
; X64-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm2 # xmm2 = xmm0[1,0]
; X64-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0]
; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks regression to me.

@RKSimon
Copy link
Collaborator

RKSimon commented Nov 26, 2024

Please can you regenerate extractelement-fp.ll as there looks to be other changes

@RKSimon
Copy link
Collaborator

RKSimon commented Nov 26, 2024

(sorry I had missed that this had been replaced with #117681)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[X86] Unexpected Illegal Type

4 participants