Skip to content

Commit 86879d4

Browse files
authored
[X86] Only fold AND/ANDNP back to VSELECT if we know the predicated mask select is legal (#156663)
By only checking type legality we didn't account for 128/256-bit ops being run on non-AVX512VL targets, or vXi8/i16 ops being run on non-AVX512BW targets. This check is cropping up in several places now and I intend to hoist it out into a common helper, but this initial fix needs to be as clean as possible to be backported to 21.X. Fixes #156256
1 parent 20b4f59 commit 86879d4

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51856,6 +51856,8 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5185651856
SDValue X, Y;
5185751857
EVT CondVT = VT.changeVectorElementType(MVT::i1);
5185851858
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(CondVT) &&
51859+
(VT.is512BitVector() || Subtarget.hasVLX()) &&
51860+
(VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
5185951861
sd_match(N, m_And(m_Value(X),
5186051862
m_OneUse(m_SExt(m_AllOf(
5186151863
m_Value(Y), m_SpecificVT(CondVT),
@@ -55420,6 +55422,8 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
5542055422
SDValue Src = N0.getOperand(0);
5542155423
EVT SrcVT = Src.getValueType();
5542255424
if (Src.getOpcode() == ISD::SETCC && SrcVT.getScalarType() == MVT::i1 &&
55425+
(VT.is512BitVector() || Subtarget.hasVLX()) &&
55426+
(VT.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
5542355427
TLI.isTypeLegal(SrcVT) && N0.hasOneUse() && Src.hasOneUse())
5542455428
return DAG.getSelect(DL, VT, DAG.getNOT(DL, Src, SrcVT), N1,
5542555429
getZeroVector(VT, Subtarget, DAG, DL));

llvm/test/CodeGen/X86/pr156256.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=AVX512
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefix=AVX512VL
4+
5+
define <16 x i16> @PR156256(<16 x i32> %a, <16 x i32> %b) {
6+
; AVX512-LABEL: PR156256:
7+
; AVX512: # %bb.0:
8+
; AVX512-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
9+
; AVX512-NEXT: vpmovm2d %k0, %zmm0
10+
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
11+
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
12+
; AVX512-NEXT: retq
13+
;
14+
; AVX512VL-LABEL: PR156256:
15+
; AVX512VL: # %bb.0:
16+
; AVX512VL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
17+
; AVX512VL-NEXT: vpmovm2d %k0, %zmm0
18+
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
19+
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
20+
; AVX512VL-NEXT: retq
21+
%icmp = icmp ugt <16 x i32> %a, %b
22+
%sext = sext <16 x i1> %icmp to <16 x i16>
23+
%and = and <16 x i16> %sext, splat (i16 16256)
24+
ret <16 x i16> %and
25+
}

0 commit comments

Comments
 (0)