18 changes: 18 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51348,6 +51348,8 @@ static SDValue combineAndOrForCcmpCtest(SDNode *N, SelectionDAG &DAG,
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
+  using namespace SDPatternMatch;
+
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
@@ -51482,6 +51484,22 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
    }
  }

+  // On AVX512 targets, attempt to reverse foldVSelectToSignBitSplatMask
+  // to make use of predicated selects.
+  // AND(X,SEXT(SETCC())) -> SELECT(SETCC(),X,0)
+  if (DCI.isAfterLegalizeDAG() && VT.isVector()) {
+    SDValue X, Y;
+    EVT CondVT = VT.changeVectorElementType(MVT::i1);
+    if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(CondVT) &&
+        sd_match(N, m_And(m_Value(X),
+                          m_OneUse(m_SExt(m_AllOf(
+                              m_Value(Y), m_SpecificVT(CondVT),
+                              m_SetCC(m_Value(), m_Value(), m_Value()))))))) {
+      return DAG.getSelect(dl, VT, Y, X,
+                           getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl));
+    }
+  }
+
  // Fold AND(SRL(X,Y),1) -> SETCC(BT(X,Y), COND_B) iff Y is not a constant
  // avoids slow variable shift (moving shift amount to ECX etc.)
  if (isOneConstant(N1) && N0->hasOneUse()) {
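Note: the fold is sound because a sign-extended vXi1 compare result is all-ones or all-zeros per lane, so ANDing with it behaves exactly like a select against zero. A minimal scalar model of that equivalence (an illustrative sketch only, not part of the patch; the function names are invented):

```cpp
#include <cassert>
#include <cstdint>

// AND(X, SEXT(SETCC())): the compare yields i1, sign-extension turns it
// into an all-ones or all-zeros mask, and the AND keeps or clears X.
int32_t and_of_sext(int32_t x, int32_t a, int32_t b) {
  int32_t mask = -static_cast<int32_t>(a < b); // SEXT(SETCC()): 0 or -1
  return x & mask;
}

// SELECT(SETCC(), X, 0): the predicated form the combine produces.
int32_t select_or_zero(int32_t x, int32_t a, int32_t b) {
  return (a < b) ? x : 0;
}

int main() {
  for (int32_t a : {-4, 0, 9})
    for (int32_t b : {-1, 0, 9})
      for (int32_t x : {0, 0x5A5A5A5A, -7})
        assert(and_of_sext(x, a, b) == select_or_zero(x, a, b));
  return 0;
}
```

With the pattern expressed as a select, AVX512 instruction selection can sink the k-mask into the consuming operation's {z} zero-masking instead of materializing it with vpmovm2*/vpand*, which is what the updated tests below check.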
20 changes: 8 additions & 12 deletions llvm/test/CodeGen/X86/gfni-lzcnt.ll
@@ -360,14 +360,12 @@ define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; GFNIAVX512BW-LABEL: testv64i8:
; GFNIAVX512BW: # %bb.0:
; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; GFNIAVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
; GFNIAVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; GFNIAVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; GFNIAVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
-; GFNIAVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
-; GFNIAVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
-; GFNIAVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; GFNIAVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; GFNIAVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; GFNIAVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
+; GFNIAVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
+; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; GFNIAVX512BW-NEXT: retq
%out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
ret <64 x i8> %out
@@ -494,14 +492,12 @@ define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; GFNIAVX512BW-LABEL: testv64i8u:
; GFNIAVX512BW: # %bb.0:
; GFNIAVX512BW-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
+; GFNIAVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
; GFNIAVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; GFNIAVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; GFNIAVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
-; GFNIAVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
-; GFNIAVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
-; GFNIAVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; GFNIAVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; GFNIAVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; GFNIAVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
+; GFNIAVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
+; GFNIAVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; GFNIAVX512BW-NEXT: retq
%out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
ret <64 x i8> %out
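The test deltas above show the payoff: the vptestnmb result now predicates the vpshufb directly as a zero-mask, replacing the vpmovm2b + vpandq pair. A rough standalone intrinsics sketch of the before/after shapes (assumes AVX512BW; the helper names and reduced lzcnt context are invented for illustration):

```cpp
#include <immintrin.h>

// Old shape: expand the k-mask to a byte vector, then AND it in
// (vptestnmb + vpshufb + vpmovm2b + vpandq).
__m512i lut_and_old(__m512i lut, __m512i idx, __m512i hi) {
  __mmask64 k = _mm512_testn_epi8_mask(hi, hi);  // k[i] = (hi[i] == 0)
  __m512i lo = _mm512_shuffle_epi8(lut, idx);    // table lookup
  __m512i mask = _mm512_movm_epi8(k);            // 0xFF / 0x00 per lane
  return _mm512_and_si512(mask, lo);             // AND(X, SEXT(SETCC()))
}

// New shape: the AND becomes zero-masking on the shuffle itself
// (vptestnmb + vpshufb {%k1} {z}).
__m512i lut_and_new(__m512i lut, __m512i idx, __m512i hi) {
  __mmask64 k = _mm512_testn_epi8_mask(hi, hi);
  return _mm512_maskz_shuffle_epi8(k, lut, idx); // SELECT(SETCC(), X, 0)
}
```

One fewer instruction per lookup, and no zmm register is burned holding the expanded mask.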
40 changes: 16 additions & 24 deletions llvm/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -369,14 +369,12 @@ define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
+; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm2
-; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm2, %zmm1
-; AVX512BW-NEXT: vpaddb %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm3, %zmm1
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
@@ -455,14 +453,12 @@ define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
+; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm2
-; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm2, %zmm1
-; AVX512BW-NEXT: vpaddb %zmm3, %zmm1, %zmm1
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm3, %zmm1
; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
@@ -561,14 +557,12 @@ define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
+; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
-; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: testv64i8:
@@ -651,14 +645,12 @@ define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
+; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k1
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm2 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
; AVX512BW-NEXT: # zmm2 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm3
-; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0
-; AVX512BW-NEXT: vptestnmb %zmm1, %zmm1, %k0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm2, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: testv64i8u:
18 changes: 6 additions & 12 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -149,10 +149,8 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64
define <64 x i8> @combine_vpermi2var_v64i8_with_mask(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2) {
; CHECK-LABEL: combine_vpermi2var_v64i8_with_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0
-; CHECK-NEXT: vpmovb2m %zmm1, %k0
-; CHECK-NEXT: vpmovm2b %k0, %zmm1
-; CHECK-NEXT: vpandq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vpmovb2m %zmm1, %k1
+; CHECK-NEXT: vpermt2b %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT: ret{{[l|q]}}
%perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %a0, <64 x i8> %a1, <64 x i8> %a2)
%cmp = icmp slt <64 x i8> %a1, zeroinitializer
@@ -177,19 +175,15 @@ define <64 x i8> @combine_vpermi2var_constant_v64i8_with_mask(<64 x i8> %a0) {
; X86-LABEL: combine_vpermi2var_constant_v64i8_with_mask:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63]
-; X86-NEXT: vpermt2b {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
-; X86-NEXT: vpmovb2m %zmm0, %k0
-; X86-NEXT: vpmovm2b %k0, %zmm0
-; X86-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; X86-NEXT: vpmovb2m %zmm0, %k1
+; X86-NEXT: vpermi2b {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: combine_vpermi2var_constant_v64i8_with_mask:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63]
-; X64-NEXT: vpermt2b {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
-; X64-NEXT: vpmovb2m %zmm0, %k0
-; X64-NEXT: vpmovm2b %k0, %zmm0
-; X64-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; X64-NEXT: vpmovb2m %zmm0, %k1
+; X64-NEXT: vpermi2b {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
%perm = tail call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> %a0, <64 x i8> <i8 64, i8 65, i8 66, i8 67, i8 68, i8 69, i8 70, i8 71, i8 72, i8 73, i8 74, i8 75, i8 76, i8 77, i8 78, i8 79, i8 80, i8 81, i8 82, i8 83, i8 84, i8 85, i8 86, i8 87, i8 88, i8 89, i8 90, i8 91, i8 92, i8 93, i8 94, i8 95, i8 96, i8 97, i8 98, i8 99, i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127>)
%cmp = icmp slt <64 x i8> %a0, zeroinitializer
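In these vpermi2var tests the mask comes from a sign-bit test (vpmovb2m) rather than a zero test, but the combine applies the same way: the vpandq disappears into zero-masking on the permute. A hedged standalone intrinsics sketch of the rewrite (assumes AVX512VBMI + AVX512BW; helper names are invented):

```cpp
#include <immintrin.h>

// Old shape: do the permute, materialize the sign-bit mask as bytes, AND
// (vpermt2b + vpmovb2m + vpmovm2b + vpandq).
__m512i perm_mask_old(__m512i a0, __m512i a1, __m512i a2) {
  __m512i perm = _mm512_permutex2var_epi8(a0, a1, a2); // a1 selects lanes
  __mmask64 k = _mm512_movepi8_mask(a1);               // k[i] = (a1[i] < 0)
  __m512i mask = _mm512_movm_epi8(k);                  // expand k to bytes
  return _mm512_and_si512(mask, perm);
}

// New shape: zero-mask the permute directly (vpmovb2m + vpermt2b {%k1} {z}).
__m512i perm_mask_new(__m512i a0, __m512i a1, __m512i a2) {
  __mmask64 k = _mm512_movepi8_mask(a1);
  return _mm512_maskz_permutex2var_epi8(k, a0, a1, a2);
}
```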