Skip to content

Commit 8687ef7

Browse files
authored
[X86] SimplifyDemandedBitsForTargetNode - add handling for VPMADD52L/VPMADD52H (#155494)
Resolves #155387. The X86ISD::VPMADD52L/VPMADD52H nodes only demand the lower 52 bits of operands 0 / 1.
1 parent 60cdc3d commit 8687ef7

File tree

2 files changed: +139 additions, −0 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44957,6 +44957,24 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
4495744957
Known.Zero.setLowBits(Known2.countMinTrailingZeros());
4495844958
return false;
4495944959
}
44960+
case X86ISD::VPMADD52L:
44961+
case X86ISD::VPMADD52H: {
44962+
KnownBits KnownOp0, KnownOp1;
44963+
SDValue Op0 = Op.getOperand(0);
44964+
SDValue Op1 = Op.getOperand(1);
44965+
// Only demand the lower 52-bits of operands 0 / 1 (and all 64-bits of
44966+
// operand 2).
44967+
APInt Low52Bits = APInt::getLowBitsSet(BitWidth, 52);
44968+
if (SimplifyDemandedBits(Op0, Low52Bits, OriginalDemandedElts, KnownOp0,
44969+
TLO, Depth + 1))
44970+
return true;
44971+
44972+
if (SimplifyDemandedBits(Op1, Low52Bits, OriginalDemandedElts, KnownOp1,
44973+
TLO, Depth + 1))
44974+
return true;
44975+
// TODO: Compute the known bits for VPMADD52L/VPMADD52H.
44976+
break;
44977+
}
4496044978
}
4496144979

4496244980
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -60073,6 +60091,19 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
6007360091
return SDValue();
6007460092
}
6007560093

60094+
// Simplify VPMADD52L/VPMADD52H operations.
60095+
static SDValue combineVPMADD52LH(SDNode *N, SelectionDAG &DAG,
60096+
TargetLowering::DAGCombinerInfo &DCI) {
60097+
MVT VT = N->getSimpleValueType(0);
60098+
unsigned NumEltBits = VT.getScalarSizeInBits();
60099+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
60100+
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumEltBits),
60101+
DCI))
60102+
return SDValue(N, 0);
60103+
60104+
return SDValue();
60105+
}
60106+
6007660107
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
6007760108
TargetLowering::DAGCombinerInfo &DCI,
6007860109
const X86Subtarget &Subtarget) {
@@ -60710,6 +60741,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
6071060741
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
6071160742
case X86ISD::VPMADDUBSW:
6071260743
case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
60744+
case X86ISD::VPMADD52L:
60745+
case X86ISD::VPMADD52H: return combineVPMADD52LH(N, DAG, DCI);
6071360746
case X86ISD::KSHIFTL:
6071460747
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
6071560748
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX
4+
5+
; VPMADD52L only demands the low 52 bits of its multiplicand operands, so
; pre-masking one multiplicand with (1 << 52) - 1 is redundant and the AND
; must fold away (no vpand in the checked asm).
define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test1_vpmadd52l:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test1_vpmadd52l:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %x2)
  ret <2 x i64> %1
}
20+
21+
; Same as test1 but masking the other multiplicand (third intrinsic operand);
; the AND is likewise redundant and must fold away.
define <2 x i64> @test2_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test2_vpmadd52l:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test2_vpmadd52l:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %and = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1LL << 52) - 1
  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %and)
  ret <2 x i64> %1
}
35+
36+
; Masking one multiplicand to its low 52 bits AND setting bit 52 of the other
; are both no-ops for VPMADD52L (bit 52 and above are not demanded), so both
; the AND and the OR must fold away.
define <2 x i64> @test3_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test3_vpmadd52l:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test3_vpmadd52l:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
  %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
  ret <2 x i64> %1
}
51+
52+
; Negative test: the mask/set here touch bit 51, which IS inside the demanded
; low-52-bit range, so the AND and OR must NOT be removed (vpand/vpor remain
; in the checked asm).
define <2 x i64> @test_vpmadd52l_wrong_bits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test_vpmadd52l_wrong_bits:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
; AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm2
; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test_vpmadd52l_wrong_bits:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %and = and <2 x i64> %x1, splat (i64 2251799813685247) ; (1LL << 51) - 1
  %or = or <2 x i64> %x2, splat (i64 2251799813685248) ; 1LL << 51
  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
  ret <2 x i64> %1
}
71+
72+
; Negative test: the mask is applied to the accumulator operand (first
; intrinsic argument), for which all 64 bits are demanded, so the AND must
; NOT be removed (vpand remains in the checked asm).
define <2 x i64> @test_vpmadd52l_wrong_op(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test_vpmadd52l_wrong_op:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test_vpmadd52l_wrong_op:
; AVX:       # %bb.0:
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %and, <2 x i64> %x1, <2 x i64> %x2)
  ret <2 x i64> %1
}
88+
89+
; The same low-52-bit demanded-bits logic applies to the high-half form
; VPMADD52H: the redundant AND/OR on the multiplicands must fold away.
define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test_vpmadd52h:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test_vpmadd52h:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %and = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1LL << 52) - 1
  %or = or <2 x i64> %x2, splat (i64 4503599627370496) ; 1LL << 52
  %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
  ret <2 x i64> %1
}
105+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
106+
; CHECK: {{.*}}

0 commit comments

Comments (0)