Skip to content

Commit 379e121

Browse files
authored
[X86] Compute the known bits for VPMADD52L/VPMADD52H in SimplifyDemandedBitsForTargetNode (#156847)
Address TODO and compute the known bits with the intermediate result.
1 parent 8f37668 commit 379e121

File tree

2 files changed

+115
-3
lines changed

2 files changed

+115
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44964,7 +44964,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
   }
   case X86ISD::VPMADD52L:
   case X86ISD::VPMADD52H: {
-    KnownBits KnownOp0, KnownOp1;
+    KnownBits KnownOp0, KnownOp1, KnownOp2;
     SDValue Op0 = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(1);
     SDValue Op2 = Op.getOperand(2);
@@ -44979,6 +44979,10 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
                              TLO, Depth + 1))
       return true;

+    if (SimplifyDemandedBits(Op2, APInt::getAllOnes(64), OriginalDemandedElts,
+                             KnownOp2, TLO, Depth + 1))
+      return true;
+
     KnownBits KnownMul;
     KnownOp0 = KnownOp0.trunc(52);
     KnownOp1 = KnownOp1.trunc(52);
@@ -44993,8 +44997,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ADD, DL, VT, C, Op2));
     }

-    // TODO: Compute the known bits for VPMADD52L/VPMADD52H.
-    break;
+    Known = KnownBits::add(KnownMul, KnownOp2);
+    return false;
   }
   }

llvm/test/CodeGen/X86/combine-vpmadd52.ll

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,111 @@ define <2 x i64> @test_vpmadd52h_mul_hi52_negative(<2 x i64> %x0, <2 x i64> %x1,
   %1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and1, <2 x i64> %and2)
   ret <2 x i64> %1
 }
+
+define <2 x i64> @test1_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 4)
+  %and2 = and <2 x i64> %x1, splat (i64 4)
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test1_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test1_knownbits_vpmadd52h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [3,3]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 3), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 3)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52l:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1234,1234]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 67108864) ; 1LL << 26
+  %and2 = and <2 x i64> %x1, splat (i64 33554432) ; 1LL << 25
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1234), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1234)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test2_knownbits_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; CHECK-LABEL: test2_knownbits_vpmadd52h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = [1,1]
+; CHECK-NEXT:    # xmm0 = mem[0,0]
+; CHECK-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 1073741824) ; 1LL << 30
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  ; add (1LL << 20) + 1
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1025), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52l_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX512-NEXT:    vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52l_negative:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX-NEXT:    {vex} vpmadd52luq %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 67108865) ; (1LL << 26) + 1
+  %or = or <2 x i64> %x1, splat (i64 1)
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %or)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}
+
+define <2 x i64> @test3_knownbits_vpmadd52h_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [1,1]
+; AVX512-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX512-NEXT:    vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX512-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test3_knownbits_vpmadd52h_negative:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [1,1]
+; AVX-NEXT:    vmovdqa %xmm2, %xmm3
+; AVX-NEXT:    {vex} vpmadd52huq %xmm1, %xmm0, %xmm3
+; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm0
+; AVX-NEXT:    retq
+  %and1 = and <2 x i64> %x0, splat (i64 4194304) ; 1LL << 22
+  %and2 = and <2 x i64> %x1, splat (i64 1073741824) ; 1LL << 30
+  ; add (1LL << 20) + 1
+  %madd = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> splat(i64 1), <2 x i64> %and1, <2 x i64> %and2)
+  %ret = and <2 x i64> %madd, splat (i64 1)
+  ret <2 x i64> %ret
+}

0 commit comments

Comments
 (0)