Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38997,6 +38997,28 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
}
case X86ISD::VPMADD52L:
case X86ISD::VPMADD52H: {
  // 52-bit integer fused multiply-add: multiply the low 52 bits of
  // operands 0 and 1 and add either the low (L) or high (H) 52 bits of the
  // 104-bit product to the 64-bit accumulator (operand 2).
  // These nodes are only ever created with v*i64 types, so assert instead
  // of guarding at runtime.
  assert(Op.getValueType().isVector() &&
         Op.getValueType().getScalarType() == MVT::i64 &&
         "Unexpected VPMADD52 type");
  KnownBits K0 =
      DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
  KnownBits K1 =
      DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
  KnownBits KAcc =
      DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
  // Only the low 52 bits of each multiplicand participate in the multiply.
  K0 = K0.trunc(52);
  K1 = K1.trunc(52);
  // VPMADD52L selects product bits [51:0], VPMADD52H bits [103:52].
  KnownBits KnownMul = (Op.getOpcode() == X86ISD::VPMADD52L)
                           ? KnownBits::mul(K0, K1)
                           : KnownBits::mulhu(K0, K1);
  KnownMul = KnownMul.zext(64);
  Known = KnownBits::add(KAcc, KnownMul);
  return;
}
}

// Handle target shuffles.
Expand Down
148 changes: 148 additions & 0 deletions llvm/test/CodeGen/X86/combine-vpmadd52.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefixes=CHECK,AVX


define <2 x i64> @test1_vpmadd52l(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test1_vpmadd52l:
; AVX512: # %bb.0:
Expand Down Expand Up @@ -102,5 +103,152 @@ define <2 x i64> @test_vpmadd52h(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
%1 = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %and, <2 x i64> %or)
ret <2 x i64> %1
}



; Both multiplicands are pre-masked to 52 bits. VPMADD52L only reads the low
; 52 bits of its multiplicands, so the masks are redundant and fold away
; (no vpand in the checked asm) on the 256-bit form.
define <4 x i64> @test4_vpmadd52l_vl256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; AVX512-LABEL: test4_vpmadd52l_vl256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test4_vpmadd52l_vl256:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
; AVX-NEXT:    retq
; keep only low 52 bits of %x1/%x2
  %m1 = and <4 x i64> %x1, splat (i64 4503599627370495) ; (1 << 52) - 1
  %m2 = and <4 x i64> %x2, splat (i64 4503599627370495) ; (1 << 52) - 1
  %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %m1, <4 x i64> %m2)
  ret <4 x i64> %r
}

; Same redundant-mask fold, but with descriptive operand names to make the
; intrinsic's operand order explicit: arg 0 is the accumulator, args 1 and 2
; are the multiplicands whose 52-bit masks fold away.
define <2 x i64> @test5_vpmadd52l_oporder(<2 x i64> %acc, <2 x i64> %mulA, <2 x i64> %mulB) {
; AVX512-LABEL: test5_vpmadd52l_oporder:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test5_vpmadd52l_oporder:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
; keep only low 52 bits of mulA/mulB
  %a = and <2 x i64> %mulA, splat (i64 4503599627370495) ; (1 << 52) - 1
  %b = and <2 x i64> %mulB, splat (i64 4503599627370495) ; (1 << 52) - 1
  %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %acc, <2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %r
}

; Negative test: the and/or operate on bits BELOW bit 52 (51-bit mask; bit 51
; forced to 1), so they change the multiplicand values the instruction reads
; and must NOT be folded away — the checked asm keeps the vpand/vpor.
define <4 x i64> @test6_vpmadd52l_under_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; AVX512-LABEL: test6_vpmadd52l_under_mask:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
; AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm2
; AVX512-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test6_vpmadd52l_under_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [2251799813685247,2251799813685247,2251799813685247,2251799813685247]
; AVX-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [2251799813685248,2251799813685248,2251799813685248,2251799813685248]
; AVX-NEXT:    vpor %ymm3, %ymm2, %ymm2
; AVX-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
; AVX-NEXT:    retq
; keep only low 51 bits of %x1, force bit 51 of %x2 to 1
  %andv = and <4 x i64> %x1, splat (i64 2251799813685247) ; (1 << 51) - 1
  %orv = or <4 x i64> %x2, splat (i64 2251799813685248) ; 1 << 51
  %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %andv, <4 x i64> %orv)
  ret <4 x i64> %r
}

; The redundant 52-bit multiplicand masks fold away for the H form too.
define <2 x i64> @test7_vpmadd52h_ok(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test7_vpmadd52h_ok:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test7_vpmadd52h_ok:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
; keep only low 52 bits of %x1/%x2
  %m1 = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1 << 52) - 1
  %m2 = and <2 x i64> %x2, splat (i64 4503599627370495) ; (1 << 52) - 1
  %r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %m1, <2 x i64> %m2)
  ret <2 x i64> %r
}

; Negative test: the 52-bit mask feeds the ACCUMULATOR operand (arg 0), not a
; multiplicand, and all 64 accumulator bits matter — the mask must NOT fold,
; and the checked asm keeps the vpand.
define <4 x i64> @test8_vpmadd52h_vl256_misplaced_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; AVX512-LABEL: test8_vpmadd52h_vl256_misplaced_mask:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; AVX512-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test8_vpmadd52h_vl256_misplaced_mask:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495]
; AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
; AVX-NEXT:    {vex} vpmadd52huq %ymm2, %ymm1, %ymm0
; AVX-NEXT:    retq
; keep only low 52 bits of %x1, then place it into %x0 operand position
  %mask = and <4 x i64> %x1, splat (i64 4503599627370495) ; (1 << 52) - 1
  %r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %mask, <4 x i64> %x1, <4 x i64> %x2)
  ret <4 x i64> %r
}

; Mixed and/or on the multiplicands that only touch bits the L-form multiply
; ignores (an and keeping the low 52 bits, an or setting only the high 12),
; so both operations fold away in the checked asm.
define <2 x i64> @test9_vpmadd52l_mix_and_or(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; AVX512-LABEL: test9_vpmadd52l_mix_and_or:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: test9_vpmadd52l_mix_and_or:
; AVX:       # %bb.0:
; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
; keep only low 52 bits of %x1
  %a = and <2 x i64> %x1, splat (i64 4503599627370495) ; (1 << 52) - 1
; force high 12 bits of %x2 to 1
  %b = or <2 x i64> %x2, splat (i64 -4503599627370496) ; ~((1 << 52) - 1) = -(1 << 52)
  %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %r
}
; Positive test: ensure only the high 12 bits from %acc propagate through VPMADD52L.
; The 52-bit masks on the multiplicands fold away (no vpand on ymm1/ymm2 in
; the checked asm); the accumulator mask and the final high-12-bit mask remain.
define <4 x i64> @knownbits_propagate_high_from_acc(<4 x i64> %acc, <4 x i64> %x1, <4 x i64> %x2) {
; AVX512-LABEL: knownbits_propagate_high_from_acc:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120]
; AVX512-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
; AVX512-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; AVX-LABEL: knownbits_propagate_high_from_acc:
; AVX:       # %bb.0:
; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120]
; AVX-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
; AVX-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX-NEXT:    retq
; mask high 12 bits of accumulator (unsigned value 18442240474082181120 == 0xFFF0000000000000)
  %acc_hi = and <4 x i64> %acc, splat (i64 -4503599627370496) ; ~((1 << 52) - 1) = -(1 << 52)
; keep only low 52 bits of multipliers
  %m1 = and <4 x i64> %x1, splat (i64 4503599627370495) ; (1 << 52) - 1
  %m2 = and <4 x i64> %x2, splat (i64 4503599627370495) ; (1 << 52) - 1
  %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %acc_hi, <4 x i64> %m1, <4 x i64> %m2)
; keep only high 12 bits
  %only_high = and <4 x i64> %r, splat (i64 -4503599627370496) ; ~((1 << 52) - 1) = -(1 << 52)
  ret <4 x i64> %only_high
}



declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}