-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[X86] X86TargetLowering::computeKnownBitsForTargetNode - add X86ISD::VPMADD52L\H handling #156349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
1d75814
b3bf194
10ecfec
8e41815
64c3e95
f8d3ae9
51f53bd
ff40da4
39e3283
e4b366a
b095c3f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38994,9 +38994,49 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, | |
computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth); | ||
break; | ||
} | ||
|
||
} | ||
break; | ||
} | ||
case X86ISD::VPMADD52L: | ||
case X86ISD::VPMADD52H: { | ||
EVT VT = Op.getValueType(); | ||
if (!VT.isVector() || VT.getScalarSizeInBits() != 64) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. remove unused VT variable |
||
Known.resetAll(); | ||
return; | ||
} | ||
|
||
const unsigned BW = 64; | ||
APInt Low52 = APInt::getLowBitsSet(BW, 52); | ||
APInt High12 = APInt::getBitsSetFrom(BW, 52); | ||
|
||
KnownBits K0 = | ||
DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); | ||
KnownBits K1 = | ||
DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); | ||
KnownBits KAcc = | ||
DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); | ||
|
||
if ((K0.Zero & Low52) == Low52 || (K1.Zero & Low52) == Low52) { | ||
RKSimon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
Known = KAcc; | ||
return; | ||
} | ||
RKSimon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
KnownBits AddendKB(BW); | ||
AddendKB.Zero |= High12; | ||
|
||
KnownBits OutKB = | ||
KnownBits::computeForAddSub(true, false, false, KAcc, AddendKB); | ||
Known = OutKB; | ||
|
||
if ((KAcc.Zero & Low52) == Low52) { | ||
Known.One |= (KAcc.One & High12); | ||
Known.Zero |= (KAcc.Zero & High12); | ||
Known.Zero &= ~Known.One; | ||
} | ||
|
||
return; | ||
} | ||
} | ||
|
||
// Handle target shuffles. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefix=AVX512 | ||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefix=AVX | ||
|
||
|
||
|
||
; 256-bit vpmadd52l case: %x1 and %x2 are each ANDed with 4503599627370495
; (2^52 - 1) before being passed to the intrinsic. The expected output for both
; run lines contains only the vpmadd52luq instruction, with no separate AND.
define <4 x i64> @test4_vpmadd52l_vl256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { | ||
; AVX512-LABEL: test4_vpmadd52l_vl256: | ||
; AVX512: # %bb.0: | ||
; AVX512-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 | ||
; AVX512-NEXT: retq | ||
; | ||
; AVX-LABEL: test4_vpmadd52l_vl256: | ||
; AVX: # %bb.0: | ||
; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 | ||
; AVX-NEXT: retq | ||
%m1 = and <4 x i64> %x1, splat (i64 4503599627370495) | ||
XChy marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
%m2 = and <4 x i64> %x2, splat (i64 4503599627370495) | ||
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %m1, <4 x i64> %m2) | ||
ret <4 x i64> %r | ||
} | ||
|
||
|
||
|
||
; 128-bit vpmadd52l case with descriptively named arguments: %acc is passed
; first, and %mulA / %mulB are each ANDed with 4503599627370495 (2^52 - 1)
; before the call. The expected output contains only vpmadd52luq, with no
; separate AND instruction.
define <2 x i64> @test5_vpmadd52l_oporder(<2 x i64> %acc, <2 x i64> %mulA, <2 x i64> %mulB) { | ||
; AVX512-LABEL: test5_vpmadd52l_oporder: | ||
; AVX512: # %bb.0: | ||
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 | ||
; AVX512-NEXT: retq | ||
; | ||
; AVX-LABEL: test5_vpmadd52l_oporder: | ||
; AVX: # %bb.0: | ||
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 | ||
; AVX-NEXT: retq | ||
%a = and <2 x i64> %mulA, splat (i64 4503599627370495) | ||
%b = and <2 x i64> %mulB, splat (i64 4503599627370495) | ||
%r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %acc, <2 x i64> %a, <2 x i64> %b) | ||
ret <2 x i64> %r | ||
} | ||
|
||
|
||
|
||
; Negative case: %x1 is ANDed with 2251799813685247 (2^51 - 1) and %x2 is ORed
; with 2251799813685248 (2^51), so these operations touch bits inside the low
; 52-bit range. The expected output keeps both the AND and the OR ahead of
; vpmadd52luq.
define <4 x i64> @test6_vpmadd52l_under_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { | ||
; AVX512-LABEL: test6_vpmadd52l_under_mask: | ||
; AVX512: # %bb.0: | ||
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1 | ||
; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm2 | ||
; AVX512-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 | ||
; AVX512-NEXT: retq | ||
; | ||
; AVX-LABEL: test6_vpmadd52l_under_mask: | ||
; AVX: # %bb.0: | ||
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [2251799813685247,2251799813685247,2251799813685247,2251799813685247] | ||
; AVX-NEXT: vpand %ymm3, %ymm1, %ymm1 | ||
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [2251799813685248,2251799813685248,2251799813685248,2251799813685248] | ||
; AVX-NEXT: vpor %ymm3, %ymm2, %ymm2 | ||
; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 | ||
; AVX-NEXT: retq | ||
%and = and <4 x i64> %x1, splat (i64 2251799813685247) | ||
%or = or <4 x i64> %x2, splat (i64 2251799813685248) | ||
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %and, <4 x i64> %or) | ||
ret <4 x i64> %r | ||
} | ||
|
||
|
||
|
||
; 128-bit vpmadd52h case: %x1 and %x2 are each ANDed with 4503599627370495
; (2^52 - 1) before the call. The expected output contains only vpmadd52huq,
; with no separate AND instruction.
define <2 x i64> @test7_vpmadd52h_ok(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { | ||
XChy marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
; AVX512-LABEL: test7_vpmadd52h_ok: | ||
; AVX512: # %bb.0: | ||
; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 | ||
; AVX512-NEXT: retq | ||
; | ||
; AVX-LABEL: test7_vpmadd52h_ok: | ||
; AVX: # %bb.0: | ||
; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 | ||
; AVX-NEXT: retq | ||
%m1 = and <2 x i64> %x1, splat (i64 4503599627370495) | ||
%m2 = and <2 x i64> %x2, splat (i64 4503599627370495) | ||
%r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %m1, <2 x i64> %m2) | ||
ret <2 x i64> %r | ||
} | ||
|
||
; Negative case: the 2^52 - 1 mask is applied to %x1, but the masked value is
; passed as the FIRST call argument while the unmasked %x1 is passed as the
; second. The expected output keeps the AND ahead of vpmadd52huq.
define <4 x i64> @test8_vpmadd52h_vl256_misplaced_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { | ||
; AVX512-LABEL: test8_vpmadd52h_vl256_misplaced_mask: | ||
; AVX512: # %bb.0: | ||
; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0 | ||
; AVX512-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 | ||
; AVX512-NEXT: retq | ||
; | ||
; AVX-LABEL: test8_vpmadd52h_vl256_misplaced_mask: | ||
; AVX: # %bb.0: | ||
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495] | ||
; AVX-NEXT: vpand %ymm0, %ymm1, %ymm0 | ||
; AVX-NEXT: {vex} vpmadd52huq %ymm2, %ymm1, %ymm0 | ||
; AVX-NEXT: retq | ||
%mask = and <4 x i64> %x1, splat (i64 4503599627370495) | ||
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %mask, <4 x i64> %x1, <4 x i64> %x2) | ||
ret <4 x i64> %r | ||
} | ||
|
||
|
||
|
||
; Mixed case: %x1 is ANDed with 4503599627370495 (2^52 - 1) and %x2 is ORed
; with an all-zero splat before the call. The expected output contains only
; vpmadd52luq, with neither the AND nor the OR remaining.
define <2 x i64> @test9_vpmadd52l_mix_and_or(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { | ||
; AVX512-LABEL: test9_vpmadd52l_mix_and_or: | ||
; AVX512: # %bb.0: | ||
; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 | ||
; AVX512-NEXT: retq | ||
; | ||
; AVX-LABEL: test9_vpmadd52l_mix_and_or: | ||
; AVX: # %bb.0: | ||
; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 | ||
; AVX-NEXT: retq | ||
%a = and <2 x i64> %x1, splat (i64 4503599627370495) | ||
%b = or <2 x i64> %x2, splat (i64 0) | ||
XChy marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
%r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %a, <2 x i64> %b) | ||
ret <2 x i64> %r | ||
} | ||
|
||
; %acc is ANDed with -4503599627370496 (-(2^52), i.e. only bits 52..63 kept),
; and %x1 / %x2 are each ANDed with 2^52 - 1. The intrinsic result is then
; ANDed again with the same high-bits mask. The expected output retains a vpand
; both before and after vpmadd52luq (the mask constant prints as
; 18442240474082181120 = 2^64 - 2^52).
define <4 x i64> @knownbits_propagate_high_from_acc(<4 x i64> %acc, <4 x i64> %x1, <4 x i64> %x2) { | ||
; AVX512-LABEL: knownbits_propagate_high_from_acc: | ||
; AVX512: # %bb.0: | ||
; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120] | ||
; AVX512-NEXT: vpand %ymm3, %ymm0, %ymm0 | ||
; AVX512-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 | ||
; AVX512-NEXT: vpand %ymm3, %ymm0, %ymm0 | ||
; AVX512-NEXT: retq | ||
; | ||
; AVX-LABEL: knownbits_propagate_high_from_acc: | ||
; AVX: # %bb.0: | ||
; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120] | ||
; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0 | ||
; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 | ||
; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0 | ||
; AVX-NEXT: retq | ||
%acc_hi = and <4 x i64> %acc, | ||
|
||
<i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496> | ||
|
||
|
||
%m1 = and <4 x i64> %x1, | ||
<i64 4503599627370495, i64 4503599627370495, i64 4503599627370495, i64 4503599627370495> | ||
%m2 = and <4 x i64> %x2, | ||
<i64 4503599627370495, i64 4503599627370495, i64 4503599627370495, i64 4503599627370495> | ||
|
||
%r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %acc_hi, <4 x i64> %m1, <4 x i64> %m2) | ||
|
||
|
||
%only_high = and <4 x i64> %r, | ||
<i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496> | ||
ret <4 x i64> %only_high | ||
} | ||
|
||
|
||
|
||
|
||
; ---- intrinsics decls ---- | ||
declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>) | ||
declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>) | ||
declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>) | ||
declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this should never happen - so just assert instead: