-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[X86] X86TargetLowering::computeKnownBitsForTargetNode - add X86ISD::VPMADD52L\H handling #156349
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
… for VPMADD52L/VPMADD52H nodes-1
@llvm/pr-subscribers-backend-x86 Author: 黃國庭 (houngkoungting) Changes: Fix #155386 ~ @RKSimon Full diff: https://github.com/llvm/llvm-project/pull/156349.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ab21cf534b304..733651a7adc62 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38994,9 +38994,49 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
break;
}
+
}
break;
}
+ case X86ISD::VPMADD52L:
+ case X86ISD::VPMADD52H: {
+ EVT VT = Op.getValueType();
+ if (!VT.isVector() || VT.getScalarSizeInBits() != 64) {
+ Known.resetAll();
+ return;
+ }
+
+ const unsigned BW = 64;
+ APInt Low52 = APInt::getLowBitsSet(BW, 52);
+ APInt High12 = APInt::getBitsSetFrom(BW, 52);
+
+ KnownBits K0 =
+ DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ KnownBits K1 =
+ DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ KnownBits KAcc =
+ DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+ if ((K0.Zero & Low52) == Low52 || (K1.Zero & Low52) == Low52) {
+ Known = KAcc;
+ return;
+ }
+
+ KnownBits AddendKB(BW);
+ AddendKB.Zero |= High12;
+
+ KnownBits OutKB =
+ KnownBits::computeForAddSub(true, false, false, KAcc, AddendKB);
+ Known = OutKB;
+
+ if ((KAcc.Zero & Low52) == Low52) {
+ Known.One |= (KAcc.One & High12);
+ Known.Zero |= (KAcc.Zero & High12);
+ Known.Zero &= ~Known.One;
+ }
+
+ return;
+ }
}
// Handle target shuffles.
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52-1.ll b/llvm/test/CodeGen/X86/combine-vpmadd52-1.ll
new file mode 100644
index 0000000000000..8aefb5b8c373f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52-1.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefix=AVX
+
+
+define <4 x i64> @test4_vpmadd52l_vl256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: test4_vpmadd52l_vl256:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test4_vpmadd52l_vl256:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX-NEXT: retq
+ %m1 = and <4 x i64> %x1, splat (i64 4503599627370495)
+ %m2 = and <4 x i64> %x2, splat (i64 4503599627370495)
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %m1, <4 x i64> %m2)
+ ret <4 x i64> %r
+}
+
+
+
+define <2 x i64> @test5_vpmadd52l_oporder(<2 x i64> %acc, <2 x i64> %mulA, <2 x i64> %mulB) {
+; AVX512-LABEL: test5_vpmadd52l_oporder:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test5_vpmadd52l_oporder:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %a = and <2 x i64> %mulA, splat (i64 4503599627370495)
+ %b = and <2 x i64> %mulB, splat (i64 4503599627370495)
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %acc, <2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %r
+}
+
+
+
+define <4 x i64> @test6_vpmadd52l_under_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: test6_vpmadd52l_under_mask:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm2
+; AVX512-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test6_vpmadd52l_under_mask:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [2251799813685247,2251799813685247,2251799813685247,2251799813685247]
+; AVX-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [2251799813685248,2251799813685248,2251799813685248,2251799813685248]
+; AVX-NEXT: vpor %ymm3, %ymm2, %ymm2
+; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX-NEXT: retq
+ %and = and <4 x i64> %x1, splat (i64 2251799813685247)
+ %or = or <4 x i64> %x2, splat (i64 2251799813685248)
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %and, <4 x i64> %or)
+ ret <4 x i64> %r
+}
+
+
+
+define <2 x i64> @test7_vpmadd52h_ok(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test7_vpmadd52h_ok:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test7_vpmadd52h_ok:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %m1 = and <2 x i64> %x1, splat (i64 4503599627370495)
+ %m2 = and <2 x i64> %x2, splat (i64 4503599627370495)
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %m1, <2 x i64> %m2)
+ ret <2 x i64> %r
+}
+
+define <4 x i64> @test8_vpmadd52h_vl256_misplaced_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: test8_vpmadd52h_vl256_misplaced_mask:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
+; AVX512-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test8_vpmadd52h_vl256_misplaced_mask:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495]
+; AVX-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX-NEXT: {vex} vpmadd52huq %ymm2, %ymm1, %ymm0
+; AVX-NEXT: retq
+ %mask = and <4 x i64> %x1, splat (i64 4503599627370495)
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %mask, <4 x i64> %x1, <4 x i64> %x2)
+ ret <4 x i64> %r
+}
+
+
+
+define <2 x i64> @test9_vpmadd52l_mix_and_or(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test9_vpmadd52l_mix_and_or:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: test9_vpmadd52l_mix_and_or:
+; AVX: # %bb.0:
+; AVX-NEXT: {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %a = and <2 x i64> %x1, splat (i64 4503599627370495)
+ %b = or <2 x i64> %x2, splat (i64 0)
+ %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %r
+}
+
+define <4 x i64> @knownbits_propagate_high_from_acc(<4 x i64> %acc, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: knownbits_propagate_high_from_acc:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120]
+; AVX512-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: retq
+;
+; AVX-LABEL: knownbits_propagate_high_from_acc:
+; AVX: # %bb.0:
+; AVX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120]
+; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %acc_hi = and <4 x i64> %acc,
+ <i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496>
+
+
+ %m1 = and <4 x i64> %x1,
+ <i64 4503599627370495, i64 4503599627370495, i64 4503599627370495, i64 4503599627370495>
+ %m2 = and <4 x i64> %x2,
+ <i64 4503599627370495, i64 4503599627370495, i64 4503599627370495, i64 4503599627370495>
+
+ %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %acc_hi, <4 x i64> %m1, <4 x i64> %m2)
+
+
+ %only_high = and <4 x i64> %r,
+ <i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496>
+ ret <4 x i64> %only_high
+}
+
+
+
+
+; ---- intrinsics decls ----
+declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@@ -0,0 +1,159 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefix=AVX512 | |||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefix=AVX |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just add these tests to combine-vpmadd52.ll with suitable @knownbits_*
test names
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The testcases may need to be rebased on #156293.
@@ -0,0 +1,159 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefix=AVX512 | |||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefix=AVX |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The testcases may need to be rebased on #156293.
; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 | ||
; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0 | ||
; AVX-NEXT: retq | ||
%acc_hi = and <4 x i64> %acc, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it a negative test?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please can you resolve the merge clashes? There are now some missing tests that use the common CHECK prefix, so that shouldn't be removed.
Hi, I'll fix it later. |
case X86ISD::VPMADD52L: | ||
case X86ISD::VPMADD52H: { | ||
EVT VT = Op.getValueType(); | ||
if (!VT.isVector() || VT.getScalarSizeInBits() != 64) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this should never happen - so just assert instead:
assert(Op.getValueType().isVector() && Op.getValueType().getScalarType() == MVT::i64 && "Unexpected VPMADD52 type");
I think PR #156847 modified the whole test case, which caused conflicts with my test changes, so I created a new one. |
} | ||
case X86ISD::VPMADD52L: | ||
case X86ISD::VPMADD52H: { | ||
EVT VT = Op.getValueType(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove unused VT variable
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
avoid capital letters in a filename
Fix #155386 ~
@RKSimon