Skip to content

Conversation

houngkoungting
Copy link
Contributor

Fix #155386

@RKSimon

@llvmbot
Copy link
Member

llvmbot commented Sep 1, 2025

@llvm/pr-subscribers-backend-x86

Author: 黃國庭 (houngkoungting)

Changes

Fixes #155386.

@RKSimon


Full diff: https://github.com/llvm/llvm-project/pull/156349.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+40)
  • (added) llvm/test/CodeGen/X86/combine-vpmadd52-1.ll (+159)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ab21cf534b304..733651a7adc62 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38994,9 +38994,49 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
       computeKnownBitsForPSADBW(LHS, RHS, Known, DemandedElts, DAG, Depth);
       break;
     }
+    
     }
     break;
   }
+  case X86ISD::VPMADD52L:
+  case X86ISD::VPMADD52H: {
+    EVT VT = Op.getValueType();
+    if (!VT.isVector() || VT.getScalarSizeInBits() != 64) {
+      Known.resetAll();
+      return;
+    }
+
+    const unsigned BW = 64;
+    APInt Low52 = APInt::getLowBitsSet(BW, 52);
+    APInt High12 = APInt::getBitsSetFrom(BW, 52);
+
+    KnownBits K0 =
+        DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    KnownBits K1 =
+        DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    KnownBits KAcc =
+        DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+
+    if ((K0.Zero & Low52) == Low52 || (K1.Zero & Low52) == Low52) {
+      Known = KAcc;
+      return;
+    }
+
+    KnownBits AddendKB(BW);
+    AddendKB.Zero |= High12;
+
+    KnownBits OutKB =
+        KnownBits::computeForAddSub(true, false, false, KAcc, AddendKB);
+    Known = OutKB;
+
+    if ((KAcc.Zero & Low52) == Low52) {
+      Known.One |= (KAcc.One & High12);
+      Known.Zero |= (KAcc.Zero & High12);
+      Known.Zero &= ~Known.One;
+    }
+
+    return;
+  }
   }
 
   // Handle target shuffles.
diff --git a/llvm/test/CodeGen/X86/combine-vpmadd52-1.ll b/llvm/test/CodeGen/X86/combine-vpmadd52-1.ll
new file mode 100644
index 0000000000000..8aefb5b8c373f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-vpmadd52-1.ll
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma                      | FileCheck %s --check-prefix=AVX
+
+
+define <4 x i64> @test4_vpmadd52l_vl256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: test4_vpmadd52l_vl256:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test4_vpmadd52l_vl256:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX-NEXT:    retq
+  %m1 = and <4 x i64> %x1, splat (i64 4503599627370495)
+  %m2 = and <4 x i64> %x2, splat (i64 4503599627370495)
+  %r  = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %m1, <4 x i64> %m2)
+  ret <4 x i64> %r
+}
+
+
+
+define <2 x i64> @test5_vpmadd52l_oporder(<2 x i64> %acc, <2 x i64> %mulA, <2 x i64> %mulB) {
+; AVX512-LABEL: test5_vpmadd52l_oporder:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test5_vpmadd52l_oporder:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %a = and <2 x i64> %mulA, splat (i64 4503599627370495)
+  %b = and <2 x i64> %mulB, splat (i64 4503599627370495)
+  %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %acc, <2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %r
+}
+
+
+
+define <4 x i64> @test6_vpmadd52l_under_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: test6_vpmadd52l_under_mask:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm2
+; AVX512-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test6_vpmadd52l_under_mask:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [2251799813685247,2251799813685247,2251799813685247,2251799813685247]
+; AVX-NEXT:    vpand %ymm3, %ymm1, %ymm1
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [2251799813685248,2251799813685248,2251799813685248,2251799813685248]
+; AVX-NEXT:    vpor %ymm3, %ymm2, %ymm2
+; AVX-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX-NEXT:    retq
+  %and = and <4 x i64> %x1, splat (i64 2251799813685247)
+  %or  = or  <4 x i64> %x2, splat (i64 2251799813685248)
+  %r   = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %and, <4 x i64> %or)
+  ret <4 x i64> %r
+}
+
+
+
+define <2 x i64> @test7_vpmadd52h_ok(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test7_vpmadd52h_ok:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test7_vpmadd52h_ok:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %m1 = and <2 x i64> %x1, splat (i64 4503599627370495)
+  %m2 = and <2 x i64> %x2, splat (i64 4503599627370495)
+  %r  = call <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %m1, <2 x i64> %m2)
+  ret <2 x i64> %r
+}
+
+define <4 x i64> @test8_vpmadd52h_vl256_misplaced_mask(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: test8_vpmadd52h_vl256_misplaced_mask:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
+; AVX512-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test8_vpmadd52h_vl256_misplaced_mask:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [4503599627370495,4503599627370495,4503599627370495,4503599627370495]
+; AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    {vex} vpmadd52huq %ymm2, %ymm1, %ymm0
+; AVX-NEXT:    retq
+  %mask = and <4 x i64> %x1, splat (i64 4503599627370495)
+  %r    = call <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64> %mask, <4 x i64> %x1, <4 x i64> %x2)
+  ret <4 x i64> %r
+}
+
+
+
+define <2 x i64> @test9_vpmadd52l_mix_and_or(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX512-LABEL: test9_vpmadd52l_mix_and_or:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: test9_vpmadd52l_mix_and_or:
+; AVX:       # %bb.0:
+; AVX-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %a = and <2 x i64> %x1, splat (i64 4503599627370495)
+  %b = or  <2 x i64> %x2, splat (i64 0)
+  %r = call <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %r
+}
+
+define <4 x i64> @knownbits_propagate_high_from_acc(<4 x i64> %acc, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX512-LABEL: knownbits_propagate_high_from_acc:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120]
+; AVX512-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX512-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+;
+; AVX-LABEL: knownbits_propagate_high_from_acc:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpbroadcastq {{.*#+}} ymm3 = [18442240474082181120,18442240474082181120,18442240474082181120,18442240474082181120]
+; AVX-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
+; AVX-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %acc_hi = and <4 x i64> %acc,
+            <i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496>
+
+
+  %m1 = and <4 x i64> %x1,
+        <i64 4503599627370495, i64 4503599627370495, i64 4503599627370495, i64 4503599627370495>
+  %m2 = and <4 x i64> %x2,
+        <i64 4503599627370495, i64 4503599627370495, i64 4503599627370495, i64 4503599627370495>
+
+  %r = call <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64> %acc_hi, <4 x i64> %m1, <4 x i64> %m2)
+
+
+  %only_high = and <4 x i64> %r,
+               <i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496, i64 -4503599627370496>
+  ret <4 x i64> %only_high
+}
+
+
+
+
+; ---- intrinsics decls ----
+declare <2 x i64> @llvm.x86.avx512.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.x86.avx512.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.x86.avx512.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+

Copy link

github-actions bot commented Sep 1, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@RKSimon RKSimon changed the title [X86] X86TargetLowering::computeKnownBitsForTargetNode - add handling… [X86] X86TargetLowering::computeKnownBitsForTargetNode - add X86ISD::VPMADD52L\H handling Sep 1, 2025
@RKSimon RKSimon self-requested a review September 1, 2025 15:59
@@ -0,0 +1,159 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefix=AVX
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just add these tests to combine-vpmadd52.ll with suitable @knownbits_* test names

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The testcases may need to be rebased on #156293.

@RKSimon RKSimon requested a review from XChy September 1, 2025 16:05
@@ -0,0 +1,159 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512ifma,+avx512vl | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma | FileCheck %s --check-prefix=AVX
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The testcases may need to be rebased on #156293.

; AVX-NEXT: {vex} vpmadd52luq %ymm2, %ymm1, %ymm0
; AVX-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX-NEXT: retq
%acc_hi = and <4 x i64> %acc,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it a negative test?

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please can you resolve the merge clashes? There are now some missing tests that use the common CHECK prefix, so that prefix shouldn't be removed.

@houngkoungting
Copy link
Contributor Author

Hi, I'll fix it later.

case X86ISD::VPMADD52L:
case X86ISD::VPMADD52H: {
EVT VT = Op.getValueType();
if (!VT.isVector() || VT.getScalarSizeInBits() != 64) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should never happen - so just assert instead:

assert(Op.getValueType().isVector() && Op.getValueType().getScalarType() == MVT::i64 && "Unexpected VPMADD52 type");

@houngkoungting
Copy link
Contributor Author

I think PR #156847 modified the whole test case, which caused conflicts with my test changes, so I created a new one.

}
case X86ISD::VPMADD52L:
case X86ISD::VPMADD52H: {
EVT VT = Op.getValueType();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the unused VT variable.

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

avoid capital letters in a filename

@RKSimon RKSimon closed this Sep 17, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[X86] X86TargetLowering::computeKnownBitsForTargetNode - add handling for VPMADD52L/VPMADD52H nodes

4 participants