
Conversation

@MDevereau
Contributor

Fix a broken check in AArch64ISelLowering that bails out of ZExt optimizations when there is a partial reduction intrinsic.

Remove broken check in AArch64ISelLowering for bailing
from ZExt optimizations when there is a partial reduction
intrinsic.
@llvmbot
Member

llvmbot commented Jun 19, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Matthew Devereau (MDevereau)

Changes

Fix a broken check in AArch64ISelLowering that bails out of ZExt optimizations when there is a partial reduction intrinsic.
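
For readers outside the patch, the pattern being guarded looks roughly like the following LLVM IR — a minimal sketch modeled on the existing udot_no_bin_op test in the affected file (the body shown here is a reconstruction from the test's name and signature, not copied from it): a wide zext consumed directly by the partial reduction intrinsic, with no binary op in between.

  declare <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32>, <16 x i32>)

  ; The zext feeds the partial reduction directly. With the fixed check, the
  ; extend is left in place so instruction selection can form a single udot
  ; instead of expanding the zext through tbl shuffles.
  define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a) {
    %a.wide = zext <16 x i8> %a to <16 x i32>
    %r = call <4 x i32> @llvm.experimental.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %acc, <16 x i32> %a.wide)
    ret <4 x i32> %r
  }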


Full diff: https://github.com/llvm/llvm-project/pull/144907.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+7-8)
  • (modified) llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll (+51-29)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 581f152776026..35ca365257a15 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16865,14 +16865,13 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
     if (SrcWidth * 4 <= DstWidth) {
       if (all_of(I->users(), [&](auto *U) {
             auto *SingleUser = cast<Instruction>(&*U);
-            return (
-                match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))) ||
-                (match(SingleUser,
-                       m_Intrinsic<
-                           Intrinsic::experimental_vector_partial_reduce_add>(
-                           m_Value(), m_Specific(I))) &&
-                 !shouldExpandPartialReductionIntrinsic(
-                     cast<IntrinsicInst>(SingleUser))));
+            bool IsMul =
+                match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value())));
+            bool IsPRIntr = match(
+                SingleUser,
+                m_Intrinsic<Intrinsic::experimental_vector_partial_reduce_add>(
+                    m_Value(), m_Specific(I)));
+            return IsMul || IsPRIntr;
           }))
         return false;
     }
diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index 0ea80a075fae9..0db4dc07fbfe2 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -772,35 +772,57 @@ define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
 }
 
 define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){
-; CHECK-COMMON-LABEL: udot_no_bin_op_in_loop:
-; CHECK-COMMON:       // %bb.0: // %entry
-; CHECK-COMMON-NEXT:    adrp x8, .LCPI16_0
-; CHECK-COMMON-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-COMMON-NEXT:    adrp x9, .LCPI16_2
-; CHECK-COMMON-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
-; CHECK-COMMON-NEXT:    adrp x8, .LCPI16_1
-; CHECK-COMMON-NEXT:    adrp x10, .LCPI16_3
-; CHECK-COMMON-NEXT:    ldr q3, [x8, :lo12:.LCPI16_1]
-; CHECK-COMMON-NEXT:    ldr q4, [x9, :lo12:.LCPI16_2]
-; CHECK-COMMON-NEXT:    ldr q5, [x10, :lo12:.LCPI16_3]
-; CHECK-COMMON-NEXT:    mov x8, xzr
-; CHECK-COMMON-NEXT:  .LBB16_1: // %vector.body
-; CHECK-COMMON-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-COMMON-NEXT:    ldr q6, [x0, x8]
-; CHECK-COMMON-NEXT:    mov v0.16b, v2.16b
-; CHECK-COMMON-NEXT:    add x8, x8, #16
-; CHECK-COMMON-NEXT:    cmp x8, #16
-; CHECK-COMMON-NEXT:    tbl v7.16b, { v6.16b }, v3.16b
-; CHECK-COMMON-NEXT:    tbl v16.16b, { v6.16b }, v4.16b
-; CHECK-COMMON-NEXT:    tbl v17.16b, { v6.16b }, v5.16b
-; CHECK-COMMON-NEXT:    tbl v6.16b, { v6.16b }, v1.16b
-; CHECK-COMMON-NEXT:    add v2.4s, v2.4s, v17.4s
-; CHECK-COMMON-NEXT:    add v7.4s, v16.4s, v7.4s
-; CHECK-COMMON-NEXT:    add v2.4s, v2.4s, v7.4s
-; CHECK-COMMON-NEXT:    add v2.4s, v2.4s, v6.4s
-; CHECK-COMMON-NEXT:    b.ne .LBB16_1
-; CHECK-COMMON-NEXT:  // %bb.2: // %end
-; CHECK-COMMON-NEXT:    ret
+; CHECK-NODOT-LABEL: udot_no_bin_op_in_loop:
+; CHECK-NODOT:       // %bb.0: // %entry
+; CHECK-NODOT-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NODOT-NEXT:    mov x8, xzr
+; CHECK-NODOT-NEXT:  .LBB16_1: // %vector.body
+; CHECK-NODOT-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NODOT-NEXT:    ldr q2, [x0, x8]
+; CHECK-NODOT-NEXT:    mov v0.16b, v1.16b
+; CHECK-NODOT-NEXT:    add x8, x8, #16
+; CHECK-NODOT-NEXT:    cmp x8, #16
+; CHECK-NODOT-NEXT:    ushll v3.8h, v2.8b, #0
+; CHECK-NODOT-NEXT:    ushll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT:    uaddw v1.4s, v1.4s, v3.4h
+; CHECK-NODOT-NEXT:    uaddw2 v1.4s, v1.4s, v3.8h
+; CHECK-NODOT-NEXT:    uaddw v1.4s, v1.4s, v2.4h
+; CHECK-NODOT-NEXT:    uaddw2 v1.4s, v1.4s, v2.8h
+; CHECK-NODOT-NEXT:    b.ne .LBB16_1
+; CHECK-NODOT-NEXT:  // %bb.2: // %end
+; CHECK-NODOT-NEXT:    ret
+;
+; CHECK-DOT-LABEL: udot_no_bin_op_in_loop:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT:    movi v2.16b, #1
+; CHECK-DOT-NEXT:    mov x8, xzr
+; CHECK-DOT-NEXT:  .LBB16_1: // %vector.body
+; CHECK-DOT-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT:    ldr q3, [x0, x8]
+; CHECK-DOT-NEXT:    mov v0.16b, v1.16b
+; CHECK-DOT-NEXT:    add x8, x8, #16
+; CHECK-DOT-NEXT:    cmp x8, #16
+; CHECK-DOT-NEXT:    udot v1.4s, v3.16b, v2.16b
+; CHECK-DOT-NEXT:    b.ne .LBB16_1
+; CHECK-DOT-NEXT:  // %bb.2: // %end
+; CHECK-DOT-NEXT:    ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_in_loop:
+; CHECK-DOT-I8MM:       // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT:    movi v2.16b, #1
+; CHECK-DOT-I8MM-NEXT:    mov x8, xzr
+; CHECK-DOT-I8MM-NEXT:  .LBB16_1: // %vector.body
+; CHECK-DOT-I8MM-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-I8MM-NEXT:    ldr q3, [x0, x8]
+; CHECK-DOT-I8MM-NEXT:    mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT:    add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT:    cmp x8, #16
+; CHECK-DOT-I8MM-NEXT:    udot v1.4s, v3.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT:    b.ne .LBB16_1
+; CHECK-DOT-I8MM-NEXT:  // %bb.2: // %end
+; CHECK-DOT-I8MM-NEXT:    ret
 entry:
   br label %vector.body
 

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

-                           m_Value(), m_Specific(I))) &&
-                 !shouldExpandPartialReductionIntrinsic(
-                     cast<IntrinsicInst>(SingleUser))));
+            bool IsMul =
Contributor


nit: Right now we no longer return early if IsMul is true, but I understand the code looks more readable now. If your intention is to reduce the level of indentation and split the if statements out to be more readable, what about

  if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
    return true;
  if (match(SingleUser,
            m_Intrinsic<Intrinsic::experimental_vector_partial_reduce_add>(
            m_Value(), m_Specific(I))))
    return true;
  return false;

? If you prefer the original version that's also fine!

Contributor Author


I'd forgotten about the early return. That's just as clear to me and keeps the behaviour the same, thanks!
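
Folding the suggestion back into the patch, the check presumably ends up reading as below (a sketch of the post-review shape; the exact code merged in f704782 is not shown in this thread):

  if (all_of(I->users(), [&](auto *U) {
        auto *SingleUser = cast<Instruction>(&*U);
        // Early returns restore the short-circuit behaviour of the
        // original code: a mul user never reaches the second match.
        if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
          return true;
        if (match(SingleUser,
                  m_Intrinsic<
                      Intrinsic::experimental_vector_partial_reduce_add>(
                      m_Value(), m_Specific(I))))
          return true;
        return false;
      }))
    return false;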

Contributor

@david-arm left a comment


LGTM! Beautiful!

@MDevereau MDevereau merged commit f704782 into llvm:main Jun 20, 2025
7 checks passed