-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64][SelectionDAG] Fix UDOT regression #144907
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Remove broken check in AArch64ISelLowering for bailing from ZExt optimizations when there is a partial reduction intrinsic.
|
@llvm/pr-subscribers-backend-aarch64 Author: Matthew Devereau (MDevereau) ChangesFix broken check in AArch64ISelLowering for bailing from ZExt optimizations when there is a partial reduction intrinsic. Full diff: https://github.com/llvm/llvm-project/pull/144907.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 581f152776026..35ca365257a15 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16865,14 +16865,13 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
if (SrcWidth * 4 <= DstWidth) {
if (all_of(I->users(), [&](auto *U) {
auto *SingleUser = cast<Instruction>(&*U);
- return (
- match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))) ||
- (match(SingleUser,
- m_Intrinsic<
- Intrinsic::experimental_vector_partial_reduce_add>(
- m_Value(), m_Specific(I))) &&
- !shouldExpandPartialReductionIntrinsic(
- cast<IntrinsicInst>(SingleUser))));
+ bool IsMul =
+ match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value())));
+ bool IsPRIntr = match(
+ SingleUser,
+ m_Intrinsic<Intrinsic::experimental_vector_partial_reduce_add>(
+ m_Value(), m_Specific(I)));
+ return IsMul || IsPRIntr;
}))
return false;
}
diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index 0ea80a075fae9..0db4dc07fbfe2 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -772,35 +772,57 @@ define <4 x i32> @udot_no_bin_op(<4 x i32> %acc, <16 x i8> %a){
}
define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){
-; CHECK-COMMON-LABEL: udot_no_bin_op_in_loop:
-; CHECK-COMMON: // %bb.0: // %entry
-; CHECK-COMMON-NEXT: adrp x8, .LCPI16_0
-; CHECK-COMMON-NEXT: movi v2.2d, #0000000000000000
-; CHECK-COMMON-NEXT: adrp x9, .LCPI16_2
-; CHECK-COMMON-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
-; CHECK-COMMON-NEXT: adrp x8, .LCPI16_1
-; CHECK-COMMON-NEXT: adrp x10, .LCPI16_3
-; CHECK-COMMON-NEXT: ldr q3, [x8, :lo12:.LCPI16_1]
-; CHECK-COMMON-NEXT: ldr q4, [x9, :lo12:.LCPI16_2]
-; CHECK-COMMON-NEXT: ldr q5, [x10, :lo12:.LCPI16_3]
-; CHECK-COMMON-NEXT: mov x8, xzr
-; CHECK-COMMON-NEXT: .LBB16_1: // %vector.body
-; CHECK-COMMON-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-COMMON-NEXT: ldr q6, [x0, x8]
-; CHECK-COMMON-NEXT: mov v0.16b, v2.16b
-; CHECK-COMMON-NEXT: add x8, x8, #16
-; CHECK-COMMON-NEXT: cmp x8, #16
-; CHECK-COMMON-NEXT: tbl v7.16b, { v6.16b }, v3.16b
-; CHECK-COMMON-NEXT: tbl v16.16b, { v6.16b }, v4.16b
-; CHECK-COMMON-NEXT: tbl v17.16b, { v6.16b }, v5.16b
-; CHECK-COMMON-NEXT: tbl v6.16b, { v6.16b }, v1.16b
-; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v17.4s
-; CHECK-COMMON-NEXT: add v7.4s, v16.4s, v7.4s
-; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v7.4s
-; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v6.4s
-; CHECK-COMMON-NEXT: b.ne .LBB16_1
-; CHECK-COMMON-NEXT: // %bb.2: // %end
-; CHECK-COMMON-NEXT: ret
+; CHECK-NODOT-LABEL: udot_no_bin_op_in_loop:
+; CHECK-NODOT: // %bb.0: // %entry
+; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: .LBB16_1: // %vector.body
+; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
+; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
+; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: cmp x8, #16
+; CHECK-NODOT-NEXT: ushll v3.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v3.4h
+; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v3.8h
+; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v2.4h
+; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v2.8h
+; CHECK-NODOT-NEXT: b.ne .LBB16_1
+; CHECK-NODOT-NEXT: // %bb.2: // %end
+; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot_no_bin_op_in_loop:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v2.16b, #1
+; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: .LBB16_1: // %vector.body
+; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT: ldr q3, [x0, x8]
+; CHECK-DOT-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: cmp x8, #16
+; CHECK-DOT-NEXT: udot v1.4s, v3.16b, v2.16b
+; CHECK-DOT-NEXT: b.ne .LBB16_1
+; CHECK-DOT-NEXT: // %bb.2: // %end
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_in_loop:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
+; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: .LBB16_1: // %vector.body
+; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x0, x8]
+; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: cmp x8, #16
+; CHECK-DOT-I8MM-NEXT: udot v1.4s, v3.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: b.ne .LBB16_1
+; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
+; CHECK-DOT-I8MM-NEXT: ret
entry:
br label %vector.body
|
>                  m_Value(), m_Specific(I))) &&
>              !shouldExpandPartialReductionIntrinsic(
>                  cast<IntrinsicInst>(SingleUser))));
> bool IsMul =
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Right now we no longer return early if IsMul is true, but I understand the code looks more readable now. If your intention is to reduce the level of indentation and split the if statements out to be more readable, what about
if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
return true;
if (match(SingleUser,
m_Intrinsic<Intrinsic::experimental_vector_partial_reduce_add>(
m_Value(), m_Specific(I))))
return true;
return false;
? If you prefer the original version that's also fine!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd forgotten about the early return. That's just as clear to me and keeps the behaviour the same, thanks!
david-arm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Beautiful!
Fix broken check in AArch64ISelLowering for bailing from ZExt optimizations when there is a partial reduction intrinsic.