-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64][SelectionDAG] Enable new partial reduction lowering by default #143565
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][SelectionDAG] Enable new partial reduction lowering by default #143565
Conversation
|
@llvm/pr-subscribers-backend-aarch64

Author: Nicholas Guy (NickGuy-Arm)

Changes

Patch is 193.16 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/143565.diff

4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 766599d567efd..e610a63598a18 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -153,13 +153,6 @@ cl::opt<bool> EnableSVEGISel(
cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
cl::init(false));
-// FIXME : This is a temporary flag, and is used to help transition to
-// performing lowering the proper way using the new PARTIAL_REDUCE_MLA ISD
-// nodes.
-static cl::opt<bool> EnablePartialReduceNodes(
- "aarch64-enable-partial-reduce-nodes", cl::init(false), cl::ReallyHidden,
- cl::desc("Use the new method of lowering partial reductions."));
-
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
@@ -1457,7 +1450,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::FADD, VT, Custom);
- if (EnablePartialReduceNodes && Subtarget->hasDotProd()) {
+ if (Subtarget->hasDotProd()) {
static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
ISD::PARTIAL_REDUCE_UMLA};
@@ -1895,14 +1888,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// Handle partial reduction operations
- if (EnablePartialReduceNodes && Subtarget->isSVEorStreamingSVEAvailable()) {
+ if (Subtarget->isSVEorStreamingSVEAvailable()) {
// Mark known legal pairs as 'Legal' (these will expand to UDOT or SDOT).
// Other pairs will default to 'Expand'.
static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
ISD::PARTIAL_REDUCE_UMLA};
setPartialReduceMLAAction(MLAOps, MVT::nxv2i64, MVT::nxv8i16, Legal);
setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Legal);
-
setPartialReduceMLAAction(MLAOps, MVT::nxv2i64, MVT::nxv16i8, Custom);
if (Subtarget->hasMatMulInt8()) {
@@ -1957,17 +1949,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::nxv2i64,
Custom);
- if (EnablePartialReduceNodes) {
- static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
- ISD::PARTIAL_REDUCE_UMLA};
- // Must be lowered to SVE instructions.
- setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v4i32, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v8i16, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v8i16, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Custom);
- setPartialReduceMLAAction(MLAOps, MVT::v8i16, MVT::v16i8, Custom);
- }
+ static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
+ ISD::PARTIAL_REDUCE_UMLA};
+ // Must be lowered to SVE instructions.
+ setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v4i32, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v8i16, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v8i16, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Custom);
+ setPartialReduceMLAAction(MLAOps, MVT::v8i16, MVT::v16i8, Custom);
}
}
@@ -2165,16 +2155,6 @@ bool AArch64TargetLowering::shouldExpandPartialReductionIntrinsic(
assert(I->getIntrinsicID() ==
Intrinsic::experimental_vector_partial_reduce_add &&
"Unexpected intrinsic!");
- if (EnablePartialReduceNodes)
- return true;
-
- EVT VT = EVT::getEVT(I->getType());
- auto Op1 = I->getOperand(1);
- EVT Op1VT = EVT::getEVT(Op1->getType());
- if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
- (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount() ||
- VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()))
- return false;
return true;
}
@@ -2252,26 +2232,24 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
bool PreferNEON = VT.is64BitVector() || VT.is128BitVector();
bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
- if (EnablePartialReduceNodes) {
- static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
- ISD::PARTIAL_REDUCE_UMLA};
- unsigned NumElts = VT.getVectorNumElements();
- if (VT.getVectorElementType() == MVT::i64) {
- setPartialReduceMLAAction(MLAOps, VT,
- MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
- setPartialReduceMLAAction(
- MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 4), Custom);
- setPartialReduceMLAAction(
- MLAOps, VT, MVT::getVectorVT(MVT::i32, NumElts * 2), Custom);
- } else if (VT.getVectorElementType() == MVT::i32) {
- setPartialReduceMLAAction(MLAOps, VT,
- MVT::getVectorVT(MVT::i8, NumElts * 4), Custom);
- setPartialReduceMLAAction(
- MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 2), Custom);
- } else if (VT.getVectorElementType() == MVT::i16) {
- setPartialReduceMLAAction(MLAOps, VT,
- MVT::getVectorVT(MVT::i8, NumElts * 2), Custom);
- }
+ static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
+ ISD::PARTIAL_REDUCE_UMLA};
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.getVectorElementType() == MVT::i64) {
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
+ setPartialReduceMLAAction(
+ MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 4), Custom);
+ setPartialReduceMLAAction(
+ MLAOps, VT, MVT::getVectorVT(MVT::i32, NumElts * 2), Custom);
+ } else if (VT.getVectorElementType() == MVT::i32) {
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i8, NumElts * 4), Custom);
+ setPartialReduceMLAAction(
+ MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 2), Custom);
+ } else if (VT.getVectorElementType() == MVT::i16) {
+ setPartialReduceMLAAction(MLAOps, VT,
+ MVT::getVectorVT(MVT::i8, NumElts * 2), Custom);
}
// Lower fixed length vector operations to scalable equivalents.
diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index 0c7b3c7d3c138..43404d1f871fe 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -1,15 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NOI8MM
-; RUN: llc -mtriple aarch64 -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM,CHECK-NODOT
-; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM
-; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NEWLOWERING-I8MM
+; RUN: llc -mtriple aarch64 -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK-NODOT
+; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod < %s | FileCheck %s --check-prefix=CHECK-DOT
+; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefix=CHECK-DOT-I8MM
define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
-; CHECK-DOT-LABEL: udot:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: udot v0.4s, v2.16b, v1.16b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: umull v3.8h, v2.8b, v1.8b
@@ -19,6 +13,16 @@ define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: uaddw2 v0.4s, v0.4s, v1.8h
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: udot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: udot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = zext <16 x i8> %u to <16 x i32>
%s.wide = zext <16 x i8> %s to <16 x i32>
%mult = mul nuw nsw <16 x i32> %s.wide, %u.wide
@@ -27,22 +31,6 @@ define <4 x i32> @udot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
}
define <4 x i32> @udot_in_loop(ptr %p1, ptr %p2){
-; CHECK-DOT-LABEL: udot_in_loop:
-; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: mov x8, xzr
-; CHECK-DOT-NEXT: .LBB1_1: // %vector.body
-; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-NEXT: ldr q3, [x1, x8]
-; CHECK-DOT-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-NEXT: add x8, x8, #16
-; CHECK-DOT-NEXT: udot v1.4s, v2.16b, v3.16b
-; CHECK-DOT-NEXT: cmp x8, #16
-; CHECK-DOT-NEXT: b.ne .LBB1_1
-; CHECK-DOT-NEXT: // %bb.2: // %end
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot_in_loop:
; CHECK-NODOT: // %bb.0: // %entry
; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
@@ -63,6 +51,38 @@ define <4 x i32> @udot_in_loop(ptr %p1, ptr %p2){
; CHECK-NODOT-NEXT: b.ne .LBB1_1
; CHECK-NODOT-NEXT: // %bb.2: // %end
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot_in_loop:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: .LBB1_1: // %vector.body
+; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: udot v1.4s, v2.16b, v3.16b
+; CHECK-DOT-NEXT: cmp x8, #16
+; CHECK-DOT-NEXT: b.ne .LBB1_1
+; CHECK-DOT-NEXT: // %bb.2: // %end
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_in_loop:
+; CHECK-DOT-I8MM: // %bb.0: // %entry
+; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: .LBB1_1: // %vector.body
+; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: udot v1.4s, v2.16b, v3.16b
+; CHECK-DOT-I8MM-NEXT: cmp x8, #16
+; CHECK-DOT-I8MM-NEXT: b.ne .LBB1_1
+; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
+; CHECK-DOT-I8MM-NEXT: ret
entry:
br label %vector.body
@@ -86,11 +106,6 @@ end:
}
define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
-; CHECK-DOT-LABEL: udot_narrow:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: udot v0.2s, v2.8b, v1.8b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: udot_narrow:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: umull v1.8h, v2.8b, v1.8b
@@ -105,6 +120,16 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
; CHECK-NODOT-NEXT: uaddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: udot_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: udot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: udot_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: udot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = zext <8 x i8> %u to <8 x i32>
%s.wide = zext <8 x i8> %s to <8 x i32>
%mult = mul nuw nsw <8 x i32> %s.wide, %u.wide
@@ -113,11 +138,6 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
}
define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
-; CHECK-DOT-LABEL: sdot:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: sdot v0.4s, v2.16b, v1.16b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: sdot:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: smull v3.8h, v2.8b, v1.8b
@@ -127,6 +147,16 @@ define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: saddw2 v0.4s, v0.4s, v1.8h
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: sdot:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: sdot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: sdot:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: sdot v0.4s, v2.16b, v1.16b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = sext <16 x i8> %u to <16 x i32>
%s.wide = sext <16 x i8> %s to <16 x i32>
%mult = mul nuw nsw <16 x i32> %s.wide, %u.wide
@@ -135,11 +165,6 @@ define <4 x i32> @sdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
}
define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
-; CHECK-DOT-LABEL: sdot_narrow:
-; CHECK-DOT: // %bb.0:
-; CHECK-DOT-NEXT: sdot v0.2s, v2.8b, v1.8b
-; CHECK-DOT-NEXT: ret
-;
; CHECK-NODOT-LABEL: sdot_narrow:
; CHECK-NODOT: // %bb.0:
; CHECK-NODOT-NEXT: smull v1.8h, v2.8b, v1.8b
@@ -154,6 +179,16 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
; CHECK-NODOT-NEXT: saddw v0.4s, v0.4s, v1.4h
; CHECK-NODOT-NEXT: add v0.2s, v2.2s, v0.2s
; CHECK-NODOT-NEXT: ret
+;
+; CHECK-DOT-LABEL: sdot_narrow:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: sdot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-NEXT: ret
+;
+; CHECK-DOT-I8MM-LABEL: sdot_narrow:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: sdot v0.2s, v2.8b, v1.8b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = sext <8 x i8> %u to <8 x i32>
%s.wide = sext <8 x i8> %s to <8 x i32>
%mult = mul nuw nsw <8 x i32> %s.wide, %u.wide
@@ -162,27 +197,34 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
}
define <4 x i32> @usdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
-; CHECK-NOI8MM-LABEL: usdot:
-; CHECK-NOI8MM: // %bb.0:
-; CHECK-NOI8MM-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-NOI8MM-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v4.4h, v3.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v4.8h, v3.8h
-; CHECK-NOI8MM-NEXT: smlal v0.4s, v2.4h, v1.4h
-; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v1.8h
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: usdot:
+; CHECK-NODOT: // %bb.0:
+; CHECK-NODOT-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: smlal v0.4s, v4.4h, v3.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v4.8h, v3.8h
+; CHECK-NODOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-NODOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: usdot:
-; CHECK-I8MM: // %bb.0:
-; CHECK-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: usdot:
+; CHECK-DOT: // %bb.0:
+; CHECK-DOT-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-DOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: smlal v0.4s, v4.4h, v3.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v4.8h, v3.8h
+; CHECK-DOT-NEXT: smlal v0.4s, v2.4h, v1.4h
+; CHECK-DOT-NEXT: smlal2 v0.4s, v2.8h, v1.8h
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: usdot:
-; CHECK-NEWLOWERING-I8MM: // %bb.0:
-; CHECK-NEWLOWERING-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b
-; CHECK-NEWLOWERING-I8MM-NEXT: ret
+; CHECK-DOT-I8MM-LABEL: usdot:
+; CHECK-DOT-I8MM: // %bb.0:
+; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b
+; CHECK-DOT-I8MM-NEXT: ret
%u.wide = zext <16 x i8> %u to <16 x i32>
%s.wide = sext <16 x i8> %s to <16 x i32>
%mult = mul nuw nsw <16 x i32> %s.wide, %u.wide
@@ -191,60 +233,67 @@ define <4 x i32> @usdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
}
define <4 x i32> @usdot_in_loop(ptr %p1, ptr %p2){
-; CHECK-NOI8MM-LABEL: usdot_in_loop:
-; CHECK-NOI8MM: // %bb.0: // %entry
-; CHECK-NOI8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NOI8MM-NEXT: mov x8, xzr
-; CHECK-NOI8MM-NEXT: .LBB6_1: // %vector.body
-; CHECK-NOI8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NOI8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-NOI8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-NOI8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-NOI8MM-NEXT: add x8, x8, #16
-; CHECK-NOI8MM-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-NOI8MM-NEXT: ushll v5.8h, v3.8b, #0
-; CHECK-NOI8MM-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NOI8MM-NEXT: ushll2 v3.8h, v3.16b, #0
-; CHECK-NOI8MM-NEXT: cmp x8, #16
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v4.4h, v5.4h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v4.8h, v5.8h
-; CHECK-NOI8MM-NEXT: smlal v1.4s, v2.4h, v3.4h
-; CHECK-NOI8MM-NEXT: smlal2 v1.4s, v2.8h, v3.8h
-; CHECK-NOI8MM-NEXT: b.ne .LBB6_1
-; CHECK-NOI8MM-NEXT: // %bb.2: // %end
-; CHECK-NOI8MM-NEXT: ret
+; CHECK-NODOT-LABEL: usdot_in_loop:
+; CHECK-NODOT: // %bb.0: // %entry
+; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: .LBB6_1: // %vector.body
+; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
+; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
+; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
+; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-NODOT-NEXT: ushll v5.8h, v3.8b, #0
+; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: ushll2 v3.8h, v3.16b, #0
+; CHECK-NODOT-NEXT: cmp x8, #16
+; CHECK-NODOT-NEXT: smlal v1.4s, v4.4h, v5.4h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
+; CHECK-NODOT-NEXT: smlal v1.4s, v2.4h, v3.4h
+; CHECK-NODOT-NEXT: smlal2 v1.4s, v2.8h, v3.8h
+; CHECK-NODOT-NEXT: b.ne .LBB6_1
+; CHECK-NODOT-NEXT: // %bb.2: // %end
+; CHECK-NODOT-NEXT: ret
;
-; CHECK-I8MM-LABEL: usdot_in_loop:
-; CHECK-I8MM: // %bb.0: // %entry
-; CHECK-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-I8MM-NEXT: mov x8, xzr
-; CHECK-I8MM-NEXT: .LBB6_1: // %vector.body
-; CHECK-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-I8MM-NEXT: add x8, x8, #16
-; CHECK-I8MM-NEXT: usdot v1.4s, v3.16b, v2.16b
-; CHECK-I8MM-NEXT: cmp x8, #16
-; CHECK-I8MM-NEXT: b.ne .LBB6_1
-; CHECK-I8MM-NEXT: // %bb.2: // %end
-; CHECK-I8MM-NEXT: ret
+; CHECK-DOT-LABEL: usdot_in_loop:
+; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: .LBB6_1: // %vector.body
+; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-DOT-NEXT: ldr q2, [x0, x8]
+; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: mov v0.16b, v1.16b
+; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: sshll v4.8h, v2.8b, #0
+; CHECK-DOT-NEXT: ushll v5.8h, v3.8b, #0
+; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: ushll2 v3.8h, v3.16b, #0
+; CHECK-DOT-NEXT: cmp x8, #16
+; CHECK-DOT-NEXT: smlal v1.4s, v4.4h, v5.4h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
+; CHECK-DOT-NEXT: smlal v1.4s, v2.4h, v3.4h
+; CHECK-DOT-NEXT: smlal2 v1.4s, v2.8h, v3.8h
+; CHECK-DOT-NEXT: b.ne .LBB6_1
+; CHECK-DOT-NEXT: // %bb.2: // %end
+; CHECK-DOT-NEXT: ret
;
-; CHECK-NEWLOWERING-I8MM-LABEL: usdot_in_loop:
-; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
-; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #000...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
SamTebbs33
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
a58925f to
34819d9
Compare
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/14628 — Here is the relevant piece of the build log for reference: |
| ; CHECK-COMMON-NEXT: mov v0.16b, v2.16b | ||
| ; CHECK-COMMON-NEXT: add x8, x8, #16 | ||
| ; CHECK-COMMON-NEXT: cmp x8, #16 | ||
| ; CHECK-COMMON-NEXT: tbl v7.16b, { v6.16b }, v3.16b |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a serious regression. It used to generate udot before.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@MDevereau is looking into fixing this!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Opened #144907
No description provided.