Skip to content

Commit 5ff49a7

Browse files
committed
Address comments and fix test check
1 parent 6e40a1e commit 5ff49a7

File tree

2 files changed

+153
-27
lines changed

2 files changed

+153
-27
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7752,11 +7752,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
77527752
case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
77537753
return LowerVECTOR_HISTOGRAM(Op, DAG);
77547754
case ISD::PARTIAL_REDUCE_SMLA:
7755-
case ISD::PARTIAL_REDUCE_UMLA: {
7756-
if (SDValue Result = LowerPARTIAL_REDUCE_MLA(Op, DAG))
7757-
return Result;
7758-
return expandPartialReduceMLA(Op.getNode(), DAG);
7759-
}
7755+
case ISD::PARTIAL_REDUCE_UMLA:
7756+
return LowerPARTIAL_REDUCE_MLA(Op, DAG);
77607757
}
77617758
}
77627759

@@ -27585,8 +27582,6 @@ void AArch64TargetLowering::ReplaceNodeResults(
2758527582
case ISD::PARTIAL_REDUCE_SMLA: {
2758627583
if (SDValue Res = LowerPARTIAL_REDUCE_MLA(SDValue(N, 0), DAG))
2758727584
Results.push_back(Res);
27588-
else
27589-
Results.push_back(expandPartialReduceMLA(N, DAG));
2759027585
return;
2759127586
}
2759227587
case ISD::ADD:
@@ -29538,9 +29533,13 @@ SDValue AArch64TargetLowering::LowerVECTOR_HISTOGRAM(SDValue Op,
2953829533
}
2953929534

2954029535
/// If a PARTIAL_REDUCE_MLA node comes in with an accumulator-input type pairing
29541-
/// of v2i64/v16i8, we cannot directly lower it to a (u|s)dot. We can
29536+
/// of (nx)v2i64/(nx)v16i8, we cannot directly lower it to a (u|s)dot. We can
2954229537
/// however still make use of the dot product instruction by instead
29543-
/// accumulating over two steps: v16i8 -> v4i32 -> v2i64.
29538+
/// accumulating over two steps: (nx)v16i8 -> (nx)v4i32 -> (nx)v2i64.
29539+
/// If available, make use of the (U|S)ADDW(B|T) instructions, otherwise
29540+
/// the following pattern is emitted:
29541+
/// add(add(Acc, ext(EXTRACT_SUBVECTOR(N, 0))), ext(EXTRACT_SUBVECTOR(N,
29542+
/// NTy/2)))
2954429543
SDValue
2954529544
AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(SDValue Op,
2954629545
SelectionDAG &DAG) const {
@@ -29575,27 +29574,17 @@ AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(SDValue Op,
2957529574
return DAG.getNode(HiOpcode, DL, ResultVT, Lo, DotNode);
2957629575
}
2957729576

29578-
if (Scalable) {
29579-
unsigned LoOpcode = IsUnsigned ? AArch64ISD::UUNPKLO : AArch64ISD::SUNPKLO;
29580-
unsigned HiOpcode = IsUnsigned ? AArch64ISD::UUNPKHI : AArch64ISD::SUNPKHI;
29581-
auto Lo = DAG.getNode(LoOpcode, DL, ResultVT, DotNode);
29582-
auto Hi = DAG.getNode(HiOpcode, DL, ResultVT, DotNode);
29583-
auto Extended = DAG.getNode(ISD::ADD, DL, ResultVT, Lo, Hi);
29584-
return DAG.getNode(ISD::ADD, DL, ResultVT, Acc, Extended);
29585-
}
29586-
29587-
// Fold v4i32 into v2i64
29588-
// SDValues
29577+
// Fold (nx)v4i32 into (nx)v2i64
2958929578
auto [DotNodeLo, DotNodeHi] = DAG.SplitVector(DotNode, DL);
2959029579
if (IsUnsigned) {
29591-
DotNodeLo = DAG.getZExtOrTrunc(DotNodeLo, DL, MVT::v2i64);
29592-
DotNodeHi = DAG.getZExtOrTrunc(DotNodeHi, DL, MVT::v2i64);
29580+
DotNodeLo = DAG.getZExtOrTrunc(DotNodeLo, DL, ResultVT);
29581+
DotNodeHi = DAG.getZExtOrTrunc(DotNodeHi, DL, ResultVT);
2959329582
} else {
29594-
DotNodeLo = DAG.getSExtOrTrunc(DotNodeLo, DL, MVT::v2i64);
29595-
DotNodeHi = DAG.getSExtOrTrunc(DotNodeHi, DL, MVT::v2i64);
29583+
DotNodeLo = DAG.getSExtOrTrunc(DotNodeLo, DL, ResultVT);
29584+
DotNodeHi = DAG.getSExtOrTrunc(DotNodeHi, DL, ResultVT);
2959629585
}
29597-
auto Lo = DAG.getNode(ISD::ADD, DL, MVT::v2i64, Acc, DotNodeLo);
29598-
return DAG.getNode(ISD::ADD, DL, MVT::v2i64, Lo, DotNodeHi);
29586+
auto Lo = DAG.getNode(ISD::ADD, DL, ResultVT, Acc, DotNodeLo);
29587+
return DAG.getNode(ISD::ADD, DL, ResultVT, Lo, DotNodeHi);
2959929588
}
2960029589

2960129590
SDValue

llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NOI8MM
33
; RUN: llc -mtriple aarch64 -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM,CHECK-NODOT
4-
; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM
4+
; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM,CHECK-DOT-I8MM
55
; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-I8MM,CHECK-NEWLOWERING-I8MM
66
; RUN: llc -mtriple aarch64 -mattr=+neon,+dotprod -aarch64-enable-partial-reduce-nodes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DOT,CHECK-NOI8MM,CHECK-NEWLOWERING-NOI8MM
77

@@ -175,6 +175,11 @@ define <4 x i32> @usdot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) {
175175
; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v1.8h
176176
; CHECK-NOI8MM-NEXT: ret
177177
;
178+
; CHECK-DOT-I8MM-LABEL: usdot:
179+
; CHECK-DOT-I8MM: // %bb.0:
180+
; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v1.16b, v2.16b
181+
; CHECK-DOT-I8MM-NEXT: ret
182+
;
178183
; CHECK-NEWLOWERING-I8MM-LABEL: usdot:
179184
; CHECK-NEWLOWERING-I8MM: // %bb.0:
180185
; CHECK-NEWLOWERING-I8MM-NEXT: ushll v3.8h, v1.8b, #0
@@ -217,6 +222,22 @@ define <4 x i32> @usdot_in_loop(ptr %p1, ptr %p2){
217222
; CHECK-NOI8MM-NEXT: // %bb.2: // %end
218223
; CHECK-NOI8MM-NEXT: ret
219224
;
225+
; CHECK-DOT-I8MM-LABEL: usdot_in_loop:
226+
; CHECK-DOT-I8MM: // %bb.0: // %entry
227+
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
228+
; CHECK-DOT-I8MM-NEXT: mov x8, xzr
229+
; CHECK-DOT-I8MM-NEXT: .LBB6_1: // %vector.body
230+
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
231+
; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
232+
; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
233+
; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
234+
; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
235+
; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v3.16b, v2.16b
236+
; CHECK-DOT-I8MM-NEXT: cmp x8, #16
237+
; CHECK-DOT-I8MM-NEXT: b.ne .LBB6_1
238+
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
239+
; CHECK-DOT-I8MM-NEXT: ret
240+
;
220241
; CHECK-NEWLOWERING-I8MM-LABEL: usdot_in_loop:
221242
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
222243
; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #0000000000000000
@@ -279,6 +300,11 @@ define <2 x i32> @usdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
279300
; CHECK-NOI8MM-NEXT: add v0.2s, v1.2s, v0.2s
280301
; CHECK-NOI8MM-NEXT: ret
281302
;
303+
; CHECK-DOT-I8MM-LABEL: usdot_narrow:
304+
; CHECK-DOT-I8MM: // %bb.0:
305+
; CHECK-DOT-I8MM-NEXT: usdot v0.2s, v1.8b, v2.8b
306+
; CHECK-DOT-I8MM-NEXT: ret
307+
;
282308
; CHECK-NEWLOWERING-I8MM-LABEL: usdot_narrow:
283309
; CHECK-NEWLOWERING-I8MM: // %bb.0:
284310
; CHECK-NEWLOWERING-I8MM-NEXT: ushll v1.8h, v1.8b, #0
@@ -315,6 +341,11 @@ define <4 x i32> @sudot(<4 x i32> %acc, <16 x i8> %u, <16 x i8> %s) #0{
315341
; CHECK-NOI8MM-NEXT: smlal2 v0.4s, v2.8h, v1.8h
316342
; CHECK-NOI8MM-NEXT: ret
317343
;
344+
; CHECK-DOT-I8MM-LABEL: sudot:
345+
; CHECK-DOT-I8MM: // %bb.0:
346+
; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v2.16b, v1.16b
347+
; CHECK-DOT-I8MM-NEXT: ret
348+
;
318349
; CHECK-NEWLOWERING-I8MM-LABEL: sudot:
319350
; CHECK-NEWLOWERING-I8MM: // %bb.0:
320351
; CHECK-NEWLOWERING-I8MM-NEXT: sshll v3.8h, v1.8b, #0
@@ -357,6 +388,22 @@ define <4 x i32> @sudot_in_loop(ptr %p1, ptr %p2){
357388
; CHECK-NOI8MM-NEXT: // %bb.2: // %end
358389
; CHECK-NOI8MM-NEXT: ret
359390
;
391+
; CHECK-DOT-I8MM-LABEL: sudot_in_loop:
392+
; CHECK-DOT-I8MM: // %bb.0: // %entry
393+
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
394+
; CHECK-DOT-I8MM-NEXT: mov x8, xzr
395+
; CHECK-DOT-I8MM-NEXT: .LBB9_1: // %vector.body
396+
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
397+
; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
398+
; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
399+
; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
400+
; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
401+
; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v2.16b, v3.16b
402+
; CHECK-DOT-I8MM-NEXT: cmp x8, #16
403+
; CHECK-DOT-I8MM-NEXT: b.ne .LBB9_1
404+
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
405+
; CHECK-DOT-I8MM-NEXT: ret
406+
;
360407
; CHECK-NEWLOWERING-I8MM-LABEL: sudot_in_loop:
361408
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
362409
; CHECK-NEWLOWERING-I8MM-NEXT: movi v1.2d, #0000000000000000
@@ -419,6 +466,11 @@ define <2 x i32> @sudot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
419466
; CHECK-NOI8MM-NEXT: add v0.2s, v1.2s, v0.2s
420467
; CHECK-NOI8MM-NEXT: ret
421468
;
469+
; CHECK-DOT-I8MM-LABEL: sudot_narrow:
470+
; CHECK-DOT-I8MM: // %bb.0:
471+
; CHECK-DOT-I8MM-NEXT: usdot v0.2s, v2.8b, v1.8b
472+
; CHECK-DOT-I8MM-NEXT: ret
473+
;
422474
; CHECK-NEWLOWERING-I8MM-LABEL: sudot_narrow:
423475
; CHECK-NEWLOWERING-I8MM: // %bb.0:
424476
; CHECK-NEWLOWERING-I8MM-NEXT: sshll v1.8h, v1.8b, #0
@@ -461,6 +513,14 @@ define <4 x i64> @udot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
461513
; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v4.4s
462514
; CHECK-NODOT-NEXT: ret
463515
;
516+
; CHECK-DOT-I8MM-LABEL: udot_8to64:
517+
; CHECK-DOT-I8MM: // %bb.0: // %entry
518+
; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
519+
; CHECK-DOT-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
520+
; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
521+
; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
522+
; CHECK-DOT-I8MM-NEXT: ret
523+
;
464524
; CHECK-NEWLOWERING-I8MM-LABEL: udot_8to64:
465525
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
466526
; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
@@ -504,6 +564,14 @@ define <4 x i64> @sdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
504564
; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v4.4s
505565
; CHECK-NODOT-NEXT: ret
506566
;
567+
; CHECK-DOT-I8MM-LABEL: sdot_8to64:
568+
; CHECK-DOT-I8MM: // %bb.0: // %entry
569+
; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
570+
; CHECK-DOT-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
571+
; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
572+
; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
573+
; CHECK-DOT-I8MM-NEXT: ret
574+
;
507575
; CHECK-NEWLOWERING-I8MM-LABEL: sdot_8to64:
508576
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
509577
; CHECK-NEWLOWERING-I8MM-NEXT: movi v4.2d, #0000000000000000
@@ -553,6 +621,14 @@ define <4 x i64> @usdot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b){
553621
; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s
554622
; CHECK-NOI8MM-NEXT: ret
555623
;
624+
; CHECK-DOT-I8MM-LABEL: usdot_8to64:
625+
; CHECK-DOT-I8MM: // %bb.0: // %entry
626+
; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
627+
; CHECK-DOT-I8MM-NEXT: usdot v4.4s, v2.16b, v3.16b
628+
; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
629+
; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
630+
; CHECK-DOT-I8MM-NEXT: ret
631+
;
556632
; CHECK-NEWLOWERING-I8MM-LABEL: usdot_8to64:
557633
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
558634
; CHECK-NEWLOWERING-I8MM-NEXT: ushll v4.8h, v2.8b, #0
@@ -610,6 +686,14 @@ define <4 x i64> @sudot_8to64(<4 x i64> %acc, <16 x i8> %a, <16 x i8> %b) {
610686
; CHECK-NOI8MM-NEXT: smlal2 v1.2d, v2.4s, v3.4s
611687
; CHECK-NOI8MM-NEXT: ret
612688
;
689+
; CHECK-DOT-I8MM-LABEL: sudot_8to64:
690+
; CHECK-DOT-I8MM: // %bb.0: // %entry
691+
; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
692+
; CHECK-DOT-I8MM-NEXT: usdot v4.4s, v3.16b, v2.16b
693+
; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
694+
; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
695+
; CHECK-DOT-I8MM-NEXT: ret
696+
;
613697
; CHECK-NEWLOWERING-I8MM-LABEL: sudot_8to64:
614698
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
615699
; CHECK-NEWLOWERING-I8MM-NEXT: sshll v4.8h, v2.8b, #0
@@ -684,6 +768,22 @@ define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){
684768
; CHECK-NODOT-NEXT: // %bb.2: // %end
685769
; CHECK-NODOT-NEXT: ret
686770
;
771+
; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_in_loop:
772+
; CHECK-DOT-I8MM: // %bb.0: // %entry
773+
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
774+
; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
775+
; CHECK-DOT-I8MM-NEXT: mov x8, xzr
776+
; CHECK-DOT-I8MM-NEXT: .LBB16_1: // %vector.body
777+
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
778+
; CHECK-DOT-I8MM-NEXT: ldr q3, [x0, x8]
779+
; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
780+
; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
781+
; CHECK-DOT-I8MM-NEXT: cmp x8, #16
782+
; CHECK-DOT-I8MM-NEXT: udot v1.4s, v3.16b, v2.16b
783+
; CHECK-DOT-I8MM-NEXT: b.ne .LBB16_1
784+
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
785+
; CHECK-DOT-I8MM-NEXT: ret
786+
;
687787
; CHECK-NEWLOWERING-I8MM-LABEL: udot_no_bin_op_in_loop:
688788
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
689789
; CHECK-NEWLOWERING-I8MM-NEXT: adrp x8, .LCPI16_0
@@ -854,6 +954,15 @@ define <4 x i64> @udot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
854954
; CHECK-NODOT-NEXT: uaddw2 v0.2d, v0.2d, v3.4s
855955
; CHECK-NODOT-NEXT: ret
856956
;
957+
; CHECK-DOT-I8MM-LABEL: udot_no_bin_op_8to64:
958+
; CHECK-DOT-I8MM: // %bb.0:
959+
; CHECK-DOT-I8MM-NEXT: movi v3.16b, #1
960+
; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
961+
; CHECK-DOT-I8MM-NEXT: udot v4.4s, v2.16b, v3.16b
962+
; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
963+
; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
964+
; CHECK-DOT-I8MM-NEXT: ret
965+
;
857966
; CHECK-NEWLOWERING-I8MM-LABEL: udot_no_bin_op_8to64:
858967
; CHECK-NEWLOWERING-I8MM: // %bb.0:
859968
; CHECK-NEWLOWERING-I8MM-NEXT: ushll v3.8h, v2.8b, #0
@@ -913,6 +1022,15 @@ define <4 x i64> @sdot_no_bin_op_8to64(<4 x i64> %acc, <16 x i8> %a){
9131022
; CHECK-NODOT-NEXT: saddw2 v0.2d, v0.2d, v3.4s
9141023
; CHECK-NODOT-NEXT: ret
9151024
;
1025+
; CHECK-DOT-I8MM-LABEL: sdot_no_bin_op_8to64:
1026+
; CHECK-DOT-I8MM: // %bb.0:
1027+
; CHECK-DOT-I8MM-NEXT: movi v3.16b, #1
1028+
; CHECK-DOT-I8MM-NEXT: movi v4.2d, #0000000000000000
1029+
; CHECK-DOT-I8MM-NEXT: sdot v4.4s, v2.16b, v3.16b
1030+
; CHECK-DOT-I8MM-NEXT: saddw2 v1.2d, v1.2d, v4.4s
1031+
; CHECK-DOT-I8MM-NEXT: saddw v0.2d, v0.2d, v4.2s
1032+
; CHECK-DOT-I8MM-NEXT: ret
1033+
;
9161034
; CHECK-NEWLOWERING-I8MM-LABEL: sdot_no_bin_op_8to64:
9171035
; CHECK-NEWLOWERING-I8MM: // %bb.0:
9181036
; CHECK-NEWLOWERING-I8MM-NEXT: sshll v3.8h, v2.8b, #0
@@ -1101,6 +1219,25 @@ define <4 x i32> @usdot_multiple_zext_users(ptr %p1, ptr %p2, ptr %p3) {
11011219
; CHECK-NOI8MM-NEXT: add v0.4s, v1.4s, v0.4s
11021220
; CHECK-NOI8MM-NEXT: ret
11031221
;
1222+
; CHECK-DOT-I8MM-LABEL: usdot_multiple_zext_users:
1223+
; CHECK-DOT-I8MM: // %bb.0: // %entry
1224+
; CHECK-DOT-I8MM-NEXT: movi v0.2d, #0000000000000000
1225+
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
1226+
; CHECK-DOT-I8MM-NEXT: mov x8, xzr
1227+
; CHECK-DOT-I8MM-NEXT: .LBB28_1: // %vector.body
1228+
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
1229+
; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
1230+
; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
1231+
; CHECK-DOT-I8MM-NEXT: ldr q4, [x2, x8]
1232+
; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
1233+
; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v4.16b, v2.16b
1234+
; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v4.16b, v3.16b
1235+
; CHECK-DOT-I8MM-NEXT: cmp x8, #1024
1236+
; CHECK-DOT-I8MM-NEXT: b.ne .LBB28_1
1237+
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
1238+
; CHECK-DOT-I8MM-NEXT: add v0.4s, v1.4s, v0.4s
1239+
; CHECK-DOT-I8MM-NEXT: ret
1240+
;
11041241
; CHECK-NEWLOWERING-I8MM-LABEL: usdot_multiple_zext_users:
11051242
; CHECK-NEWLOWERING-I8MM: // %bb.0: // %entry
11061243
; CHECK-NEWLOWERING-I8MM-NEXT: movi v0.2d, #0000000000000000

0 commit comments

Comments
 (0)