Skip to content

Commit 466eb9f

Browse files
committed
Update doc-comment and add SME test run line
1 parent 1d0994b commit 466eb9f

File tree

2 files changed

+21
-5
lines changed

2 files changed

+21
-5
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29514,11 +29514,10 @@ SDValue AArch64TargetLowering::LowerVECTOR_HISTOGRAM(SDValue Op,
29514 29514
return Scatter;
29515 29515
}
29516 29516

29517-
/// If a PARTIAL_REDUCE_MLA node comes in with an accumulator type that is too
29518-
/// wide to be used for (u|s)dot, we can still make use of the dot product
29519-
/// instruction by instead treating the accumulator as a vector type with twice
29520-
/// as many elements that are each half as wide, accumulating the low and high
29521-
/// parts of the result together in the actual accumulator afterwards.
29517+
/// If a PARTIAL_REDUCE_MLA node comes in with an accumulator-input type pairing
29518+
/// of nxv2i64/nxv16i8, we cannot directly lower it to a (u|s)dot. We can,
29519+
/// however, still make use of the dot product instruction by instead
29520+
/// accumulating over two steps: nxv16i8 -> nxv4i32 -> nxv2i64.
29522 29521
SDValue
29523 29522
AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(SDValue Op,
29524 29523
SelectionDAG &DAG) const {

llvm/test/CodeGen/AArch64/sve-partial-reduce-dot-product.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
3 3
; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM
4 4
; RUN: llc -mtriple=aarch64 -mattr=+sve,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE
5 5
; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE2
6+
; RUN: llc -mtriple=aarch64 -mattr=+sme -force-streaming -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SME
6 7

7 8
define <vscale x 4 x i32> @udot(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
8 9
; CHECK-LABEL: udot:
@@ -214,6 +215,14 @@ define <vscale x 4 x i64> @udot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8
214 215
; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z4.s
215 216
; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z4.s
216 217
; CHECK-NEWLOWERING-SVE2-NEXT: ret
218+
;
219+
; CHECK-NEWLOWERING-SME-LABEL: udot_8to64:
220+
; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
221+
; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
222+
; CHECK-NEWLOWERING-SME-NEXT: udot z4.s, z2.b, z3.b
223+
; CHECK-NEWLOWERING-SME-NEXT: uaddwb z0.d, z0.d, z4.s
224+
; CHECK-NEWLOWERING-SME-NEXT: uaddwt z0.d, z0.d, z4.s
225+
; CHECK-NEWLOWERING-SME-NEXT: ret
217 226
entry:
218 227
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i64>
219 228
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i64>
@@ -251,6 +260,14 @@ define <vscale x 4 x i64> @sdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8
251 260
; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s
252 261
; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s
253 262
; CHECK-NEWLOWERING-SVE2-NEXT: ret
263+
;
264+
; CHECK-NEWLOWERING-SME-LABEL: sdot_8to64:
265+
; CHECK-NEWLOWERING-SME: // %bb.0: // %entry
266+
; CHECK-NEWLOWERING-SME-NEXT: mov z4.s, #0 // =0x0
267+
; CHECK-NEWLOWERING-SME-NEXT: sdot z4.s, z2.b, z3.b
268+
; CHECK-NEWLOWERING-SME-NEXT: saddwb z0.d, z0.d, z4.s
269+
; CHECK-NEWLOWERING-SME-NEXT: saddwt z0.d, z0.d, z4.s
270+
; CHECK-NEWLOWERING-SME-NEXT: ret
254 271
entry:
255 272
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i64>
256 273
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i64>

0 commit comments

Comments
 (0)