1 1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 2 ; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-I8MM
3 3 ; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOI8MM
4- ; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING
4+ ; RUN: llc -mtriple=aarch64 -mattr=+sve,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE
5+ ; RUN: llc -mtriple=aarch64 -mattr=+sve2,+i8mm -aarch64-enable-partial-reduce-nodes %s -o - | FileCheck %s --check-prefixes=CHECK-NEWLOWERING,CHECK-NEWLOWERING-SVE2
5 6
67define <vscale x 4 x i32 > @udot (<vscale x 4 x i32 > %acc , <vscale x 16 x i8 > %a , <vscale x 16 x i8 > %b ) {
7 8 ; CHECK-LABEL: udot:
@@ -196,13 +197,23 @@ define <vscale x 4 x i64> @udot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8
196 197 ; CHECK-NEXT: add z1.d, z1.d, z3.d
197 198 ; CHECK-NEXT: ret
198 199 ;
199- ; CHECK-NEWLOWERING-LABEL: udot_8to64:
200- ; CHECK-NEWLOWERING: // %bb.0: // %entry
201- ; CHECK-NEWLOWERING-NEXT: movi v4.2d, #0000000000000000
202- ; CHECK-NEWLOWERING-NEXT: udot z4.s, z2.b, z3.b
203- ; CHECK-NEWLOWERING-NEXT: uaddwb z0.d, z0.d, z4.s
204- ; CHECK-NEWLOWERING-NEXT: uaddwt z0.d, z0.d, z4.s
205- ; CHECK-NEWLOWERING-NEXT: ret
200+ ; CHECK-NEWLOWERING-SVE-LABEL: udot_8to64:
201+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
202+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
203+ ; CHECK-NEWLOWERING-SVE-NEXT: udot z4.s, z2.b, z3.b
204+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpkhi z2.d, z4.s
205+ ; CHECK-NEWLOWERING-SVE-NEXT: uunpklo z3.d, z4.s
206+ ; CHECK-NEWLOWERING-SVE-NEXT: add z2.d, z3.d, z2.d
207+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
208+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
209+ ;
210+ ; CHECK-NEWLOWERING-SVE2-LABEL: udot_8to64:
211+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
212+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
213+ ; CHECK-NEWLOWERING-SVE2-NEXT: udot z4.s, z2.b, z3.b
214+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwb z0.d, z0.d, z4.s
215+ ; CHECK-NEWLOWERING-SVE2-NEXT: uaddwt z0.d, z0.d, z4.s
216+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
206217entry:
207218 %a.wide = zext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
208219 %b.wide = zext <vscale x 16 x i8 > %b to <vscale x 16 x i64 >
@@ -223,13 +234,23 @@ define <vscale x 4 x i64> @sdot_8to64(<vscale x 4 x i64> %acc, <vscale x 16 x i8
223 234 ; CHECK-NEXT: add z1.d, z1.d, z3.d
224 235 ; CHECK-NEXT: ret
225 236 ;
226- ; CHECK-NEWLOWERING-LABEL: sdot_8to64:
227- ; CHECK-NEWLOWERING: // %bb.0: // %entry
228- ; CHECK-NEWLOWERING-NEXT: movi v4.2d, #0000000000000000
229- ; CHECK-NEWLOWERING-NEXT: sdot z4.s, z2.b, z3.b
230- ; CHECK-NEWLOWERING-NEXT: saddwb z0.d, z0.d, z4.s
231- ; CHECK-NEWLOWERING-NEXT: saddwt z0.d, z0.d, z4.s
232- ; CHECK-NEWLOWERING-NEXT: ret
237+ ; CHECK-NEWLOWERING-SVE-LABEL: sdot_8to64:
238+ ; CHECK-NEWLOWERING-SVE: // %bb.0: // %entry
239+ ; CHECK-NEWLOWERING-SVE-NEXT: movi v4.2d, #0000000000000000
240+ ; CHECK-NEWLOWERING-SVE-NEXT: sdot z4.s, z2.b, z3.b
241+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpkhi z2.d, z4.s
242+ ; CHECK-NEWLOWERING-SVE-NEXT: sunpklo z3.d, z4.s
243+ ; CHECK-NEWLOWERING-SVE-NEXT: add z2.d, z3.d, z2.d
244+ ; CHECK-NEWLOWERING-SVE-NEXT: add z0.d, z0.d, z2.d
245+ ; CHECK-NEWLOWERING-SVE-NEXT: ret
246+ ;
247+ ; CHECK-NEWLOWERING-SVE2-LABEL: sdot_8to64:
248+ ; CHECK-NEWLOWERING-SVE2: // %bb.0: // %entry
249+ ; CHECK-NEWLOWERING-SVE2-NEXT: movi v4.2d, #0000000000000000
250+ ; CHECK-NEWLOWERING-SVE2-NEXT: sdot z4.s, z2.b, z3.b
251+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwb z0.d, z0.d, z4.s
252+ ; CHECK-NEWLOWERING-SVE2-NEXT: saddwt z0.d, z0.d, z4.s
253+ ; CHECK-NEWLOWERING-SVE2-NEXT: ret
233254entry:
234255 %a.wide = sext <vscale x 16 x i8 > %a to <vscale x 16 x i64 >
235256 %b.wide = sext <vscale x 16 x i8 > %b to <vscale x 16 x i64 >
0 commit comments