Skip to content

Commit b6ff6b7

Browse files
committed
Update sme test with +i8mm
1 parent 5e0d1c4 commit b6ff6b7

File tree

1 file changed: +13 -66 lines changed

1 file changed: +13 -66 lines changed

llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll

Lines changed: 13 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mattr=+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes=true < %s | FileCheck %s --check-prefixes=COMMON,NEON
33
; RUN: llc -mattr=+sve,+dotprod,+i8mm -aarch64-enable-partial-reduce-nodes=true < %s | FileCheck %s --check-prefixes=COMMON,SVE
4-
; RUN: llc -mattr=+sme -aarch64-enable-partial-reduce-nodes=true -force-streaming < %s | FileCheck %s --check-prefix=SME
4+
; RUN: llc -mattr=+sme,+i8mm -aarch64-enable-partial-reduce-nodes=true -force-streaming < %s | FileCheck %s --check-prefix=SME
55

66
target triple = "aarch64"
77

@@ -418,23 +418,10 @@ define <4 x i32> @four_way_i8_i32_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
418418
;
419419
; SME-LABEL: four_way_i8_i32_vl128_usdot:
420420
; SME: // %bb.0:
421-
; SME-NEXT: ptrue p0.s, vl4
422-
; SME-NEXT: ldr q2, [x0]
423-
; SME-NEXT: mov w8, #4 // =0x4
424-
; SME-NEXT: ld1b { z0.s }, p0/z, [x1]
425-
; SME-NEXT: ld1sb { z1.s }, p0/z, [x2]
426-
; SME-NEXT: mad z0.s, p0/m, z1.s, z2.s
427-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1, x8]
428-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8]
429-
; SME-NEXT: mov w8, #8 // =0x8
430-
; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s
431-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1, x8]
432-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8]
433-
; SME-NEXT: mov w8, #12 // =0xc
434-
; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s
435-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1, x8]
436-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8]
437-
; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s
421+
; SME-NEXT: ldr q0, [x0]
422+
; SME-NEXT: ldr q1, [x1]
423+
; SME-NEXT: ldr q2, [x2]
424+
; SME-NEXT: usdot z0.s, z1.b, z2.b
438425
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
439426
; SME-NEXT: ret
440427
%acc = load <4 x i32>, ptr %accptr
@@ -491,41 +478,11 @@ define <8 x i32> @four_way_i8_i32_vl128_double_width_usdot(ptr %accptr, ptr %upt
491478
;
492479
; SME-LABEL: four_way_i8_i32_vl128_double_width_usdot:
493480
; SME: // %bb.0:
494-
; SME-NEXT: ptrue p0.s, vl4
495-
; SME-NEXT: mov w8, #16 // =0x10
496-
; SME-NEXT: mov w9, #4 // =0x4
497-
; SME-NEXT: ldp q5, q4, [x0]
498-
; SME-NEXT: ld1b { z0.s }, p0/z, [x1, x8]
499-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1]
500-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, x8]
501-
; SME-NEXT: ld1sb { z3.s }, p0/z, [x2]
502-
; SME-NEXT: mov w8, #20 // =0x14
503-
; SME-NEXT: ld1b { z6.s }, p0/z, [x1, x8]
504-
; SME-NEXT: mad z0.s, p0/m, z2.s, z4.s
505-
; SME-NEXT: ld1b { z2.s }, p0/z, [x1, x9]
506-
; SME-NEXT: ld1sb { z4.s }, p0/z, [x2, x9]
507-
; SME-NEXT: mad z1.s, p0/m, z3.s, z5.s
508-
; SME-NEXT: ld1sb { z3.s }, p0/z, [x2, x8]
509-
; SME-NEXT: mov w8, #24 // =0x18
510-
; SME-NEXT: mov w9, #8 // =0x8
511-
; SME-NEXT: ld1b { z5.s }, p0/z, [x1, x8]
512-
; SME-NEXT: mla z0.s, p0/m, z3.s, z6.s
513-
; SME-NEXT: ld1sb { z3.s }, p0/z, [x2, x8]
514-
; SME-NEXT: mov w8, #28 // =0x1c
515-
; SME-NEXT: mla z1.s, p0/m, z4.s, z2.s
516-
; SME-NEXT: ld1b { z2.s }, p0/z, [x1, x9]
517-
; SME-NEXT: ld1sb { z4.s }, p0/z, [x2, x9]
518-
; SME-NEXT: mov w9, #12 // =0xc
519-
; SME-NEXT: ld1b { z6.s }, p0/z, [x1, x8]
520-
; SME-NEXT: mla z1.s, p0/m, z4.s, z2.s
521-
; SME-NEXT: movprfx z2, z0
522-
; SME-NEXT: mla z2.s, p0/m, z3.s, z5.s
523-
; SME-NEXT: ld1b { z0.s }, p0/z, [x1, x9]
524-
; SME-NEXT: ld1sb { z3.s }, p0/z, [x2, x8]
525-
; SME-NEXT: ld1sb { z4.s }, p0/z, [x2, x9]
526-
; SME-NEXT: mad z0.s, p0/m, z4.s, z1.s
527-
; SME-NEXT: movprfx z1, z2
528-
; SME-NEXT: mla z1.s, p0/m, z3.s, z6.s
481+
; SME-NEXT: ldp q0, q1, [x0]
482+
; SME-NEXT: ldp q3, q2, [x1]
483+
; SME-NEXT: ldp q5, q4, [x2]
484+
; SME-NEXT: usdot z0.s, z3.b, z5.b
485+
; SME-NEXT: usdot z1.s, z2.b, z4.b
529486
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
530487
; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
531488
; SME-NEXT: ret
@@ -610,20 +567,10 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
610567
;
611568
; SME-LABEL: four_way_i8_i32_vl256_usdot:
612569
; SME: // %bb.0:
613-
; SME-NEXT: ptrue p0.s
614570
; SME-NEXT: ldr z0, [x0]
615-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1]
616-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2]
617-
; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s
618-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1, #1, mul vl]
619-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, #1, mul vl]
620-
; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s
621-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1, #2, mul vl]
622-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, #2, mul vl]
623-
; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s
624-
; SME-NEXT: ld1b { z1.s }, p0/z, [x1, #3, mul vl]
625-
; SME-NEXT: ld1sb { z2.s }, p0/z, [x2, #3, mul vl]
626-
; SME-NEXT: mla z0.s, p0/m, z2.s, z1.s
571+
; SME-NEXT: ldr z1, [x1]
572+
; SME-NEXT: ldr z2, [x2]
573+
; SME-NEXT: usdot z0.s, z1.b, z2.b
627574
; SME-NEXT: mov z1.d, z0.d
628575
; SME-NEXT: ext z1.b, z1.b, z0.b, #16
629576
; SME-NEXT: // kill: def $q0 killed $q0 killed $z0

0 commit comments

Comments (0)