Skip to content

Commit f60ec38

Browse files
author
Kai Lin
committed
[RVV] Add test for missed VWMACC combine
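Add a minimal reproducer with two consecutive widening multiply-accumulate chains that share the same sign-extended vector operand. The recorded output shows that the current DAG combine logic folds only the first mul+add chain into a single vwmacc.vx instruction; the second chain is still emitted as a separate vwmul.vx followed by vadd.vv.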
1 parent b196c52 commit f60ec38

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4+
5+
; Minimal reproducer for a missed widening multiply-accumulate combine.
;
; Two mul+add chains share the sign-extended vector operand %v16 but use
; different splatted scalars and different accumulators:
;   *acc0_ptr = sext(scalars[0]) * sext(*vptr) + *acc0_ptr
;   *acc1_ptr = sext(scalars[1]) * sext(*vptr) + *acc1_ptr
;
; The assertions below record the current output: the first chain is folded
; into vwmacc.vx, while the second chain is still emitted as vwmul.vx followed
; by a separate vadd.vv.
define void @matmul_min(ptr %vptr, ptr %scalars, ptr %acc0_ptr, ptr %acc1_ptr) {
; CHECK-LABEL: matmul_min:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li a4, 64
; CHECK-NEXT: li a5, 32
; CHECK-NEXT: vsetvli zero, a4, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a2)
; CHECK-NEXT: vsetvli zero, a5, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v20, (a0)
; CHECK-NEXT: lb a0, 0(a1)
; CHECK-NEXT: lb a1, 1(a1)
; CHECK-NEXT: vsetvli zero, a4, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v12, (a3)
; CHECK-NEXT: vsetvli zero, a5, e8, m2, ta, ma
; CHECK-NEXT: vwmacc.vx v8, a0, v20
; CHECK-NEXT: vwmul.vx v16, v20, a1
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vadd.vv v12, v16, v12
; CHECK-NEXT: vsetvli zero, a4, e8, m4, ta, ma
; CHECK-NEXT: vse8.v v8, (a2)
; CHECK-NEXT: vse8.v v12, (a3)
; CHECK-NEXT: ret
entry:
  ; Load both i16 accumulators up front.
  %acc0 = load <32 x i16>, ptr %acc0_ptr, align 1
  %acc1 = load <32 x i16>, ptr %acc1_ptr, align 1

  ; Shared multiplicand: i8 vector sign-extended to i16.
  %v8 = load <32 x i8>, ptr %vptr, align 1
  %v16 = sext <32 x i8> %v8 to <32 x i16>

  ; Chain 0: splat(sext(scalars[0])) * %v16 + %acc0.
  %s0_ptr = getelementptr i8, ptr %scalars, i32 0
  %s0_i8 = load i8, ptr %s0_ptr, align 1
  %s0_i16 = sext i8 %s0_i8 to i16
  %tmp0 = insertelement <32 x i16> poison, i16 %s0_i16, i32 0
  %splat0 = shufflevector <32 x i16> %tmp0, <32 x i16> poison, <32 x i32> zeroinitializer
  %mul0 = mul <32 x i16> %splat0, %v16
  %add0 = add <32 x i16> %mul0, %acc0

  ; Chain 1: splat(sext(scalars[1])) * %v16 + %acc1.
  %s1_ptr = getelementptr i8, ptr %scalars, i32 1
  %s1_i8 = load i8, ptr %s1_ptr, align 1
  %s1_i16 = sext i8 %s1_i8 to i16
  %tmp1 = insertelement <32 x i16> poison, i16 %s1_i16, i32 0
  %splat1 = shufflevector <32 x i16> %tmp1, <32 x i16> poison, <32 x i32> zeroinitializer
  %mul1 = mul <32 x i16> %splat1, %v16
  %add1 = add <32 x i16> %mul1, %acc1

  ; Write both updated accumulators back in place.
  store <32 x i16> %add0, ptr %acc0_ptr, align 1
  store <32 x i16> %add1, ptr %acc1_ptr, align 1

  ret void
}
55+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
56+
; RV32: {{.*}}
57+
; RV64: {{.*}}

0 commit comments

Comments
 (0)