Skip to content

Commit af99774

Browse files
committed
Add support for relaxed dot
1 parent ffaebd0 commit af99774

File tree

2 files changed

+36
-28
lines changed

2 files changed

+36
-28
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1711,6 +1711,26 @@ defm RELAXED_DOT :
17111711
"i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
17121712
"i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>;
17131713

1714+
// Select RELAXED_DOT (i16x8.relaxed_dot_i8x16_i7x16_s) for a v8i16 add of two
// byte shuffles that both read (extmul_low_s $lhs, $rhs) and
// (extmul_high_s $lhs, $rhs):
//   - byte indices 0,1,4,5,...,28,29 select 16-bit products 0, 2, 4, ..., 14
//   - byte indices 2,3,6,7,...,30,31 select 16-bit products 1, 3, 5, ..., 15
// so each result lane is the sum of two adjacent signed 8x8->16 products.
// NOTE(review): the selected instruction's mnemonic describes the second
// operand as i7x16, but nothing in this pattern constrains the value range of
// $rhs -- confirm the relaxed instruction is an acceptable replacement for
// arbitrary i8 inputs.
// NOTE(review): presumably the commuted form (odd-lane shuffle first) is
// covered through commutativity of `add`; verify against the
// relaxed_dot_sext_2 test.
def : Pat<
1715+
(v8i16 (add
1716+
(wasm_shuffle
1717+
(v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
1718+
(v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
1719+
(i32 0), (i32 1), (i32 4), (i32 5),
1720+
(i32 8), (i32 9), (i32 12), (i32 13),
1721+
(i32 16), (i32 17), (i32 20), (i32 21),
1722+
(i32 24), (i32 25), (i32 28), (i32 29)),
1723+
(wasm_shuffle
1724+
(v8i16 (extmul_low_s v16i8:$lhs, v16i8:$rhs)),
1725+
(v8i16 (extmul_high_s v16i8:$lhs, v16i8:$rhs)),
1726+
(i32 2), (i32 3), (i32 6), (i32 7),
1727+
(i32 10), (i32 11), (i32 14), (i32 15),
1728+
(i32 18), (i32 19), (i32 22), (i32 23),
1729+
(i32 26), (i32 27), (i32 30), (i32 31)))
1730+
),
1731+
(v8i16 (RELAXED_DOT v16i8:$lhs, v16i8:$rhs))
1732+
>;
1733+
17141734
defm RELAXED_DOT_ADD :
17151735
RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc),
17161736
(outs), (ins),
@@ -1719,6 +1739,13 @@ defm RELAXED_DOT_ADD :
17191739
"i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
17201740
"i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>;
17211741

1742+
// Select RELAXED_DOT_ADD (i32x4.relaxed_dot_i8x16_i7x16_add_s) for a v4i32 add
// whose operands are:
//   - int_wasm_extadd_pairwise_signed applied to the v8i16 result of
//     int_wasm_relaxed_dot_i8x16_i7x16_signed($lhs, $rhs), and
//   - the v4i32 accumulator $acc.
// $lhs, $rhs and $acc are passed through unchanged as the instruction's three
// operands, replacing the separate dot / extadd_pairwise / add sequence.
def : Pat<
1743+
(v4i32 (add
1744+
(v4i32 (int_wasm_extadd_pairwise_signed
1745+
(v8i16 (int_wasm_relaxed_dot_i8x16_i7x16_signed v16i8:$lhs, v16i8:$rhs)))),
1746+
(v4i32 V128:$acc))),
1747+
(v4i32 (RELAXED_DOT_ADD v16i8:$lhs, v16i8:$rhs, (v4i32 V128:$acc)))
1748+
>;
17221749
//===----------------------------------------------------------------------===//
17231750
// Relaxed BFloat16 dot product
17241751
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/WebAssembly/simd-relaxed-dot.ll

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,8 @@ define <8 x i16> @relaxed_dot_sext_1(<16 x i8> %a, <16 x i8> %b) {
99
; CHECK-LABEL: relaxed_dot_sext_1:
1010
; CHECK: .functype relaxed_dot_sext_1 (v128, v128) -> (v128)
1111
; CHECK-NEXT: # %bb.0:
12-
; CHECK-NEXT: i16x8.extmul_low_i8x16_s $push6=, $0, $1
13-
; CHECK-NEXT: local.tee $push5=, $2=, $pop6
14-
; CHECK-NEXT: i16x8.extmul_high_i8x16_s $push4=, $0, $1
15-
; CHECK-NEXT: local.tee $push3=, $1=, $pop4
16-
; CHECK-NEXT: i8x16.shuffle $push1=, $pop5, $pop3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
17-
; CHECK-NEXT: i8x16.shuffle $push0=, $2, $1, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
18-
; CHECK-NEXT: i16x8.add $push2=, $pop1, $pop0
19-
; CHECK-NEXT: return $pop2
12+
; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
13+
; CHECK-NEXT: return $pop0
2014
%sext1 = sext <16 x i8> %a to <16 x i16>
2115
%sext2 = sext <16 x i8> %b to <16 x i16>
2216
%mul = mul <16 x i16> %sext1, %sext2
@@ -31,14 +25,8 @@ define <8 x i16> @relaxed_dot_sext_2(<16 x i8> %a, <16 x i8> %b) {
3125
; CHECK-LABEL: relaxed_dot_sext_2:
3226
; CHECK: .functype relaxed_dot_sext_2 (v128, v128) -> (v128)
3327
; CHECK-NEXT: # %bb.0:
34-
; CHECK-NEXT: i16x8.extmul_low_i8x16_s $push6=, $0, $1
35-
; CHECK-NEXT: local.tee $push5=, $2=, $pop6
36-
; CHECK-NEXT: i16x8.extmul_high_i8x16_s $push4=, $0, $1
37-
; CHECK-NEXT: local.tee $push3=, $1=, $pop4
38-
; CHECK-NEXT: i8x16.shuffle $push1=, $pop5, $pop3, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
39-
; CHECK-NEXT: i8x16.shuffle $push0=, $2, $1, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
40-
; CHECK-NEXT: i16x8.add $push2=, $pop1, $pop0
41-
; CHECK-NEXT: return $pop2
28+
; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
29+
; CHECK-NEXT: return $pop0
4230
%sext1 = sext <16 x i8> %a to <16 x i16>
4331
%sext2 = sext <16 x i8> %b to <16 x i16>
4432
%mul = mul <16 x i16> %sext1, %sext2
@@ -52,14 +40,8 @@ define <8 x i16> @relaxed_dot_sext_self(<16 x i8> %v) {
5240
; CHECK-LABEL: relaxed_dot_sext_self:
5341
; CHECK: .functype relaxed_dot_sext_self (v128) -> (v128)
5442
; CHECK-NEXT: # %bb.0:
55-
; CHECK-NEXT: i16x8.extmul_low_i8x16_s $push6=, $0, $0
56-
; CHECK-NEXT: local.tee $push5=, $1=, $pop6
57-
; CHECK-NEXT: i16x8.extmul_high_i8x16_s $push4=, $0, $0
58-
; CHECK-NEXT: local.tee $push3=, $0=, $pop4
59-
; CHECK-NEXT: i8x16.shuffle $push1=, $pop5, $pop3, 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
60-
; CHECK-NEXT: i8x16.shuffle $push0=, $1, $0, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31
61-
; CHECK-NEXT: i16x8.add $push2=, $pop1, $pop0
62-
; CHECK-NEXT: return $pop2
43+
; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $0
44+
; CHECK-NEXT: return $pop0
6345
%sext = sext <16 x i8> %v to <16 x i16>
6446
%mul = mul <16 x i16> %sext, %sext
6547
%shuffle1 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -72,10 +54,8 @@ define <4 x i32> @relaxed_dot_add_from_relaxed_dot(<16 x i8> %a, <16 x i8> %b, <
7254
; CHECK-LABEL: relaxed_dot_add_from_relaxed_dot:
7355
; CHECK: .functype relaxed_dot_add_from_relaxed_dot (v128, v128, v128) -> (v128)
7456
; CHECK-NEXT: # %bb.0:
75-
; CHECK-NEXT: i16x8.relaxed_dot_i8x16_i7x16_s $push0=, $0, $1
76-
; CHECK-NEXT: i32x4.extadd_pairwise_i16x8_s $push1=, $pop0
77-
; CHECK-NEXT: i32x4.add $push2=, $pop1, $2
78-
; CHECK-NEXT: return $pop2
57+
; CHECK-NEXT: i32x4.relaxed_dot_i8x16_i7x16_add_s $push0=, $0, $1, $2
58+
; CHECK-NEXT: return $pop0
7959
%relaxed_dot_call = call <8 x i16> @llvm.wasm.relaxed.dot.i8x16.i7x16.signed(<16 x i8> %a, <16 x i8> %b)
8060
%sext = call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> %relaxed_dot_call)
8161
%res = add <4 x i32> %sext, %c
@@ -102,6 +82,7 @@ define <8 x i16> @relaxed_dot_zext(<16 x i8> %a, <16 x i8> %b) {
10282
%shuffle2 = shufflevector <16 x i16> %mul, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
10383
%res = add <8 x i16> %shuffle1, %shuffle2
10484
ret <8 x i16> %res
85+
10586
}
10687

10788
; INFO: Negative test

0 commit comments

Comments
 (0)