Skip to content

Commit 55d4e92

Browse files
authored
[WebAssembly] Add extra pattern for dot (llvm#151775)
Fixes llvm#50154
1 parent 15cde99 commit 55d4e92

File tree

2 files changed

+127
-0
lines changed

2 files changed

+127
-0
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,6 +1213,27 @@ defm EXTMUL_LOW_U :
12131213
defm EXTMUL_HIGH_U :
12141214
SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
12151215

1216+
// Pattern for i32x4.dot_i16x8_s.
// Matches an add of two byte shuffles taken over the signed extending
// multiplies (extmul_low_s / extmul_high_s) of the same $lhs and $rhs.
// Shuffle byte indices 0-15 address the extmul_low_s result and 16-31 the
// extmul_high_s result, four consecutive bytes per i32 lane. The first shuffle
// gathers products 0, 2, 4, 6 and the second gathers products 1, 3, 5, 7, so
// each lane of the add is the sum of one adjacent pair of products. The whole
// expression is replaced by a single DOT on the original v8i16 operands.
1217+
def : Pat<
1218+
(v4i32 (add
1219+
(wasm_shuffle
1220+
(v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
1221+
(v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
1222+
(i32 0), (i32 1), (i32 2), (i32 3),
1223+
(i32 8), (i32 9), (i32 10), (i32 11),
1224+
(i32 16), (i32 17), (i32 18), (i32 19),
1225+
(i32 24), (i32 25), (i32 26), (i32 27)),
1226+
(wasm_shuffle
1227+
(v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
1228+
(v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
1229+
(i32 4), (i32 5), (i32 6), (i32 7),
1230+
(i32 12), (i32 13), (i32 14), (i32 15),
1231+
(i32 20), (i32 21), (i32 22), (i32 23),
1232+
(i32 28), (i32 29), (i32 30), (i32 31)))
1233+
),
1234+
(v4i32 (DOT v8i16:$lhs, v8i16:$rhs))
1235+
>;
1236+
12161237
//===----------------------------------------------------------------------===//
12171238
// Floating-point unary arithmetic
12181239
//===----------------------------------------------------------------------===//
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mattr=+simd128 | FileCheck %s
3+
4+
target triple = "wasm32-unknown-unknown"
5+
6+
; Positive test: sign-extend both inputs to <8 x i32>, multiply, then add the
; even-indexed products (0, 2, 4, 6) to the odd-indexed products (1, 3, 5, 7).
; The expected output is a single i32x4.dot_i16x8_s on the two arguments.
define <4 x i32> @dot_sext_1(<8 x i16> %a, <8 x i16> %b) {
7+
; CHECK-LABEL: dot_sext_1:
8+
; CHECK: .functype dot_sext_1 (v128, v128) -> (v128)
9+
; CHECK-NEXT: # %bb.0:
10+
; CHECK-NEXT: local.get 0
11+
; CHECK-NEXT: local.get 1
12+
; CHECK-NEXT: i32x4.dot_i16x8_s
13+
; CHECK-NEXT: # fallthrough-return
14+
%sext1 = sext <8 x i16> %a to <8 x i32>
15+
%sext2 = sext <8 x i16> %b to <8 x i32>
16+
%mul = mul <8 x i32> %sext1, %sext2
17+
%shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
18+
%shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
19+
%res = add <4 x i32> %shuffle1, %shuffle2
20+
ret <4 x i32> %res
21+
}
22+
23+
24+
; Positive test: signed widening multiply followed by an even/odd pairwise add,
; with the add operands swapped (the odd-lane shuffle is the first operand).
; The expected output is still a single i32x4.dot_i16x8_s.
define <4 x i32> @dot_sext_2(<8 x i16> %a, <8 x i16> %b) {
25+
; CHECK-LABEL: dot_sext_2:
26+
; CHECK: .functype dot_sext_2 (v128, v128) -> (v128)
27+
; CHECK-NEXT: # %bb.0:
28+
; CHECK-NEXT: local.get 0
29+
; CHECK-NEXT: local.get 1
30+
; CHECK-NEXT: i32x4.dot_i16x8_s
31+
; CHECK-NEXT: # fallthrough-return
32+
%sext1 = sext <8 x i16> %a to <8 x i32>
33+
%sext2 = sext <8 x i16> %b to <8 x i32>
34+
%mul = mul <8 x i32> %sext1, %sext2
35+
%shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
36+
%shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
37+
%res = add <4 x i32> %shuffle2, %shuffle1
38+
ret <4 x i32> %res
39+
}
40+
41+
; Positive test: both multiplicands are the same sign-extended vector. The
; expected output is i32x4.dot_i16x8_s with the single argument pushed twice.
define <4 x i32> @dot_sext_self(<8 x i16> %v) {
42+
; CHECK-LABEL: dot_sext_self:
43+
; CHECK: .functype dot_sext_self (v128) -> (v128)
44+
; CHECK-NEXT: # %bb.0:
45+
; CHECK-NEXT: local.get 0
46+
; CHECK-NEXT: local.get 0
47+
; CHECK-NEXT: i32x4.dot_i16x8_s
48+
; CHECK-NEXT: # fallthrough-return
49+
%sext = sext <8 x i16> %v to <8 x i32>
50+
%mul = mul <8 x i32> %sext, %sext
51+
%shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
52+
%shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
53+
%res = add <4 x i32> %shuffle1, %shuffle2
54+
ret <4 x i32> %res
55+
}
56+
57+
; INFO: Negative test: the inputs are zero-extended rather than sign-extended,
; so the signed dot instruction must not be selected. The expected output keeps
; the unsigned extmul_low/extmul_high, the two byte shuffles and the i32x4.add.
58+
define <4 x i32> @dot_zext(<8 x i16> %a, <8 x i16> %b) {
59+
; CHECK-LABEL: dot_zext:
60+
; CHECK: .functype dot_zext (v128, v128) -> (v128)
61+
; CHECK-NEXT: .local v128
62+
; CHECK-NEXT: # %bb.0:
63+
; CHECK-NEXT: local.get 0
64+
; CHECK-NEXT: local.get 1
65+
; CHECK-NEXT: i32x4.extmul_low_i16x8_u
66+
; CHECK-NEXT: local.tee 2
67+
; CHECK-NEXT: local.get 0
68+
; CHECK-NEXT: local.get 1
69+
; CHECK-NEXT: i32x4.extmul_high_i16x8_u
70+
; CHECK-NEXT: local.tee 1
71+
; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
72+
; CHECK-NEXT: local.get 2
73+
; CHECK-NEXT: local.get 1
74+
; CHECK-NEXT: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
75+
; CHECK-NEXT: i32x4.add
76+
; CHECK-NEXT: # fallthrough-return
77+
%zext1 = zext <8 x i16> %a to <8 x i32>
78+
%zext2 = zext <8 x i16> %b to <8 x i32>
79+
%mul = mul <8 x i32> %zext1, %zext2
80+
%shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
81+
%shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
82+
%res = add <4 x i32> %shuffle1, %shuffle2
83+
ret <4 x i32> %res
84+
}
85+
86+
; INFO: Negative test: the shuffles take the low half (0-3) and the high half
; (4-7) of the products instead of the even and odd lanes, so the add sums
; products i and i+4 rather than adjacent pairs. The dot instruction must not
; be selected; the expected output is extmul_low + extmul_high.
87+
define <4 x i32> @dot_wrong_shuffle(<8 x i16> %a, <8 x i16> %b) {
88+
; CHECK-LABEL: dot_wrong_shuffle:
89+
; CHECK: .functype dot_wrong_shuffle (v128, v128) -> (v128)
90+
; CHECK-NEXT: # %bb.0:
91+
; CHECK-NEXT: local.get 0
92+
; CHECK-NEXT: local.get 1
93+
; CHECK-NEXT: i32x4.extmul_low_i16x8_s
94+
; CHECK-NEXT: local.get 0
95+
; CHECK-NEXT: local.get 1
96+
; CHECK-NEXT: i32x4.extmul_high_i16x8_s
97+
; CHECK-NEXT: i32x4.add
98+
; CHECK-NEXT: # fallthrough-return
99+
%sext1 = sext <8 x i16> %a to <8 x i32>
100+
%sext2 = sext <8 x i16> %b to <8 x i32>
101+
%mul = mul <8 x i32> %sext1, %sext2
102+
%shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
103+
%shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
104+
%res = add <4 x i32> %shuffle1, %shuffle2
105+
ret <4 x i32> %res
106+
}

0 commit comments

Comments
 (0)