Skip to content

Commit eb7f06d

Browse files
committed
webassembly: recognize saturating truncation
and turn it into `narrow_TYPE_s`
1 parent 3fa3e09 commit eb7f06d

File tree

2 files changed

+130
-0
lines changed

2 files changed

+130
-0
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,6 +1445,49 @@ def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))),
14451445
def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))),
14461446
(NARROW_U_I16x8 $left, $right)>;
14471447

1448+
// Fold a signed saturating truncation into a single narrow_TYPE_s.
//
// The matched input is a wasm_narrow_u whose two operands were each clamped
// to [minval, maxval] with signed min/max and then ANDed with `mask`. Both
// clamp orders are matched, and each is rewritten to `narrow` applied to the
// unclamped operands.
//
//   input   - vector type of the two wide operands
//   output  - vector type of the narrowed result
//   narrow  - instruction emitted in place of the matched sequence
//   minval  - lower clamp bound (splatted as an i32)
//   maxval  - upper clamp bound (splatted as an i32)
//   mask    - per-lane AND mask applied after the clamp (splatted as an i32)
multiclass SignedSaturatingTruncate<ValueType input, ValueType output,
                                    Instruction narrow, int minval,
                                    int maxval, int mask> {
  // Splatted constants shared by both patterns.
  defvar SatMin   = (splat_vector (i32 minval));
  defvar SatMax   = (splat_vector (i32 maxval));
  defvar LaneMask = (splat_vector (i32 mask));

  // Clamp spelled smin(smax(x, minval), maxval).
  def : Pat<(output (wasm_narrow_u
              (and (smin (smax (input V128:$a), SatMin), SatMax), LaneMask),
              (and (smin (smax (input V128:$b), SatMin), SatMax), LaneMask))),
            (narrow V128:$a, V128:$b)>;

  // Clamp spelled smax(smin(x, maxval), minval).
  def : Pat<(output (wasm_narrow_u
              (and (smax (smin (input V128:$a), SatMax), SatMin), LaneMask),
              (and (smax (smin (input V128:$b), SatMax), SatMin), LaneMask))),
            (narrow V128:$a, V128:$b)>;
}

defm : SignedSaturatingTruncate<v8i16, v16i8, NARROW_S_I8x16,
                                -128, 127, 0xFF>;
defm : SignedSaturatingTruncate<v4i32, v8i16, NARROW_S_I16x8,
                                -32768, 32767, 0xFFFF>;
1476+
1477+
// Fold a saturating truncation to the unsigned range into narrow_TYPE_u.
//
// narrow_TYPE_u interprets its input lanes as *signed* integers and saturates
// them to [0, maxval]. An unsigned `umin x, maxval` is therefore NOT an
// equivalent clamp: a lane with its sign bit set becomes maxval under umin
// but 0 under narrow_TYPE_u, so dropping the umin would change the result.
// Only a signed clamp to [0, maxval] can be removed, and that is the shape
// matched here, in both min/max orders.
//
// NOTE(review): no AND mask is matched because the clamped value already fits
// in the narrow lane; presumably the mask is folded away before instruction
// selection -- confirm against generated code and regenerate the tests.
//
//   input   - vector type of the two wide operands
//   output  - vector type of the narrowed result
//   narrow  - instruction emitted in place of the matched sequence
//   maxval  - upper clamp bound (splatted as an i32); the lower bound is 0
multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output,
                                      Instruction narrow, int maxval> {
  // Clamp spelled smin(smax(x, 0), maxval).
  def : Pat<
    (output (wasm_narrow_u
      (smin (smax (input V128:$a), (splat_vector (i32 0))),
            (splat_vector (i32 maxval))),
      (smin (smax (input V128:$b), (splat_vector (i32 0))),
            (splat_vector (i32 maxval)))
    )),
    (narrow V128:$a, V128:$b)
  >;

  // Clamp spelled smax(smin(x, maxval), 0).
  def : Pat<
    (output (wasm_narrow_u
      (smax (smin (input V128:$a), (splat_vector (i32 maxval))),
            (splat_vector (i32 0))),
      (smax (smin (input V128:$b), (splat_vector (i32 maxval))),
            (splat_vector (i32 0)))
    )),
    (narrow V128:$a, V128:$b)
  >;
}

defm : UnsignedSaturatingTruncate<v8i16, v16i8, NARROW_U_I8x16, 0xFF>;
defm : UnsignedSaturatingTruncate<v4i32, v8i16, NARROW_U_I16x8, 0xFFFF>;
1490+
14481491
// Bitcasts are nops
14491492
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
14501493
foreach t1 = AllVecs in
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
4+
5+
target triple = "wasm32-unknown-unknown"
6+
7+
; Min/max intrinsics called by the functions in this file, declared
; explicitly for every vector type that is used.
declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
9+
10+
; Signed clamp of 16-bit lanes to [-128, 127] followed by a truncation to
; 8-bit lanes; expected to select a single i8x16.narrow_i16x8_s.
define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_signed:
; CHECK: .functype i16_signed (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.narrow_i16x8_s
; CHECK-NEXT: # fallthrough-return
bb2:
  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
  %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
  %3 = trunc nsw <16 x i16> %2 to <16 x i8>
  ret <16 x i8> %3
}
26+
27+
; Signed clamp of 32-bit lanes to [-32768, 32767] (smax first, then smin)
; followed by a truncation to 16-bit lanes; expected to select a single
; i16x8.narrow_i32x4_s.
define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_signed:
; CHECK: .functype i32_signed (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
bb2:
  ; Concatenate the two inputs into one 8-lane vector.
  %wide = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %lower_bounded = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %wide, <8 x i32> splat (i32 -32768))
  %clamped = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %lower_bounded, <8 x i32> splat (i32 32767))
  %narrowed = trunc nsw <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %narrowed
}
42+
43+
; Same clamp as @i32_signed but applied in the opposite order (smin first,
; then smax) and with the constant as the first intrinsic operand; expected to
; select a single i16x8.narrow_i32x4_s.
define <8 x i16> @i32_signed_flipped(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_signed_flipped:
; CHECK: .functype i32_signed_flipped (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
bb2:
  ; Concatenate the two inputs into one 8-lane vector.
  %wide = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %upper_bounded = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> splat (i32 32767), <8 x i32> %wide)
  %clamped = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> splat (i32 -32768), <8 x i32> %upper_bounded)
  %narrowed = trunc nsw <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %narrowed
}
58+
59+
; Unsigned clamp of 16-bit lanes to at most 255 followed by a truncation to
; 8-bit lanes.
; NOTE(review): the CHECK lines expect the umin to be dropped entirely.
; i8x16.narrow_i16x8_u reads its input lanes as signed, so for lanes >= 0x8000
; the IR below yields 255 while the bare narrow yields 0 -- confirm this fold
; is intended before relying on these expectations.
define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_unsigned:
; CHECK: .functype i16_unsigned (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i8x16.narrow_i16x8_u
; CHECK-NEXT: # fallthrough-return
bb2:
  ; Concatenate the two inputs into one 16-lane vector.
  %wide = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %clamped = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %wide, <16 x i16> splat (i16 255))
  %narrowed = trunc nuw <16 x i16> %clamped to <16 x i8>
  ret <16 x i8> %narrowed
}
73+
74+
; Unsigned clamp of 32-bit lanes to at most 65535 followed by a truncation to
; 16-bit lanes. The truncation carries `nuw` (not `nsw`): the clamped value
; lies in [0, 65535], so only zero bits are discarded, whereas `nsw` would
; make every lane above 32767 poison.
; NOTE(review): the CHECK lines expect the umin to be dropped entirely.
; i16x8.narrow_i32x4_u reads its input lanes as signed, so for lanes with the
; sign bit set the IR below yields 65535 while the bare narrow yields 0 --
; confirm this fold is intended before relying on these expectations.
define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_unsigned:
; CHECK: .functype i32_unsigned (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0: # %bb2
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 1
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
bb2:
  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
  %2 = trunc nuw <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

0 commit comments

Comments
 (0)