Skip to content

Commit 656bd57

Browse files
committed
s390x: map saturating truncation to a packs/packu
1 parent 76bf0f3 commit 656bd57

File tree

3 files changed

+160
-0
lines changed

3 files changed

+160
-0
lines changed

llvm/lib/Target/SystemZ/SystemZInstrVector.td

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,6 +1299,46 @@ let Predicates = [FeatureVectorEnhancements3] in {
12991299
(VMNLQ VR128:$x, VR128:$y)>;
13001300
}
13011301

1302+
// Instantiate packs/packu: recognize a saturating truncation and convert
1303+
// into the corresponding packs/packu instruction.
1304+
multiclass SignedSaturatingTruncate<ValueType input, ValueType output,
1305+
Instruction packs> {
1306+
def : Pat<
1307+
(output (z_pack
1308+
(smin (smax (input VR128:$a), ssat_trunc_min_vec), ssat_trunc_max_vec),
1309+
(smin (smax (input VR128:$b), ssat_trunc_min_vec), ssat_trunc_max_vec)
1310+
)),
1311+
(packs VR128:$a, VR128:$b)
1312+
>;
1313+
1314+
def : Pat<
1315+
(output (z_pack
1316+
(smax (smin (input VR128:$a), ssat_trunc_max_vec), ssat_trunc_min_vec),
1317+
(smax (smin (input VR128:$b), ssat_trunc_max_vec), ssat_trunc_min_vec)
1318+
)),
1319+
(packs VR128:$a, VR128:$b)
1320+
>;
1321+
}
1322+
1323+
defm : SignedSaturatingTruncate<v8i16, v16i8, VPKSH>;
1324+
defm : SignedSaturatingTruncate<v4i32, v8i16, VPKSF>;
1325+
defm : SignedSaturatingTruncate<v2i64, v4i32, VPKSG>;
1326+
1327+
multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output,
1328+
Instruction packu> {
1329+
def : Pat<
1330+
(output (z_pack
1331+
(umin (input VR128:$a), usat_trunc_max_vec),
1332+
(umin (input VR128:$b), usat_trunc_max_vec)
1333+
)),
1334+
(packu VR128:$a, VR128:$b)
1335+
>;
1336+
}
1337+
1338+
defm : UnsignedSaturatingTruncate<v8i16, v16i8, VPKLSH>;
1339+
defm : UnsignedSaturatingTruncate<v4i32, v8i16, VPKLSF>;
1340+
defm : UnsignedSaturatingTruncate<v2i64, v4i32, VPKLSG>;
1341+
13021342
// Instantiate comparison patterns to recognize VACC/VSCBI for TYPE.
13031343
multiclass IntegerComputeCarryOrBorrow<ValueType type,
13041344
Instruction vacc, Instruction vscbi> {

llvm/lib/Target/SystemZ/SystemZOperators.td

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,31 @@ def vsplat_imm_eq_1 : PatFrag<(ops), (build_vector), [{
10671067
}]>;
10681068
def z_vzext1 : PatFrag<(ops node:$x), (and node:$x, vsplat_imm_eq_1)>;
10691069

1070+
// Vector constants for saturating truncation, containing the minimum and
1071+
// maximum value for the integer type that is half of the element width.
1072+
def ssat_trunc_min_vec: PatFrag<(ops), (build_vector), [{
1073+
APInt Imm;
1074+
EVT EltTy = N->getValueType(0).getVectorElementType();
1075+
unsigned SizeInBits = EltTy.getSizeInBits();
1076+
APInt min = APInt::getSignedMinValue(SizeInBits / 2).sext(SizeInBits);
1077+
return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, min);
1078+
}]>;
1079+
def ssat_trunc_max_vec: PatFrag<(ops), (build_vector), [{
1080+
APInt Imm;
1081+
EVT EltTy = N->getValueType(0).getVectorElementType();
1082+
unsigned SizeInBits = EltTy.getSizeInBits();
1083+
APInt max = APInt::getSignedMaxValue(SizeInBits / 2).sext(SizeInBits);
1084+
return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, max);
1085+
}]>;
1086+
1087+
def usat_trunc_max_vec: PatFrag<(ops), (build_vector), [{
1088+
APInt Imm;
1089+
EVT EltTy = N->getValueType(0).getVectorElementType();
1090+
unsigned SizeInBits = EltTy.getSizeInBits();
1091+
APInt max = APInt::getMaxValue(SizeInBits / 2).zext(SizeInBits);
1092+
return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, max);
1093+
}]>;
1094+
10701095
// Signed "integer greater than zero" on vectors.
10711096
def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
10721097

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s
4+
5+
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
6+
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2
7+
8+
define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
9+
; CHECK-LABEL: i16_signed:
10+
; CHECK: # %bb.0: # %bb2
11+
; CHECK-NEXT: vpksh %v24, %v24, %v26
12+
; CHECK-NEXT: br %r14
13+
bb2:
14+
%0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15+
%1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
16+
%2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
17+
%3 = trunc nsw <16 x i16> %2 to <16 x i8>
18+
ret <16 x i8> %3
19+
ret <16 x i8> %3
20+
}
21+
22+
define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
23+
; CHECK-LABEL: i32_signed:
24+
; CHECK: # %bb.0: # %bb2
25+
; CHECK-NEXT: vpksf %v24, %v24, %v26
26+
; CHECK-NEXT: br %r14
27+
bb2:
28+
%0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
29+
%1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768))
30+
%2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767))
31+
%3 = trunc nsw <8 x i32> %2 to <8 x i16>
32+
ret <8 x i16> %3
33+
}
34+
35+
define <4 x i32> @i64_signed(<2 x i64> %a, <2 x i64> %b) {
36+
; CHECK-LABEL: i64_signed:
37+
; CHECK: # %bb.0: # %bb2
38+
; CHECK-NEXT: vpksg %v24, %v24, %v26
39+
; CHECK-NEXT: br %r14
40+
bb2:
41+
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
42+
%1 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %0, <4 x i64> splat (i64 -2147483648))
43+
%2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> splat (i64 2147483647))
44+
%3 = trunc nsw <4 x i64> %2 to <4 x i32>
45+
ret <4 x i32> %3
46+
}
47+
48+
define <4 x i32> @i64_signed_flipped(<2 x i64> %a, <2 x i64> %b) {
49+
; CHECK-LABEL: i64_signed_flipped:
50+
; CHECK: # %bb.0: # %bb2
51+
; CHECK-NEXT: vpksg %v24, %v24, %v26
52+
; CHECK-NEXT: br %r14
53+
bb2:
54+
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
55+
%1 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> splat (i64 2147483647), <4 x i64> %0)
56+
%2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> splat (i64 -2147483648), <4 x i64> %1)
57+
%3 = trunc nsw <4 x i64> %2 to <4 x i32>
58+
ret <4 x i32> %3
59+
}
60+
61+
define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
62+
; CHECK-LABEL: i16_unsigned:
63+
; CHECK: # %bb.0: # %bb2
64+
; CHECK-NEXT: vpklsh %v24, %v24, %v26
65+
; CHECK-NEXT: br %r14
66+
bb2:
67+
%0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
68+
%1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255))
69+
%2 = trunc nuw <16 x i16> %1 to <16 x i8>
70+
ret <16 x i8> %2
71+
}
72+
73+
define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
74+
; CHECK-LABEL: i32_unsigned:
75+
; CHECK: # %bb.0: # %bb2
76+
; CHECK-NEXT: vpklsf %v24, %v24, %v26
77+
; CHECK-NEXT: br %r14
78+
bb2:
79+
%0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
80+
%1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
81+
%2 = trunc nuw <8 x i32> %1 to <8 x i16>
82+
ret <8 x i16> %2
83+
}
84+
85+
define <4 x i32> @i64_unsigned(<2 x i64> %a, <2 x i64> %b) {
86+
; CHECK-LABEL: i64_unsigned:
87+
; CHECK: # %bb.0: # %bb2
88+
; CHECK-NEXT: vpklsg %v24, %v24, %v26
89+
; CHECK-NEXT: br %r14
90+
bb2:
91+
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
92+
%1 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %0, <4 x i64> splat (i64 4294967295))
93+
%2 = trunc nuw <4 x i64> %1 to <4 x i32>
94+
ret <4 x i32> %2
95+
}

0 commit comments

Comments
 (0)