Skip to content

Commit bd861d0

Browse files
committed
[AArch64] Add some basic patterns for qshrn.
With the truncssat nodes, these are relatively simple tablegen patterns to add. The existing intrinsics are converted to shift+truncsat so they can lower using the new patterns. Fixes #112925.
1 parent f2302ed commit bd861d0

File tree

3 files changed

+54
-63
lines changed

3 files changed

+54
-63
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5950,6 +5950,27 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
59505950
case Intrinsic::aarch64_neon_uqxtn:
59515951
return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
59525952
Op.getOperand(1));
5953+
case Intrinsic::aarch64_neon_sqshrn:
5954+
if (Op.getValueType().isVector())
5955+
return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
5956+
DAG.getNode(AArch64ISD::VASHR, dl,
5957+
Op.getOperand(1).getValueType(),
5958+
Op.getOperand(1), Op.getOperand(2)));
5959+
return SDValue();
5960+
case Intrinsic::aarch64_neon_sqshrun:
5961+
if (Op.getValueType().isVector())
5962+
return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
5963+
DAG.getNode(AArch64ISD::VASHR, dl,
5964+
Op.getOperand(1).getValueType(),
5965+
Op.getOperand(1), Op.getOperand(2)));
5966+
return SDValue();
5967+
case Intrinsic::aarch64_neon_uqshrn:
5968+
if (Op.getValueType().isVector())
5969+
return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
5970+
DAG.getNode(AArch64ISD::VLSHR, dl,
5971+
Op.getOperand(1).getValueType(),
5972+
Op.getOperand(1), Op.getOperand(2)));
5973+
return SDValue();
59535974
case Intrinsic::aarch64_sve_whilelo:
59545975
return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
59555976
/*IsEqual=*/false);

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8007,9 +8007,9 @@ defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
80078007
defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
80088008
defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
80098009
defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
8010-
int_aarch64_neon_sqshrn>;
8010+
BinOpFrag<(truncssat_s (AArch64vashr node:$LHS, node:$RHS))>>;
80118011
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
8012-
int_aarch64_neon_sqshrun>;
8012+
BinOpFrag<(truncssat_u (AArch64vashr node:$LHS, node:$RHS))>>;
80138013
defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
80148014
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
80158015
(i32 vecshiftR64:$imm))),
@@ -8030,7 +8030,7 @@ defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
80308030
int_aarch64_neon_uqrshrn>;
80318031
defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
80328032
defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
8033-
int_aarch64_neon_uqshrn>;
8033+
BinOpFrag<(truncusat_u (AArch64vlshr node:$LHS, node:$RHS))>>;
80348034
defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
80358035
defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
80368036
TriOpFrag<(add node:$LHS,

llvm/test/CodeGen/AArch64/qshrn.ll

Lines changed: 30 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
define <4 x i16> @NarrowAShrI32By5(<4 x i32> %x) {
55
; CHECK-LABEL: NarrowAShrI32By5:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: sshr v0.4s, v0.4s, #5
8-
; CHECK-NEXT: sqxtn v0.4h, v0.4s
7+
; CHECK-NEXT: sqshrn v0.4h, v0.4s, #5
98
; CHECK-NEXT: ret
109
%s = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
1110
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %s)
@@ -26,8 +25,7 @@ define <4 x i16> @NarrowAShrU32By5(<4 x i32> %x) {
2625
define <4 x i16> @NarrowAShrI32By5ToU16(<4 x i32> %x) {
2726
; CHECK-LABEL: NarrowAShrI32By5ToU16:
2827
; CHECK: // %bb.0:
29-
; CHECK-NEXT: sshr v0.4s, v0.4s, #5
30-
; CHECK-NEXT: sqxtun v0.4h, v0.4s
28+
; CHECK-NEXT: sqshrun v0.4h, v0.4s, #5
3129
; CHECK-NEXT: ret
3230
%s = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
3331
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %s)
@@ -48,8 +46,7 @@ define <4 x i16> @NarrowLShrI32By5(<4 x i32> %x) {
4846
define <4 x i16> @NarrowLShrU32By5(<4 x i32> %x) {
4947
; CHECK-LABEL: NarrowLShrU32By5:
5048
; CHECK: // %bb.0:
51-
; CHECK-NEXT: ushr v0.4s, v0.4s, #5
52-
; CHECK-NEXT: uqxtn v0.4h, v0.4s
49+
; CHECK-NEXT: uqshrn v0.4h, v0.4s, #5
5350
; CHECK-NEXT: ret
5451
%s = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
5552
%r = tail call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %s)
@@ -71,8 +68,7 @@ define <4 x i16> @NarrowLShrI32By5ToU16(<4 x i32> %x) {
7168
define <2 x i32> @NarrowAShri64By5(<2 x i64> %x) {
7269
; CHECK-LABEL: NarrowAShri64By5:
7370
; CHECK: // %bb.0:
74-
; CHECK-NEXT: sshr v0.2d, v0.2d, #5
75-
; CHECK-NEXT: sqxtn v0.2s, v0.2d
71+
; CHECK-NEXT: sqshrn v0.2s, v0.2d, #5
7672
; CHECK-NEXT: ret
7773
%s = ashr <2 x i64> %x, <i64 5, i64 5>
7874
%r = tail call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %s)
@@ -93,8 +89,7 @@ define <2 x i32> @NarrowAShrU64By5(<2 x i64> %x) {
9389
define <2 x i32> @NarrowAShri64By5ToU32(<2 x i64> %x) {
9490
; CHECK-LABEL: NarrowAShri64By5ToU32:
9591
; CHECK: // %bb.0:
96-
; CHECK-NEXT: sshr v0.2d, v0.2d, #5
97-
; CHECK-NEXT: sqxtun v0.2s, v0.2d
92+
; CHECK-NEXT: sqshrun v0.2s, v0.2d, #5
9893
; CHECK-NEXT: ret
9994
%s = ashr <2 x i64> %x, <i64 5, i64 5>
10095
%r = tail call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %s)
@@ -115,8 +110,7 @@ define <2 x i32> @NarrowLShri64By5(<2 x i64> %x) {
115110
define <2 x i32> @NarrowLShrU64By5(<2 x i64> %x) {
116111
; CHECK-LABEL: NarrowLShrU64By5:
117112
; CHECK: // %bb.0:
118-
; CHECK-NEXT: ushr v0.2d, v0.2d, #5
119-
; CHECK-NEXT: uqxtn v0.2s, v0.2d
113+
; CHECK-NEXT: uqshrn v0.2s, v0.2d, #5
120114
; CHECK-NEXT: ret
121115
%s = lshr <2 x i64> %x, <i64 5, i64 5>
122116
%r = tail call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %s)
@@ -138,8 +132,7 @@ define <2 x i32> @NarrowLShri64By5ToU32(<2 x i64> %x) {
138132
define <8 x i8> @NarrowAShri16By5(<8 x i16> %x) {
139133
; CHECK-LABEL: NarrowAShri16By5:
140134
; CHECK: // %bb.0:
141-
; CHECK-NEXT: sshr v0.8h, v0.8h, #5
142-
; CHECK-NEXT: sqxtn v0.8b, v0.8h
135+
; CHECK-NEXT: sqshrn v0.8b, v0.8h, #5
143136
; CHECK-NEXT: ret
144137
%s = ashr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
145138
%r = tail call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %s)
@@ -160,8 +153,7 @@ define <8 x i8> @NarrowAShrU16By5(<8 x i16> %x) {
160153
define <8 x i8> @NarrowAShri16By5ToU8(<8 x i16> %x) {
161154
; CHECK-LABEL: NarrowAShri16By5ToU8:
162155
; CHECK: // %bb.0:
163-
; CHECK-NEXT: sshr v0.8h, v0.8h, #5
164-
; CHECK-NEXT: sqxtun v0.8b, v0.8h
156+
; CHECK-NEXT: sqshrun v0.8b, v0.8h, #5
165157
; CHECK-NEXT: ret
166158
%s = ashr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
167159
%r = tail call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %s)
@@ -182,8 +174,7 @@ define <8 x i8> @NarrowLShri16By5(<8 x i16> %x) {
182174
define <8 x i8> @NarrowLShrU16By5(<8 x i16> %x) {
183175
; CHECK-LABEL: NarrowLShrU16By5:
184176
; CHECK: // %bb.0:
185-
; CHECK-NEXT: ushr v0.8h, v0.8h, #5
186-
; CHECK-NEXT: uqxtn v0.8b, v0.8h
177+
; CHECK-NEXT: uqshrn v0.8b, v0.8h, #5
187178
; CHECK-NEXT: ret
188179
%s = lshr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
189180
%r = tail call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %s)
@@ -208,8 +199,7 @@ define <8 x i8> @NarrowLShri16By5ToU8(<8 x i16> %x) {
208199
define <4 x i16> @NarrowAShrI32By31(<4 x i32> %x) {
209200
; CHECK-LABEL: NarrowAShrI32By31:
210201
; CHECK: // %bb.0:
211-
; CHECK-NEXT: sshr v0.4s, v0.4s, #16
212-
; CHECK-NEXT: sqxtn v0.4h, v0.4s
202+
; CHECK-NEXT: sqshrn v0.4h, v0.4s, #16
213203
; CHECK-NEXT: ret
214204
%s = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
215205
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %s)
@@ -219,8 +209,7 @@ define <4 x i16> @NarrowAShrI32By31(<4 x i32> %x) {
219209
define <4 x i16> @NarrowAShrI32By31ToU16(<4 x i32> %x) {
220210
; CHECK-LABEL: NarrowAShrI32By31ToU16:
221211
; CHECK: // %bb.0:
222-
; CHECK-NEXT: sshr v0.4s, v0.4s, #16
223-
; CHECK-NEXT: sqxtun v0.4h, v0.4s
212+
; CHECK-NEXT: sqshrun v0.4h, v0.4s, #16
224213
; CHECK-NEXT: ret
225214
%s = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
226215
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %s)
@@ -230,8 +219,7 @@ define <4 x i16> @NarrowAShrI32By31ToU16(<4 x i32> %x) {
230219
define <4 x i16> @NarrowLShrU32By31(<4 x i32> %x) {
231220
; CHECK-LABEL: NarrowLShrU32By31:
232221
; CHECK: // %bb.0:
233-
; CHECK-NEXT: ushr v0.4s, v0.4s, #16
234-
; CHECK-NEXT: uqxtn v0.4h, v0.4s
222+
; CHECK-NEXT: uqshrn v0.4h, v0.4s, #16
235223
; CHECK-NEXT: ret
236224
%s = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
237225
%r = tail call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %s)
@@ -242,10 +230,8 @@ define <4 x i16> @NarrowLShrU32By31(<4 x i32> %x) {
242230
define <16 x i8> @signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
243231
; CHECK-LABEL: signed_minmax_v8i16_to_v16i8:
244232
; CHECK: // %bb.0: // %entry
245-
; CHECK-NEXT: sshr v0.8h, v0.8h, #5
246-
; CHECK-NEXT: sshr v1.8h, v1.8h, #5
247-
; CHECK-NEXT: sqxtn v0.8b, v0.8h
248-
; CHECK-NEXT: sqxtn2 v0.16b, v1.8h
233+
; CHECK-NEXT: sqshrn v0.8b, v0.8h, #5
234+
; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #5
249235
; CHECK-NEXT: ret
250236
entry:
251237
%s = ashr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -258,10 +244,8 @@ entry:
258244
define <16 x i8> @unsigned_minmax_v8i16_to_v16i8(<16 x i16> %x) {
259245
; CHECK-LABEL: unsigned_minmax_v8i16_to_v16i8:
260246
; CHECK: // %bb.0: // %entry
261-
; CHECK-NEXT: ushr v0.8h, v0.8h, #5
262-
; CHECK-NEXT: ushr v1.8h, v1.8h, #5
263-
; CHECK-NEXT: uqxtn v0.8b, v0.8h
264-
; CHECK-NEXT: uqxtn2 v0.16b, v1.8h
247+
; CHECK-NEXT: uqshrn v0.8b, v0.8h, #5
248+
; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #5
265249
; CHECK-NEXT: ret
266250
entry:
267251
%s = lshr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -273,10 +257,8 @@ entry:
273257
define <16 x i8> @unsigned_signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
274258
; CHECK-LABEL: unsigned_signed_minmax_v8i16_to_v16i8:
275259
; CHECK: // %bb.0: // %entry
276-
; CHECK-NEXT: sshr v0.8h, v0.8h, #5
277-
; CHECK-NEXT: sshr v1.8h, v1.8h, #5
278-
; CHECK-NEXT: sqxtun v0.8b, v0.8h
279-
; CHECK-NEXT: sqxtun2 v0.16b, v1.8h
260+
; CHECK-NEXT: sqshrun v0.8b, v0.8h, #5
261+
; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #5
280262
; CHECK-NEXT: ret
281263
entry:
282264
%s = ashr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -290,10 +272,8 @@ entry:
290272
define <8 x i16> @signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
291273
; CHECK-LABEL: signed_minmax_v4i32_to_v8i16:
292274
; CHECK: // %bb.0: // %entry
293-
; CHECK-NEXT: sshr v0.4s, v0.4s, #5
294-
; CHECK-NEXT: sshr v1.4s, v1.4s, #5
295-
; CHECK-NEXT: sqxtn v0.4h, v0.4s
296-
; CHECK-NEXT: sqxtn2 v0.8h, v1.4s
275+
; CHECK-NEXT: sqshrn v0.4h, v0.4s, #5
276+
; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #5
297277
; CHECK-NEXT: ret
298278
entry:
299279
%s = ashr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -306,10 +286,8 @@ entry:
306286
define <8 x i16> @unsigned_minmax_v4i32_to_v8i16(<8 x i32> %x) {
307287
; CHECK-LABEL: unsigned_minmax_v4i32_to_v8i16:
308288
; CHECK: // %bb.0: // %entry
309-
; CHECK-NEXT: ushr v0.4s, v0.4s, #5
310-
; CHECK-NEXT: ushr v1.4s, v1.4s, #5
311-
; CHECK-NEXT: uqxtn v0.4h, v0.4s
312-
; CHECK-NEXT: uqxtn2 v0.8h, v1.4s
289+
; CHECK-NEXT: uqshrn v0.4h, v0.4s, #5
290+
; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #5
313291
; CHECK-NEXT: ret
314292
entry:
315293
%s = lshr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -321,10 +299,8 @@ entry:
321299
define <8 x i16> @unsigned_signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
322300
; CHECK-LABEL: unsigned_signed_minmax_v4i32_to_v8i16:
323301
; CHECK: // %bb.0: // %entry
324-
; CHECK-NEXT: sshr v0.4s, v0.4s, #5
325-
; CHECK-NEXT: sshr v1.4s, v1.4s, #5
326-
; CHECK-NEXT: sqxtun v0.4h, v0.4s
327-
; CHECK-NEXT: sqxtun2 v0.8h, v1.4s
302+
; CHECK-NEXT: sqshrun v0.4h, v0.4s, #5
303+
; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #5
328304
; CHECK-NEXT: ret
329305
entry:
330306
%s = ashr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -338,10 +314,8 @@ entry:
338314
define <4 x i32> @signed_minmax_v4i64_to_v8i32(<4 x i64> %x) {
339315
; CHECK-LABEL: signed_minmax_v4i64_to_v8i32:
340316
; CHECK: // %bb.0: // %entry
341-
; CHECK-NEXT: sshr v0.2d, v0.2d, #5
342-
; CHECK-NEXT: sshr v1.2d, v1.2d, #5
343-
; CHECK-NEXT: sqxtn v0.2s, v0.2d
344-
; CHECK-NEXT: sqxtn2 v0.4s, v1.2d
317+
; CHECK-NEXT: sqshrn v0.2s, v0.2d, #5
318+
; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #5
345319
; CHECK-NEXT: ret
346320
entry:
347321
%s = ashr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
@@ -354,10 +328,8 @@ entry:
354328
define <4 x i32> @unsigned_minmax_v4i64_to_v8i32(<4 x i64> %x) {
355329
; CHECK-LABEL: unsigned_minmax_v4i64_to_v8i32:
356330
; CHECK: // %bb.0: // %entry
357-
; CHECK-NEXT: ushr v0.2d, v0.2d, #5
358-
; CHECK-NEXT: ushr v1.2d, v1.2d, #5
359-
; CHECK-NEXT: uqxtn v0.2s, v0.2d
360-
; CHECK-NEXT: uqxtn2 v0.4s, v1.2d
331+
; CHECK-NEXT: uqshrn v0.2s, v0.2d, #5
332+
; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #5
361333
; CHECK-NEXT: ret
362334
entry:
363335
%s = lshr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
@@ -369,10 +341,8 @@ entry:
369341
define <4 x i32> @unsigned_signed_minmax_v4i64_to_v8i32(<4 x i64> %x) {
370342
; CHECK-LABEL: unsigned_signed_minmax_v4i64_to_v8i32:
371343
; CHECK: // %bb.0: // %entry
372-
; CHECK-NEXT: sshr v0.2d, v0.2d, #5
373-
; CHECK-NEXT: sshr v1.2d, v1.2d, #5
374-
; CHECK-NEXT: sqxtun v0.2s, v0.2d
375-
; CHECK-NEXT: sqxtun2 v0.4s, v1.2d
344+
; CHECK-NEXT: sqshrun v0.2s, v0.2d, #5
345+
; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #5
376346
; CHECK-NEXT: ret
377347
entry:
378348
%s = ashr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>

0 commit comments

Comments
 (0)