Skip to content

Commit 4353845

Browse files
authored
[AArch64][GlobalISel] Added support for hadd family of intrinsics (#163985)
GlobalISel now selects hadd family of intrinsics, without falling back to SDAG.
1 parent 93d759c commit 4353845

File tree

18 files changed

+8212
-7234
lines changed

18 files changed

+8212
-7234
lines changed

llvm/docs/GlobalISel/GenericOpcode.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,19 @@ Compute the absolute difference (signed and unsigned), e.g. trunc(abs(ext(x)-ext
511511
%0:_(s33) = G_ABDS %2, %3
512512
%1:_(s33) = G_ABDU %4, %5
513513
514+
G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL
515+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
516+
517+
Computes the average of corresponding elements in two vectors (signed and unsigned).
518+
The FLOOR variants round the exact average down, e.g. trunc(shr(add(ext(a),ext(b)),1)); the CEIL variants round it up, e.g. trunc(shr(add(add(ext(a),ext(b)),1),1)).
519+
520+
.. code-block:: none
521+
522+
%0:_(<4 x s16>) = G_UAVGFLOOR %4:_(<4 x s16>), %5:_(<4 x s16>)
523+
%1:_(<4 x s16>) = G_UAVGCEIL %6:_(<4 x s16>), %7:_(<4 x s16>)
524+
%2:_(<4 x s16>) = G_SAVGFLOOR %8:_(<4 x s16>), %9:_(<4 x s16>)
525+
%3:_(<4 x s16>) = G_SAVGCEIL %10:_(<4 x s16>), %11:_(<4 x s16>)
526+
514527
Floating Point Operations
515528
-------------------------
516529

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,17 @@ HANDLE_TARGET_OPCODE(G_ABDS)
298298
/// Generic absolute difference unsigned instruction.
299299
HANDLE_TARGET_OPCODE(G_ABDU)
300300

301+
/// Generic unsigned vector average instruction, rounding down (floor).
302+
HANDLE_TARGET_OPCODE(G_UAVGFLOOR)
303+
304+
/// Generic unsigned vector average instruction, rounding up (ceil).
305+
HANDLE_TARGET_OPCODE(G_UAVGCEIL)
306+
307+
/// Generic signed vector average instruction, rounding down (floor).
308+
HANDLE_TARGET_OPCODE(G_SAVGFLOOR)
309+
310+
/// Generic signed vector average instruction, rounding up (ceil).
311+
HANDLE_TARGET_OPCODE(G_SAVGCEIL)
301312

302313
HANDLE_TARGET_OPCODE(G_IMPLICIT_DEF)
303314

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,34 @@ def G_ABDU : GenericInstruction {
423423
let isCommutable = true;
424424
}
425425

426+
// Generic unsigned vector average, rounding down (floor); pairs with the SelectionDAG avgflooru node.
427+
def G_UAVGFLOOR : GenericInstruction {
428+
let OutOperandList = (outs type0:$dst);
429+
let InOperandList = (ins type0:$src1, type0:$src2);
430+
let hasSideEffects = 0;
431+
}
432+
433+
// Generic unsigned vector average, rounding up (ceil); pairs with the SelectionDAG avgceilu node.
434+
def G_UAVGCEIL : GenericInstruction {
435+
let OutOperandList = (outs type0:$dst);
436+
let InOperandList = (ins type0:$src1, type0:$src2);
437+
let hasSideEffects = 0;
438+
}
439+
440+
// Generic signed vector average, rounding down (floor); pairs with the SelectionDAG avgfloors node.
441+
def G_SAVGFLOOR : GenericInstruction {
442+
let OutOperandList = (outs type0:$dst);
443+
let InOperandList = (ins type0:$src1, type0:$src2);
444+
let hasSideEffects = 0;
445+
}
446+
447+
// Generic signed vector average, rounding up (ceil); pairs with the SelectionDAG avgceils node.
448+
def G_SAVGCEIL : GenericInstruction {
449+
let OutOperandList = (outs type0:$dst);
450+
let InOperandList = (ins type0:$src1, type0:$src2);
451+
let hasSideEffects = 0;
452+
}
453+
426454
/// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount.
427455
/// fshl(X,Y,Z): (X << (Z % bitwidth)) | (Y >> (bitwidth - (Z % bitwidth)))
428456
def G_FSHL : GenericInstruction {

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ def : GINodeEquiv<G_LSHR, srl>;
8383
def : GINodeEquiv<G_ASHR, sra>;
8484
def : GINodeEquiv<G_ABDS, abds>;
8585
def : GINodeEquiv<G_ABDU, abdu>;
86+
def : GINodeEquiv<G_UAVGFLOOR, avgflooru>;
87+
def : GINodeEquiv<G_UAVGCEIL, avgceilu>;
88+
def : GINodeEquiv<G_SAVGFLOOR, avgfloors>;
89+
def : GINodeEquiv<G_SAVGCEIL, avgceils>;
8690
def : GINodeEquiv<G_SADDSAT, saddsat>;
8791
def : GINodeEquiv<G_UADDSAT, uaddsat>;
8892
def : GINodeEquiv<G_SSUBSAT, ssubsat>;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
289289
.moreElementsToNextPow2(0)
290290
.lower();
291291

292-
getActionDefinitionsBuilder({G_ABDS, G_ABDU})
292+
getActionDefinitionsBuilder(
293+
{G_ABDS, G_ABDU, G_UAVGFLOOR, G_UAVGCEIL, G_SAVGFLOOR, G_SAVGCEIL})
293294
.legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
294295
.lower();
295296

@@ -1834,6 +1835,14 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
18341835
return LowerBinOp(TargetOpcode::G_ABDS);
18351836
case Intrinsic::aarch64_neon_uabd:
18361837
return LowerBinOp(TargetOpcode::G_ABDU);
1838+
case Intrinsic::aarch64_neon_uhadd:
1839+
return LowerBinOp(TargetOpcode::G_UAVGFLOOR);
1840+
case Intrinsic::aarch64_neon_urhadd:
1841+
return LowerBinOp(TargetOpcode::G_UAVGCEIL);
1842+
case Intrinsic::aarch64_neon_shadd:
1843+
return LowerBinOp(TargetOpcode::G_SAVGFLOOR);
1844+
case Intrinsic::aarch64_neon_srhadd:
1845+
return LowerBinOp(TargetOpcode::G_SAVGCEIL);
18371846
case Intrinsic::aarch64_neon_abs: {
18381847
// Lower the intrinsic to G_ABS.
18391848
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,26 @@
7979
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
8080
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
8181
#
82+
# DEBUG-NEXT: G_UAVGFLOOR (opcode {{[0-9]+}}): 1 type index, 0 imm indices
83+
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
84+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
85+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
86+
#
87+
# DEBUG-NEXT: G_UAVGCEIL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
88+
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
89+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
90+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
91+
#
92+
# DEBUG-NEXT: G_SAVGFLOOR (opcode {{[0-9]+}}): 1 type index, 0 imm indices
93+
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
94+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
95+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
96+
#
97+
# DEBUG-NEXT: G_SAVGCEIL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
98+
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
99+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
100+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
101+
#
82102
# DEBUG-NEXT: G_IMPLICIT_DEF (opcode {{[0-9]+}}): 1 type index, 0 imm indices
83103
# DEBUG-NEXT: .. the first uncovered type index: {{[0-9]+}}, OK
84104
# DEBUG-NEXT: .. the first uncovered imm index: {{[0-9]+}}, OK
Lines changed: 123 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,27 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2-
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
34

45
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
56
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
67
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
78
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
89

910
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
10-
; CHECK-LABEL: haddu_zext:
11-
; CHECK: // %bb.0:
12-
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
13-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
14-
; CHECK-NEXT: ret
11+
; CHECK-SD-LABEL: haddu_zext:
12+
; CHECK-SD: // %bb.0:
13+
; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
14+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
15+
; CHECK-SD-NEXT: ret
16+
;
17+
; CHECK-GI-LABEL: haddu_zext:
18+
; CHECK-GI: // %bb.0:
19+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
20+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
21+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
22+
; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
23+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
24+
; CHECK-GI-NEXT: ret
1525
%x0 = zext <8 x i8> %a0 to <8 x i16>
1626
%x1 = zext <8 x i8> %a1 to <8 x i16>
1727
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
2030
}
2131

2232
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
23-
; CHECK-LABEL: rhaddu_zext:
24-
; CHECK: // %bb.0:
25-
; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
26-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
27-
; CHECK-NEXT: ret
33+
; CHECK-SD-LABEL: rhaddu_zext:
34+
; CHECK-SD: // %bb.0:
35+
; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
36+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
37+
; CHECK-SD-NEXT: ret
38+
;
39+
; CHECK-GI-LABEL: rhaddu_zext:
40+
; CHECK-GI: // %bb.0:
41+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
42+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
43+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
44+
; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
45+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
46+
; CHECK-GI-NEXT: ret
2847
%x0 = zext <8 x i8> %a0 to <8 x i16>
2948
%x1 = zext <8 x i8> %a1 to <8 x i16>
3049
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
3352
}
3453

3554
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
36-
; CHECK-LABEL: hadds_zext:
37-
; CHECK: // %bb.0:
38-
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
39-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
40-
; CHECK-NEXT: ret
55+
; CHECK-SD-LABEL: hadds_zext:
56+
; CHECK-SD: // %bb.0:
57+
; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
58+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
59+
; CHECK-SD-NEXT: ret
60+
;
61+
; CHECK-GI-LABEL: hadds_zext:
62+
; CHECK-GI: // %bb.0:
63+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
64+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
65+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
66+
; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
67+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
68+
; CHECK-GI-NEXT: ret
4169
%x0 = zext <8 x i8> %a0 to <8 x i16>
4270
%x1 = zext <8 x i8> %a1 to <8 x i16>
4371
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
4674
}
4775

4876
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
49-
; CHECK-LABEL: shaddu_zext:
50-
; CHECK: // %bb.0:
51-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
52-
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
53-
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
54-
; CHECK-NEXT: ret
77+
; CHECK-SD-LABEL: shaddu_zext:
78+
; CHECK-SD: // %bb.0:
79+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
80+
; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
81+
; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
82+
; CHECK-SD-NEXT: ret
83+
;
84+
; CHECK-GI-LABEL: shaddu_zext:
85+
; CHECK-GI: // %bb.0:
86+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
87+
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
88+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
89+
; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
90+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
91+
; CHECK-GI-NEXT: ret
5592
%x0 = zext <8 x i8> %a0 to <8 x i16>
5693
%x1 = zext <8 x i8> %a1 to <8 x i16>
5794
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
6299
; ; negative tests
63100

64101
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
65-
; CHECK-LABEL: haddu_sext:
66-
; CHECK: // %bb.0:
67-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
68-
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
69-
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
70-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
71-
; CHECK-NEXT: ret
102+
; CHECK-SD-LABEL: haddu_sext:
103+
; CHECK-SD: // %bb.0:
104+
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
105+
; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
106+
; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
107+
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
108+
; CHECK-SD-NEXT: ret
109+
;
110+
; CHECK-GI-LABEL: haddu_sext:
111+
; CHECK-GI: // %bb.0:
112+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
113+
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
114+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
115+
; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
116+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
117+
; CHECK-GI-NEXT: ret
72118
%x0 = sext <8 x i8> %a0 to <8 x i16>
73119
%x1 = sext <8 x i8> %a1 to <8 x i16>
74120
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
77123
}
78124

79125
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
80-
; CHECK-LABEL: urhadd_sext:
81-
; CHECK: // %bb.0:
82-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
83-
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
84-
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
85-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
86-
; CHECK-NEXT: ret
126+
; CHECK-SD-LABEL: urhadd_sext:
127+
; CHECK-SD: // %bb.0:
128+
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
129+
; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
130+
; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
131+
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
132+
; CHECK-SD-NEXT: ret
133+
;
134+
; CHECK-GI-LABEL: urhadd_sext:
135+
; CHECK-GI: // %bb.0:
136+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
137+
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
138+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
139+
; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
140+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
141+
; CHECK-GI-NEXT: ret
87142
%x0 = sext <8 x i8> %a0 to <8 x i16>
88143
%x1 = sext <8 x i8> %a1 to <8 x i16>
89144
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
92147
}
93148

94149
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
95-
; CHECK-LABEL: hadds_sext:
96-
; CHECK: // %bb.0:
97-
; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
98-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
99-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
100-
; CHECK-NEXT: ret
150+
; CHECK-SD-LABEL: hadds_sext:
151+
; CHECK-SD: // %bb.0:
152+
; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
153+
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
154+
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
155+
; CHECK-SD-NEXT: ret
156+
;
157+
; CHECK-GI-LABEL: hadds_sext:
158+
; CHECK-GI: // %bb.0:
159+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
160+
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
161+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
162+
; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
163+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
164+
; CHECK-GI-NEXT: ret
101165
%x0 = sext <8 x i8> %a0 to <8 x i16>
102166
%x1 = sext <8 x i8> %a1 to <8 x i16>
103167
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
106170
}
107171

108172
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
109-
; CHECK-LABEL: shaddu_sext:
110-
; CHECK: // %bb.0:
111-
; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
112-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
113-
; CHECK-NEXT: bic v0.8h, #254, lsl #8
114-
; CHECK-NEXT: ret
173+
; CHECK-SD-LABEL: shaddu_sext:
174+
; CHECK-SD: // %bb.0:
175+
; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
176+
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
177+
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
178+
; CHECK-SD-NEXT: ret
179+
;
180+
; CHECK-GI-LABEL: shaddu_sext:
181+
; CHECK-GI: // %bb.0:
182+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
183+
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
184+
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
185+
; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
186+
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
187+
; CHECK-GI-NEXT: ret
115188
%x0 = sext <8 x i8> %a0 to <8 x i16>
116189
%x1 = sext <8 x i8> %a1 to <8 x i16>
117190
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
118191
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
119192
ret <8 x i16> %res
120193
}
194+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
195+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)