Skip to content

Commit a6fc487

Browse files
committed
Review comments
- Added a test case for v6i16 and fixed issues.
- Added test cases for v1i16 and fixed issues.

Change-Id: I4694c48ff9f12ee6048efd2394d5b710df7ebbea
1 parent c0298c8 commit a6fc487

File tree

2 files changed

+45
-3
lines changed

2 files changed

+45
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21058,7 +21058,8 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
2105821058
EVT SExt1Type = SExt1.getOperand(0).getValueType();
2105921059

2106021060
if (SExt0Type != SExt1Type || SExt0Type.getScalarType() != ScalarType ||
21061-
SExt0Type.getFixedSizeInBits() > 128)
21061+
SExt0Type.getFixedSizeInBits() > 128 || !SExt0Type.isPow2VectorType() ||
21062+
SExt0Type.getVectorNumElements() == 1)
2106221063
return SDValue();
2106321064

2106421065
SDLoc DL(N);

llvm/test/CodeGen/AArch64/saturating-vec-smull.ll

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,25 @@ define <2 x i64> @saturating_2xi32_2xi64(<2 x i32> %a, <2 x i32> %b) {
105105
ret <2 x i64> %ma
106106
}
107107

108+
; Test with a non-power-of-two vector type (<6 x i16>).
; The body sign-extends both operands to i32, multiplies them, shifts the
; product right arithmetically by 15, clamps with smin against 32767 and
; truncates back to i16.
; The CHECK lines expect a sqdmulh on a .4h vector together with an
; smull2 / sshr / smin / xtn2 sequence.
define <6 x i16> @saturating_6xi16(<6 x i16> %a, <6 x i16> %b) {
109+
; CHECK-LABEL: saturating_6xi16:
110+
; CHECK: // %bb.0:
111+
; CHECK-NEXT: smull2 v3.4s, v1.8h, v0.8h
112+
; CHECK-NEXT: movi v2.4s, #127, msl #8
113+
; CHECK-NEXT: sqdmulh v0.4h, v1.4h, v0.4h
114+
; CHECK-NEXT: sshr v3.4s, v3.4s, #15
115+
; CHECK-NEXT: smin v2.4s, v3.4s, v2.4s
116+
; CHECK-NEXT: xtn2 v0.8h, v2.4s
117+
; CHECK-NEXT: ret
118+
; Pattern under test: sext -> mul -> ashr 15 -> smin 32767 -> trunc.
%as = sext <6 x i16> %a to <6 x i32>
119+
%bs = sext <6 x i16> %b to <6 x i32>
120+
%m = mul <6 x i32> %bs, %as
121+
%sh = ashr <6 x i32> %m, splat (i32 15)
122+
%ma = tail call <6 x i32> @llvm.smin.v6i32(<6 x i32> %sh, <6 x i32> splat (i32 32767))
123+
%t = trunc <6 x i32> %ma to <6 x i16>
124+
ret <6 x i16> %t
125+
}
126+
108127
define <4 x i16> @unsupported_saturation_value_v4i16(<4 x i16> %a, <4 x i16> %b) {
109128
; CHECK-LABEL: unsupported_saturation_value_v4i16:
110129
; CHECK: // %bb.0:
@@ -175,8 +194,30 @@ define <2 x i11> @illegal_source(<2 x i11> %a, <2 x i11> %b) {
175194
%bs = sext <2 x i11> %b to <2 x i32>
176195
%m = mul <2 x i32> %bs, %as
177196
%sh = ashr <2 x i32> %m, splat (i32 15)
178-
%ma = tail call <2 x i32> @llvm.smin.v4i32(<2 x i32> %sh, <2 x i32> splat (i32 32767))
197+
%ma = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %sh, <2 x i32> splat (i32 32767))
179198
%t = trunc <2 x i32> %ma to <2 x i11>
180199
ret <2 x i11> %t
181200
}
182-
201+
; Test with a single-element vector type (<1 x i16>).
; The body uses the sext / mul / ashr 15 / smin 32767 / trunc pattern on
; <1 x i16> operands.
; The CHECK lines contain no sqdmulh: the expected output is a widened
; mul / sshr / smin sequence.
define <1 x i16> @saturating_1xi16(<1 x i16> %a, <1 x i16> %b) {
202+
; CHECK-LABEL: saturating_1xi16:
203+
; CHECK: // %bb.0:
204+
; CHECK-NEXT: zip1 v0.4h, v0.4h, v0.4h
205+
; CHECK-NEXT: zip1 v1.4h, v1.4h, v0.4h
206+
; CHECK-NEXT: shl v0.2s, v0.2s, #16
207+
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
208+
; CHECK-NEXT: shl v1.2s, v1.2s, #16
209+
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
210+
; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s
211+
; CHECK-NEXT: movi v1.2s, #127, msl #8
212+
; CHECK-NEXT: sshr v0.2s, v0.2s, #15
213+
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
214+
; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h
215+
; CHECK-NEXT: ret
216+
; Pattern under test: sext -> mul -> ashr 15 -> smin 32767 -> trunc.
%as = sext <1 x i16> %a to <1 x i32>
217+
%bs = sext <1 x i16> %b to <1 x i32>
218+
%m = mul <1 x i32> %bs, %as
219+
%sh = ashr <1 x i32> %m, splat (i32 15)
220+
%ma = tail call <1 x i32> @llvm.smin.v1i32(<1 x i32> %sh, <1 x i32> splat (i32 32767))
221+
%t = trunc <1 x i32> %ma to <1 x i16>
222+
ret <1 x i16> %t
223+
}

0 commit comments

Comments
 (0)