30 changes: 30 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5941,6 +5941,36 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1))));
return SDValue();
}
+  case Intrinsic::aarch64_neon_sqxtn:
+    return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
+                       Op.getOperand(1));
+  case Intrinsic::aarch64_neon_sqxtun:
+    return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
+                       Op.getOperand(1));
+  case Intrinsic::aarch64_neon_uqxtn:
+    return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
+                       Op.getOperand(1));
+  case Intrinsic::aarch64_neon_sqshrn:
+    if (Op.getValueType().isVector())
+      return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VASHR, dl,
+                                     Op.getOperand(1).getValueType(),
+                                     Op.getOperand(1), Op.getOperand(2)));
+    return SDValue();
+  case Intrinsic::aarch64_neon_sqshrun:
+    if (Op.getValueType().isVector())
+      return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VASHR, dl,
+                                     Op.getOperand(1).getValueType(),
+                                     Op.getOperand(1), Op.getOperand(2)));
+    return SDValue();
+  case Intrinsic::aarch64_neon_uqshrn:
+    if (Op.getValueType().isVector())
+      return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VLSHR, dl,
+                                     Op.getOperand(1).getValueType(),
+                                     Op.getOperand(1), Op.getOperand(2)));
+    return SDValue();
case Intrinsic::aarch64_sve_whilelo:
return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
/*IsEqual=*/false);
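
The three generic ISD opcodes used above encode saturating narrows. For reference, their element-wise semantics for an i32-to-i16 narrow look like the following C++ sketch (the scalar helper names are illustrative, not LLVM API):

#include <algorithm>
#include <cstdint>

// TRUNCATE_SSAT_S (sqxtn): signed input clamped to the signed range of
// the narrow type, then truncated; the truncation is exact after the clamp.
int16_t truncate_ssat_s(int32_t X) {
  return static_cast<int16_t>(std::clamp<int32_t>(X, INT16_MIN, INT16_MAX));
}

// TRUNCATE_SSAT_U (sqxtun): signed input clamped to the unsigned range
// of the narrow type.
uint16_t truncate_ssat_u(int32_t X) {
  return static_cast<uint16_t>(std::clamp<int32_t>(X, 0, UINT16_MAX));
}

// TRUNCATE_USAT_U (uqxtn): unsigned input clamped to the unsigned range
// of the narrow type.
uint16_t truncate_usat_u(uint32_t X) {
  return static_cast<uint16_t>(std::min<uint32_t>(X, UINT16_MAX));
}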
91 changes: 6 additions & 85 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5430,14 +5430,14 @@ defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
defm SHLL : SIMDVectorLShiftLongBySizeBHS;
defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
-defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
-defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
+defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", truncssat_s>;
+defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", truncssat_u>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
-defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
+defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", truncusat_u>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
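
With SQXTN, SQXTUN, and UQXTN now selected from the generic truncssat_s, truncssat_u, and truncusat_u fragments instead of the raw intrinsics, the intrinsic path (lowered to those nodes in AArch64ISelLowering.cpp above) and min/max-based IR share a single selection path. Element-wise, the idiom those fragments capture is clamp-then-truncate; a C++ sketch for an i16-to-i8 narrow, mirroring the comments on the patterns removed below (helper names are illustrative):

#include <algorithm>
#include <cstdint>

// truncssat_s as matched from IR: trunc(smin(smax(X, -128), 127)).
// Clamping first makes the truncation lossless, so it selects to sqxtn.
int8_t truncssat_s_from_minmax(int16_t X) {
  return static_cast<int8_t>(
      std::min<int16_t>(std::max<int16_t>(X, -128), 127));
}

// truncusat_u as matched from IR: trunc(umin(X, 255)), selecting to uqxtn.
uint8_t truncusat_u_from_minmax(uint16_t X) {
  return static_cast<uint8_t>(std::min<uint16_t>(X, 255));
}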
@@ -5476,85 +5476,6 @@ defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

-// Constant vector values, used in the S/UQXTN patterns below.
-def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
-def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
-def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
-def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
-def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
-def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
-
-// trunc(umin(X, 255)) -> UQXTRN v8i8
-def : Pat<(v8i8 (truncusat_u (v8i16 V128:$Vn))),
-          (UQXTNv8i8 V128:$Vn)>;
-// trunc(umin(X, 65535)) -> UQXTRN v4i16
-def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
-          (UQXTNv4i16 V128:$Vn)>;
-// trunc(umin(X, 4294967295)) -> UQXTRN v2i32
-def : Pat<(v2i32 (truncusat_u (v2i64 V128:$Vn))),
-          (UQXTNv2i32 V128:$Vn)>;
-// trunc(smin(smax(X, -128), 128)) -> SQXTRN
-def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
-          (SQXTNv8i8 V128:$Vn)>;
-// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
-def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
-          (SQXTNv4i16 V128:$Vn)>;
-// trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTRN
-def : Pat<(v2i32 (truncssat_s (v2i64 V128:$Vn))),
-          (SQXTNv2i32 V128:$Vn)>;
-// trunc(umin(smax(X, 0), 255)) -> SQXTUN
-def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
-          (SQXTUNv8i8 V128:$Vn)>;
-// trunc(umin(smax(X, 0), 65535)) -> SQXTUN
-def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
-          (SQXTUNv4i16 V128:$Vn)>;
-// trunc(umin(smax(X, 0), 4294967295)) -> SQXTUN
-def : Pat<(v2i32 (truncssat_u (v2i64 V128:$Vn))),
-          (SQXTUNv2i32 V128:$Vn)>;
-
-// truncusat_u
-// concat_vectors(Vd, truncusat_u(Vn)) ~> UQXTRN(Vd, Vn)
-def : Pat<(v16i8 (concat_vectors
-                 (v8i8 V64:$Vd),
-                 (v8i8 (truncusat_u (v8i16 V128:$Vn))))),
-          (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v8i16 (concat_vectors
-                 (v4i16 V64:$Vd),
-                 (v4i16 (truncusat_u (v4i32 V128:$Vn))))),
-          (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v4i32 (concat_vectors
-                 (v2i32 V64:$Vd),
-                 (v2i32 (truncusat_u (v2i64 V128:$Vn))))),
-          (UQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-
-// concat_vectors(Vd, truncssat_s(Vn)) ~> SQXTN2(Vd, Vn)
-def : Pat<(v16i8 (concat_vectors
-                 (v8i8 V64:$Vd),
-                 (v8i8 (truncssat_s (v8i16 V128:$Vn))))),
-          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v8i16 (concat_vectors
-                 (v4i16 V64:$Vd),
-                 (v4i16 (truncssat_s (v4i32 V128:$Vn))))),
-          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v4i32 (concat_vectors
-                 (v2i32 V64:$Vd),
-                 (v2i32 (truncssat_s (v2i64 V128:$Vn))))),
-          (SQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-
-// concat_vectors(Vd, truncssat_u(Vn)) ~> SQXTUN2(Vd, Vn)
-def : Pat<(v16i8 (concat_vectors
-                 (v8i8 V64:$Vd),
-                 (v8i8 (truncssat_u (v8i16 V128:$Vn))))),
-          (SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v8i16 (concat_vectors
-                 (v4i16 V64:$Vd),
-                 (v4i16 (truncssat_u (v4i32 V128:$Vn))))),
-          (SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v4i32 (concat_vectors
-                 (v2i32 V64:$Vd),
-                 (v2i32 (truncssat_u (v2i64 V128:$Vn))))),
-          (SQXTUNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// Select BSWAP vector instructions into REV instructions
def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),
(v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
@@ -8086,9 +8007,9 @@ defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
-                 int_aarch64_neon_sqshrn>;
+                 BinOpFrag<(truncssat_s (AArch64vashr node:$LHS, node:$RHS))>>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
-                 int_aarch64_neon_sqshrun>;
+                 BinOpFrag<(truncssat_u (AArch64vashr node:$LHS, node:$RHS))>>;
defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
(i32 vecshiftR64:$imm))),
@@ -8109,7 +8030,7 @@ defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
-                 int_aarch64_neon_uqshrn>;
+                 BinOpFrag<(truncusat_u (AArch64vlshr node:$LHS, node:$RHS))>>;
defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
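
The BinOpFrag fragments above select SQSHRN, SQSHRUN, and UQSHRN whenever the saturating narrow is fed by a vector shift with an immediate amount, which is exactly the DAG shape the intrinsic lowering now emits. Element-wise the fold is shift first, then saturate; a C++ sketch for one i32 lane (names illustrative; sqshrun is sqshrn with the result clamped to the unsigned range instead):

#include <algorithm>
#include <cstdint>

// sqshrn #Shift on one lane: truncssat_s(AArch64vashr(X, Shift)),
// i.e. an arithmetic shift right followed by a signed saturating narrow.
int16_t sqshrn_lane(int32_t X, unsigned Shift) {
  int32_t Shifted = X >> Shift; // arithmetic shift for a signed operand
  return static_cast<int16_t>(
      std::clamp<int32_t>(Shifted, INT16_MIN, INT16_MAX));
}

// uqshrn #Shift on one lane: truncusat_u(AArch64vlshr(X, Shift)),
// i.e. a logical shift right followed by an unsigned saturating narrow.
uint16_t uqshrn_lane(uint32_t X, unsigned Shift) {
  return static_cast<uint16_t>(std::min<uint32_t>(X >> Shift, UINT16_MAX));
}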
90 changes: 30 additions & 60 deletions llvm/test/CodeGen/AArch64/qshrn.ll
@@ -4,8 +4,7 @@
define <4 x i16> @NarrowAShrI32By5(<4 x i32> %x) {
; CHECK-LABEL: NarrowAShrI32By5:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #5
-; CHECK-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NEXT: sqshrn v0.4h, v0.4s, #5
; CHECK-NEXT: ret
%s = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %s)
@@ -26,8 +25,7 @@ define <4 x i16> @NarrowAShrU32By5(<4 x i32> %x) {
define <4 x i16> @NarrowAShrI32By5ToU16(<4 x i32> %x) {
; CHECK-LABEL: NarrowAShrI32By5ToU16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #5
-; CHECK-NEXT: sqxtun v0.4h, v0.4s
+; CHECK-NEXT: sqshrun v0.4h, v0.4s, #5
; CHECK-NEXT: ret
%s = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %s)
@@ -48,8 +46,7 @@ define <4 x i16> @NarrowLShrI32By5(<4 x i32> %x) {
define <4 x i16> @NarrowLShrU32By5(<4 x i32> %x) {
; CHECK-LABEL: NarrowLShrU32By5:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.4s, v0.4s, #5
-; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: uqshrn v0.4h, v0.4s, #5
; CHECK-NEXT: ret
%s = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
%r = tail call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %s)
@@ -71,8 +68,7 @@ define <4 x i16> @NarrowLShrI32By5ToU16(<4 x i32> %x) {
define <2 x i32> @NarrowAShri64By5(<2 x i64> %x) {
; CHECK-LABEL: NarrowAShri64By5:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.2d, v0.2d, #5
-; CHECK-NEXT: sqxtn v0.2s, v0.2d
+; CHECK-NEXT: sqshrn v0.2s, v0.2d, #5
; CHECK-NEXT: ret
%s = ashr <2 x i64> %x, <i64 5, i64 5>
%r = tail call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %s)
@@ -93,8 +89,7 @@ define <2 x i32> @NarrowAShrU64By5(<2 x i64> %x) {
define <2 x i32> @NarrowAShri64By5ToU32(<2 x i64> %x) {
; CHECK-LABEL: NarrowAShri64By5ToU32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.2d, v0.2d, #5
-; CHECK-NEXT: sqxtun v0.2s, v0.2d
+; CHECK-NEXT: sqshrun v0.2s, v0.2d, #5
; CHECK-NEXT: ret
%s = ashr <2 x i64> %x, <i64 5, i64 5>
%r = tail call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %s)
@@ -115,8 +110,7 @@ define <2 x i32> @NarrowLShri64By5(<2 x i64> %x) {
define <2 x i32> @NarrowLShrU64By5(<2 x i64> %x) {
; CHECK-LABEL: NarrowLShrU64By5:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.2d, v0.2d, #5
-; CHECK-NEXT: uqxtn v0.2s, v0.2d
+; CHECK-NEXT: uqshrn v0.2s, v0.2d, #5
; CHECK-NEXT: ret
%s = lshr <2 x i64> %x, <i64 5, i64 5>
%r = tail call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %s)
@@ -138,8 +132,7 @@ define <2 x i32> @NarrowLShri64By5ToU32(<2 x i64> %x) {
define <8 x i8> @NarrowAShri16By5(<8 x i16> %x) {
; CHECK-LABEL: NarrowAShri16By5:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #5
-; CHECK-NEXT: sqxtn v0.8b, v0.8h
+; CHECK-NEXT: sqshrn v0.8b, v0.8h, #5
; CHECK-NEXT: ret
%s = ashr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
%r = tail call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %s)
@@ -160,8 +153,7 @@ define <8 x i8> @NarrowAShrU16By5(<8 x i16> %x) {
define <8 x i8> @NarrowAShri16By5ToU8(<8 x i16> %x) {
; CHECK-LABEL: NarrowAShri16By5ToU8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.8h, v0.8h, #5
-; CHECK-NEXT: sqxtun v0.8b, v0.8h
+; CHECK-NEXT: sqshrun v0.8b, v0.8h, #5
; CHECK-NEXT: ret
%s = ashr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
%r = tail call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %s)
@@ -182,8 +174,7 @@ define <8 x i8> @NarrowLShri16By5(<8 x i16> %x) {
define <8 x i8> @NarrowLShrU16By5(<8 x i16> %x) {
; CHECK-LABEL: NarrowLShrU16By5:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.8h, v0.8h, #5
-; CHECK-NEXT: uqxtn v0.8b, v0.8h
+; CHECK-NEXT: uqshrn v0.8b, v0.8h, #5
; CHECK-NEXT: ret
%s = lshr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
%r = tail call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %s)
@@ -208,8 +199,7 @@ define <8 x i8> @NarrowLShri16By5ToU8(<8 x i16> %x) {
define <4 x i16> @NarrowAShrI32By31(<4 x i32> %x) {
; CHECK-LABEL: NarrowAShrI32By31:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #16
-; CHECK-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NEXT: sqshrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
%s = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %s)
@@ -219,8 +209,7 @@ define <4 x i16> @NarrowAShrI32By31(<4 x i32> %x) {
define <4 x i16> @NarrowAShrI32By31ToU16(<4 x i32> %x) {
; CHECK-LABEL: NarrowAShrI32By31ToU16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sshr v0.4s, v0.4s, #16
-; CHECK-NEXT: sqxtun v0.4h, v0.4s
+; CHECK-NEXT: sqshrun v0.4h, v0.4s, #16
; CHECK-NEXT: ret
%s = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
%r = tail call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %s)
@@ -230,8 +219,7 @@ define <4 x i16> @NarrowAShrI32By31ToU16(<4 x i32> %x) {
define <4 x i16> @NarrowLShrU32By31(<4 x i32> %x) {
; CHECK-LABEL: NarrowLShrU32By31:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v0.4s, v0.4s, #16
-; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: uqshrn v0.4h, v0.4s, #16
; CHECK-NEXT: ret
%s = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
%r = tail call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %s)
@@ -242,10 +230,8 @@ define <4 x i16> @NarrowLShrU32By31(<4 x i32> %x) {
define <16 x i8> @signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
; CHECK-LABEL: signed_minmax_v8i16_to_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshr v0.8h, v0.8h, #5
-; CHECK-NEXT: sshr v1.8h, v1.8h, #5
-; CHECK-NEXT: sqxtn v0.8b, v0.8h
-; CHECK-NEXT: sqxtn2 v0.16b, v1.8h
+; CHECK-NEXT: sqshrn v0.8b, v0.8h, #5
+; CHECK-NEXT: sqshrn2 v0.16b, v1.8h, #5
; CHECK-NEXT: ret
entry:
%s = ashr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -258,10 +244,8 @@ entry:
define <16 x i8> @unsigned_minmax_v8i16_to_v16i8(<16 x i16> %x) {
; CHECK-LABEL: unsigned_minmax_v8i16_to_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushr v0.8h, v0.8h, #5
-; CHECK-NEXT: ushr v1.8h, v1.8h, #5
-; CHECK-NEXT: uqxtn v0.8b, v0.8h
-; CHECK-NEXT: uqxtn2 v0.16b, v1.8h
+; CHECK-NEXT: uqshrn v0.8b, v0.8h, #5
+; CHECK-NEXT: uqshrn2 v0.16b, v1.8h, #5
; CHECK-NEXT: ret
entry:
%s = lshr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -273,10 +257,8 @@ entry:
define <16 x i8> @unsigned_signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
; CHECK-LABEL: unsigned_signed_minmax_v8i16_to_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshr v0.8h, v0.8h, #5
-; CHECK-NEXT: sshr v1.8h, v1.8h, #5
-; CHECK-NEXT: sqxtun v0.8b, v0.8h
-; CHECK-NEXT: sqxtun2 v0.16b, v1.8h
+; CHECK-NEXT: sqshrun v0.8b, v0.8h, #5
+; CHECK-NEXT: sqshrun2 v0.16b, v1.8h, #5
; CHECK-NEXT: ret
entry:
%s = ashr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -290,10 +272,8 @@ entry:
define <8 x i16> @signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
; CHECK-LABEL: signed_minmax_v4i32_to_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshr v0.4s, v0.4s, #5
-; CHECK-NEXT: sshr v1.4s, v1.4s, #5
-; CHECK-NEXT: sqxtn v0.4h, v0.4s
-; CHECK-NEXT: sqxtn2 v0.8h, v1.4s
+; CHECK-NEXT: sqshrn v0.4h, v0.4s, #5
+; CHECK-NEXT: sqshrn2 v0.8h, v1.4s, #5
; CHECK-NEXT: ret
entry:
%s = ashr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -306,10 +286,8 @@ entry:
define <8 x i16> @unsigned_minmax_v4i32_to_v8i16(<8 x i32> %x) {
; CHECK-LABEL: unsigned_minmax_v4i32_to_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushr v0.4s, v0.4s, #5
-; CHECK-NEXT: ushr v1.4s, v1.4s, #5
-; CHECK-NEXT: uqxtn v0.4h, v0.4s
-; CHECK-NEXT: uqxtn2 v0.8h, v1.4s
+; CHECK-NEXT: uqshrn v0.4h, v0.4s, #5
+; CHECK-NEXT: uqshrn2 v0.8h, v1.4s, #5
; CHECK-NEXT: ret
entry:
%s = lshr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -321,10 +299,8 @@ entry:
define <8 x i16> @unsigned_signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
; CHECK-LABEL: unsigned_signed_minmax_v4i32_to_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshr v0.4s, v0.4s, #5
-; CHECK-NEXT: sshr v1.4s, v1.4s, #5
-; CHECK-NEXT: sqxtun v0.4h, v0.4s
-; CHECK-NEXT: sqxtun2 v0.8h, v1.4s
+; CHECK-NEXT: sqshrun v0.4h, v0.4s, #5
+; CHECK-NEXT: sqshrun2 v0.8h, v1.4s, #5
; CHECK-NEXT: ret
entry:
%s = ashr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -338,10 +314,8 @@ entry:
define <4 x i32> @signed_minmax_v4i64_to_v8i32(<4 x i64> %x) {
; CHECK-LABEL: signed_minmax_v4i64_to_v8i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshr v0.2d, v0.2d, #5
-; CHECK-NEXT: sshr v1.2d, v1.2d, #5
-; CHECK-NEXT: sqxtn v0.2s, v0.2d
-; CHECK-NEXT: sqxtn2 v0.4s, v1.2d
+; CHECK-NEXT: sqshrn v0.2s, v0.2d, #5
+; CHECK-NEXT: sqshrn2 v0.4s, v1.2d, #5
; CHECK-NEXT: ret
entry:
%s = ashr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
@@ -354,10 +328,8 @@ entry:
define <4 x i32> @unsigned_minmax_v4i64_to_v8i32(<4 x i64> %x) {
; CHECK-LABEL: unsigned_minmax_v4i64_to_v8i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushr v0.2d, v0.2d, #5
-; CHECK-NEXT: ushr v1.2d, v1.2d, #5
-; CHECK-NEXT: uqxtn v0.2s, v0.2d
-; CHECK-NEXT: uqxtn2 v0.4s, v1.2d
+; CHECK-NEXT: uqshrn v0.2s, v0.2d, #5
+; CHECK-NEXT: uqshrn2 v0.4s, v1.2d, #5
; CHECK-NEXT: ret
entry:
%s = lshr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
@@ -369,10 +341,8 @@ entry:
define <4 x i32> @unsigned_signed_minmax_v4i64_to_v8i32(<4 x i64> %x) {
; CHECK-LABEL: unsigned_signed_minmax_v4i64_to_v8i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sshr v0.2d, v0.2d, #5
-; CHECK-NEXT: sshr v1.2d, v1.2d, #5
-; CHECK-NEXT: sqxtun v0.2s, v0.2d
-; CHECK-NEXT: sqxtun2 v0.4s, v1.2d
+; CHECK-NEXT: sqshrun v0.2s, v0.2d, #5
+; CHECK-NEXT: sqshrun2 v0.4s, v1.2d, #5
; CHECK-NEXT: ret
entry:
%s = ashr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
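
Across the updated checks, the former two-instruction shift-plus-narrow sequence collapses into a single shift-narrow instruction carrying the shift amount as an immediate, and the high-half tests fuse into the "2" variants that narrow into the upper lanes of the destination. A small C++ model of the paired form for the v8i16-to-v16i8 tests above (sketch only, not part of the test suite):

#include <algorithm>
#include <cstdint>

// Models sqshrn (writes the low 8 lanes) followed by sqshrn2 (narrows a
// second source into the high 8 lanes of the same destination).
void sqshrn_pair(const int16_t Src[16], unsigned Shift, int8_t Dst[16]) {
  for (int I = 0; I < 16; ++I) {
    int Shifted = Src[I] >> Shift; // sshr semantics on each lane
    Dst[I] = static_cast<int8_t>(
        std::clamp<int>(Shifted, INT8_MIN, INT8_MAX));
  }
}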