diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7448416c682ab..bf2f0674b5b65 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5941,6 +5941,36 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                            Op.getOperand(1))));
     return SDValue();
   }
+  case Intrinsic::aarch64_neon_sqxtn:
+    return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
+                       Op.getOperand(1));
+  case Intrinsic::aarch64_neon_sqxtun:
+    return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
+                       Op.getOperand(1));
+  case Intrinsic::aarch64_neon_uqxtn:
+    return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
+                       Op.getOperand(1));
+  case Intrinsic::aarch64_neon_sqshrn:
+    if (Op.getValueType().isVector())
+      return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VASHR, dl,
+                                     Op.getOperand(1).getValueType(),
+                                     Op.getOperand(1), Op.getOperand(2)));
+    return SDValue();
+  case Intrinsic::aarch64_neon_sqshrun:
+    if (Op.getValueType().isVector())
+      return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VASHR, dl,
+                                     Op.getOperand(1).getValueType(),
+                                     Op.getOperand(1), Op.getOperand(2)));
+    return SDValue();
+  case Intrinsic::aarch64_neon_uqshrn:
+    if (Op.getValueType().isVector())
+      return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VLSHR, dl,
+                                     Op.getOperand(1).getValueType(),
+                                     Op.getOperand(1), Op.getOperand(2)));
+    return SDValue();
   case Intrinsic::aarch64_sve_whilelo:
     return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false,
                                      /*IsEqual=*/false);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 6c9f0986b9e34..76a1029415b16 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5430,14 +5430,14 @@ defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
 defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
 defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
 defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
-defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
-defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
+defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", truncssat_s>;
+defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", truncssat_u>;
 defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
 defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
                                     BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
 defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
 defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
-defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
+defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", truncusat_u>;
 defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
 defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
 defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
@@ -5476,85 +5476,6 @@ defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
 defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
 defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
 
-// Constant vector values, used in the S/UQXTN patterns below.
-def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
-def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
-def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
-def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
-def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
-def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
-
-// trunc(umin(X, 255)) -> UQXTRN v8i8
-def : Pat<(v8i8 (truncusat_u (v8i16 V128:$Vn))),
-          (UQXTNv8i8 V128:$Vn)>;
-// trunc(umin(X, 65535)) -> UQXTRN v4i16
-def : Pat<(v4i16 (truncusat_u (v4i32 V128:$Vn))),
-          (UQXTNv4i16 V128:$Vn)>;
-// trunc(umin(X, 4294967295)) -> UQXTRN v2i32
-def : Pat<(v2i32 (truncusat_u (v2i64 V128:$Vn))),
-          (UQXTNv2i32 V128:$Vn)>;
-// trunc(smin(smax(X, -128), 128)) -> SQXTRN
-def : Pat<(v8i8 (truncssat_s (v8i16 V128:$Vn))),
-          (SQXTNv8i8 V128:$Vn)>;
-// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
-def : Pat<(v4i16 (truncssat_s (v4i32 V128:$Vn))),
-          (SQXTNv4i16 V128:$Vn)>;
-// trunc(smin(smax(X, -2147483648), 2147483647)) -> SQXTRN
-def : Pat<(v2i32 (truncssat_s (v2i64 V128:$Vn))),
-          (SQXTNv2i32 V128:$Vn)>;
-// trunc(umin(smax(X, 0), 255)) -> SQXTUN
-def : Pat<(v8i8 (truncssat_u (v8i16 V128:$Vn))),
-          (SQXTUNv8i8 V128:$Vn)>;
-// trunc(umin(smax(X, 0), 65535)) -> SQXTUN
-def : Pat<(v4i16 (truncssat_u (v4i32 V128:$Vn))),
-          (SQXTUNv4i16 V128:$Vn)>;
-// trunc(umin(smax(X, 0), 4294967295)) -> SQXTUN
-def : Pat<(v2i32 (truncssat_u (v2i64 V128:$Vn))),
-          (SQXTUNv2i32 V128:$Vn)>;
-
-// truncusat_u
-// concat_vectors(Vd, truncusat_u(Vn)) ~> UQXTRN(Vd, Vn)
-def : Pat<(v16i8 (concat_vectors
-                 (v8i8 V64:$Vd),
-                 (v8i8 (truncusat_u (v8i16 V128:$Vn))))),
-          (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v8i16 (concat_vectors
-                 (v4i16 V64:$Vd),
-                 (v4i16 (truncusat_u (v4i32 V128:$Vn))))),
-          (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v4i32 (concat_vectors
-                 (v2i32 V64:$Vd),
-                 (v2i32 (truncusat_u (v2i64 V128:$Vn))))),
-          (UQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-
-// concat_vectors(Vd, truncssat_s(Vn)) ~> SQXTN2(Vd, Vn)
-def : Pat<(v16i8 (concat_vectors
-                 (v8i8 V64:$Vd),
-                 (v8i8 (truncssat_s (v8i16 V128:$Vn))))),
-          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v8i16 (concat_vectors
-                 (v4i16 V64:$Vd),
-                 (v4i16 (truncssat_s (v4i32 V128:$Vn))))),
-          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v4i32 (concat_vectors
-                 (v2i32 V64:$Vd),
-                 (v2i32 (truncssat_s (v2i64 V128:$Vn))))),
-          (SQXTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-
-// concat_vectors(Vd, truncssat_u(Vn)) ~> SQXTUN2(Vd, Vn)
-def : Pat<(v16i8 (concat_vectors
-                 (v8i8 V64:$Vd),
-                 (v8i8 (truncssat_u (v8i16 V128:$Vn))))),
-          (SQXTUNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v8i16 (concat_vectors
-                 (v4i16 V64:$Vd),
-                 (v4i16 (truncssat_u (v4i32 V128:$Vn))))),
-          (SQXTUNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-def : Pat<(v4i32 (concat_vectors
-                 (v2i32 V64:$Vd),
-                 (v2i32 (truncssat_u (v2i64 V128:$Vn))))),
-          (SQXTUNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
-
 // Select BSWAP vector instructions into REV instructions
 def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),
           (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
@@ -8086,9 +8007,9 @@ defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
 defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
 defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
 defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
-                                         int_aarch64_neon_sqshrn>;
+                                         BinOpFrag<(truncssat_s (AArch64vashr node:$LHS, node:$RHS))>>;
 defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
-                                         int_aarch64_neon_sqshrun>;
+                                         BinOpFrag<(truncssat_u (AArch64vashr node:$LHS, node:$RHS))>>;
 defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
 def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                               (i32 vecshiftR64:$imm))),
@@ -8109,7 +8030,7 @@ defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                          int_aarch64_neon_uqrshrn>;
 defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
 defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
-                                         int_aarch64_neon_uqshrn>;
+                                         BinOpFrag<(truncusat_u (AArch64vlshr node:$LHS, node:$RHS))>>;
 defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
 defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                                         TriOpFrag<(add node:$LHS,
diff --git a/llvm/test/CodeGen/AArch64/qshrn.ll b/llvm/test/CodeGen/AArch64/qshrn.ll
index eaba88da7b09f..0212ff53b2503 100644
--- a/llvm/test/CodeGen/AArch64/qshrn.ll
+++ b/llvm/test/CodeGen/AArch64/qshrn.ll
@@ -4,8 +4,7 @@
 define <4 x i16> @NarrowAShrI32By5(<4 x i32> %x) {
 ; CHECK-LABEL: NarrowAShrI32By5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #5
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    sqshrn v0.4h, v0.4s, #5
 ; CHECK-NEXT:    ret
   %s = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
   %r = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %s)
@@ -26,8 +25,7 @@ define <4 x i16> @NarrowAShrU32By5(<4 x i32> %x) {
 define <4 x i16> @NarrowAShrI32By5ToU16(<4 x i32> %x) {
 ; CHECK-LABEL: NarrowAShrI32By5ToU16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #5
-; CHECK-NEXT:    sqxtun v0.4h, v0.4s
+; CHECK-NEXT:    sqshrun v0.4h, v0.4s, #5
 ; CHECK-NEXT:    ret
   %s = ashr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
   %r = tail call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %s)
@@ -48,8 +46,7 @@ define <4 x i16> @NarrowLShrI32By5(<4 x i32> %x) {
 define <4 x i16> @NarrowLShrU32By5(<4 x i32> %x) {
 ; CHECK-LABEL: NarrowLShrU32By5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #5
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-NEXT:    uqshrn v0.4h, v0.4s, #5
 ; CHECK-NEXT:    ret
   %s = lshr <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
   %r = tail call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %s)
@@ -71,8 +68,7 @@ define <4 x i16> @NarrowLShrI32By5ToU16(<4 x i32> %x) {
 define <2 x i32> @NarrowAShri64By5(<2 x i64> %x) {
 ; CHECK-LABEL: NarrowAShri64By5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.2d, v0.2d, #5
-; CHECK-NEXT:    sqxtn v0.2s, v0.2d
+; CHECK-NEXT:    sqshrn v0.2s, v0.2d, #5
 ; CHECK-NEXT:    ret
   %s = ashr <2 x i64> %x, <i64 5, i64 5>
   %r = tail call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64> %s)
@@ -93,8 +89,7 @@ define <2 x i32> @NarrowAShrU64By5(<2 x i64> %x) {
 define <2 x i32> @NarrowAShri64By5ToU32(<2 x i64> %x) {
 ; CHECK-LABEL: NarrowAShri64By5ToU32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.2d, v0.2d, #5
-; CHECK-NEXT:    sqxtun v0.2s, v0.2d
+; CHECK-NEXT:    sqshrun v0.2s, v0.2d, #5
 ; CHECK-NEXT:    ret
   %s = ashr <2 x i64> %x, <i64 5, i64 5>
   %r = tail call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32(<2 x i64> %s)
@@ -115,8 +110,7 @@ define <2 x i32> @NarrowLShri64By5(<2 x i64> %x) {
 define <2 x i32> @NarrowLShrU64By5(<2 x i64> %x) {
 ; CHECK-LABEL: NarrowLShrU64By5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.2d, v0.2d, #5
-; CHECK-NEXT:    uqxtn v0.2s, v0.2d
+; CHECK-NEXT:    uqshrn v0.2s, v0.2d, #5
 ; CHECK-NEXT:    ret
   %s = lshr <2 x i64> %x, <i64 5, i64 5>
   %r = tail call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64> %s)
@@ -138,8 +132,7 @@ define <2 x i32> @NarrowLShri64By5ToU32(<2 x i64> %x) {
 define <8 x i8> @NarrowAShri16By5(<8 x i16> %x) {
 ; CHECK-LABEL: NarrowAShri16By5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.8h, v0.8h, #5
-; CHECK-NEXT:    sqxtn v0.8b, v0.8h
+; CHECK-NEXT:    sqshrn v0.8b, v0.8h, #5
 ; CHECK-NEXT:    ret
   %s = ashr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
   %r = tail call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> %s)
@@ -160,8 +153,7 @@ define <8 x i8> @NarrowAShrU16By5(<8 x i16> %x) {
 define <8 x i8> @NarrowAShri16By5ToU8(<8 x i16> %x) {
 ; CHECK-LABEL: NarrowAShri16By5ToU8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.8h, v0.8h, #5
-; CHECK-NEXT:    sqxtun v0.8b, v0.8h
+; CHECK-NEXT:    sqshrun v0.8b, v0.8h, #5
 ; CHECK-NEXT:    ret
   %s = ashr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
   %r = tail call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> %s)
@@ -182,8 +174,7 @@ define <8 x i8> @NarrowLShri16By5(<8 x i16> %x) {
 define <8 x i8> @NarrowLShrU16By5(<8 x i16> %x) {
 ; CHECK-LABEL: NarrowLShrU16By5:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.8h, v0.8h, #5
-; CHECK-NEXT:    uqxtn v0.8b, v0.8h
+; CHECK-NEXT:    uqshrn v0.8b, v0.8h, #5
 ; CHECK-NEXT:    ret
   %s = lshr <8 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
   %r = tail call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> %s)
@@ -208,8 +199,7 @@ define <8 x i8> @NarrowLShri16By5ToU8(<8 x i16> %x) {
 define <4 x i16> @NarrowAShrI32By31(<4 x i32> %x) {
 ; CHECK-LABEL: NarrowAShrI32By31:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #16
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
+; CHECK-NEXT:    sqshrn v0.4h, v0.4s, #16
 ; CHECK-NEXT:    ret
   %s = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
   %r = tail call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> %s)
@@ -219,8 +209,7 @@ define <4 x i16> @NarrowAShrI32By31(<4 x i32> %x) {
 define <4 x i16> @NarrowAShrI32By31ToU16(<4 x i32> %x) {
 ; CHECK-LABEL: NarrowAShrI32By31ToU16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #16
-; CHECK-NEXT:    sqxtun v0.4h, v0.4s
+; CHECK-NEXT:    sqshrun v0.4h, v0.4s, #16
 ; CHECK-NEXT:    ret
   %s = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
   %r = tail call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> %s)
@@ -230,8 +219,7 @@ define <4 x i16> @NarrowAShrI32By31ToU16(<4 x i32> %x) {
 define <4 x i16> @NarrowLShrU32By31(<4 x i32> %x) {
 ; CHECK-LABEL: NarrowLShrU32By31:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
+; CHECK-NEXT:    uqshrn v0.4h, v0.4s, #16
 ; CHECK-NEXT:    ret
   %s = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
   %r = tail call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> %s)
@@ -242,10 +230,8 @@ define <4 x i16> @NarrowLShrU32By31(<4 x i32> %x) {
 define <16 x i8> @signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
 ; CHECK-LABEL: signed_minmax_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshr v0.8h, v0.8h, #5
-; CHECK-NEXT:    sshr v1.8h, v1.8h, #5
-; CHECK-NEXT:    sqxtn v0.8b, v0.8h
-; CHECK-NEXT:    sqxtn2 v0.16b, v1.8h
+; CHECK-NEXT:    sqshrn v0.8b, v0.8h, #5
+; CHECK-NEXT:    sqshrn2 v0.16b, v1.8h, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = ashr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -258,10 +244,8 @@ define <16 x i8> @signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
 define <16 x i8> @unsigned_minmax_v8i16_to_v16i8(<16 x i16> %x) {
 ; CHECK-LABEL: unsigned_minmax_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushr v0.8h, v0.8h, #5
-; CHECK-NEXT:    ushr v1.8h, v1.8h, #5
-; CHECK-NEXT:    uqxtn v0.8b, v0.8h
-; CHECK-NEXT:    uqxtn2 v0.16b, v1.8h
+; CHECK-NEXT:    uqshrn v0.8b, v0.8h, #5
+; CHECK-NEXT:    uqshrn2 v0.16b, v1.8h, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = lshr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -273,10 +257,8 @@ define <16 x i8> @unsigned_minmax_v8i16_to_v16i8(<16 x i16> %x) {
 define <16 x i8> @unsigned_signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
 ; CHECK-LABEL: unsigned_signed_minmax_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshr v0.8h, v0.8h, #5
-; CHECK-NEXT:    sshr v1.8h, v1.8h, #5
-; CHECK-NEXT:    sqxtun v0.8b, v0.8h
-; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
+; CHECK-NEXT:    sqshrun v0.8b, v0.8h, #5
+; CHECK-NEXT:    sqshrun2 v0.16b, v1.8h, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = ashr <16 x i16> %x, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
@@ -290,10 +272,8 @@ define <16 x i8> @unsigned_signed_minmax_v8i16_to_v16i8(<16 x i16> %x) {
 define <8 x i16> @signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
 ; CHECK-LABEL: signed_minmax_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #5
-; CHECK-NEXT:    sshr v1.4s, v1.4s, #5
-; CHECK-NEXT:    sqxtn v0.4h, v0.4s
-; CHECK-NEXT:    sqxtn2 v0.8h, v1.4s
+; CHECK-NEXT:    sqshrn v0.4h, v0.4s, #5
+; CHECK-NEXT:    sqshrn2 v0.8h, v1.4s, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = ashr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -306,10 +286,8 @@ define <8 x i16> @signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
 define <8 x i16> @unsigned_minmax_v4i32_to_v8i16(<8 x i32> %x) {
 ; CHECK-LABEL: unsigned_minmax_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #5
-; CHECK-NEXT:    ushr v1.4s, v1.4s, #5
-; CHECK-NEXT:    uqxtn v0.4h, v0.4s
-; CHECK-NEXT:    uqxtn2 v0.8h, v1.4s
+; CHECK-NEXT:    uqshrn v0.4h, v0.4s, #5
+; CHECK-NEXT:    uqshrn2 v0.8h, v1.4s, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = lshr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -321,10 +299,8 @@ define <8 x i16> @unsigned_minmax_v4i32_to_v8i16(<8 x i32> %x) {
 define <8 x i16> @unsigned_signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
 ; CHECK-LABEL: unsigned_signed_minmax_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #5
-; CHECK-NEXT:    sshr v1.4s, v1.4s, #5
-; CHECK-NEXT:    sqxtun v0.4h, v0.4s
-; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
+; CHECK-NEXT:    sqshrun v0.4h, v0.4s, #5
+; CHECK-NEXT:    sqshrun2 v0.8h, v1.4s, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = ashr <8 x i32> %x, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -338,10 +314,8 @@ define <8 x i16> @unsigned_signed_minmax_v4i32_to_v8i16(<8 x i32> %x) {
 define <4 x i32> @signed_minmax_v4i64_to_v8i32(<4 x i64> %x) {
 ; CHECK-LABEL: signed_minmax_v4i64_to_v8i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshr v0.2d, v0.2d, #5
-; CHECK-NEXT:    sshr v1.2d, v1.2d, #5
-; CHECK-NEXT:    sqxtn v0.2s, v0.2d
-; CHECK-NEXT:    sqxtn2 v0.4s, v1.2d
+; CHECK-NEXT:    sqshrn v0.2s, v0.2d, #5
+; CHECK-NEXT:    sqshrn2 v0.4s, v1.2d, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = ashr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
@@ -354,10 +328,8 @@ define <4 x i32> @signed_minmax_v4i64_to_v8i32(<4 x i64> %x) {
 define <4 x i32> @unsigned_minmax_v4i64_to_v8i32(<4 x i64> %x) {
 ; CHECK-LABEL: unsigned_minmax_v4i64_to_v8i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushr v0.2d, v0.2d, #5
-; CHECK-NEXT:    ushr v1.2d, v1.2d, #5
-; CHECK-NEXT:    uqxtn v0.2s, v0.2d
-; CHECK-NEXT:    uqxtn2 v0.4s, v1.2d
+; CHECK-NEXT:    uqshrn v0.2s, v0.2d, #5
+; CHECK-NEXT:    uqshrn2 v0.4s, v1.2d, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = lshr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
@@ -369,10 +341,8 @@ define <4 x i32> @unsigned_minmax_v4i64_to_v8i32(<4 x i64> %x) {
 define <4 x i32> @unsigned_signed_minmax_v4i64_to_v8i32(<4 x i64> %x) {
 ; CHECK-LABEL: unsigned_signed_minmax_v4i64_to_v8i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshr v0.2d, v0.2d, #5
-; CHECK-NEXT:    sshr v1.2d, v1.2d, #5
-; CHECK-NEXT:    sqxtun v0.2s, v0.2d
-; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
+; CHECK-NEXT:    sqshrun v0.2s, v0.2d, #5
+; CHECK-NEXT:    sqshrun2 v0.4s, v1.2d, #5
 ; CHECK-NEXT:    ret
 entry:
   %s = ashr <4 x i64> %x, <i64 5, i64 5, i64 5, i64 5>
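
Note: routing the intrinsics through the generic ISD::TRUNCATE_SSAT_S/TRUNCATE_SSAT_U/TRUNCATE_USAT_U nodes means the SQXTN/SQXTUN/UQXTN patterns above also keep covering the plain clamp-and-truncate idiom that the deleted hand-written Pat<> entries matched. A minimal sketch of that idiom in IR, using the standard llvm.smax/llvm.smin intrinsics (a hypothetical test, not part of this patch; the function name is illustrative):

; trunc(smin(smax(x, -128), 127)) narrows <8 x i16> to <8 x i8> with signed
; saturation; it forms ISD::TRUNCATE_SSAT_S and should still select
; "sqxtn v0.8b, v0.8h" after this change. (Hypothetical example, not from
; this commit.)
define <8 x i8> @ssat_narrow_v8i16(<8 x i16> %x) {
  %lo = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>)
  %hi = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %lo, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>)
  %t = trunc <8 x i16> %hi to <8 x i8>
  ret <8 x i8> %t
}

declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)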