diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index fe8419301b306..ad071a1cce7f1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -239,6 +239,30 @@ def G_USDOT : AArch64GenericInstruction { let hasSideEffects = 0; } +def G_AVGFLOORU : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = 0; +} + +def G_AVGCEILU : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = 0; +} + +def G_AVGFLOORS : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = 0; +} + +def G_AVGCEILS : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = 0; +} + // Generic instruction for the BSP pseudo. It is expanded into BSP, which // expands into BSL/BIT/BIF after register allocation. 
def G_BSP : AArch64GenericInstruction { @@ -286,6 +310,11 @@ def : GINodeEquiv<G_UDOT, AArch64udot>; def : GINodeEquiv<G_SDOT, AArch64sdot>; def : GINodeEquiv<G_USDOT, AArch64usdot>; +def : GINodeEquiv<G_AVGFLOORU, avgflooru>; +def : GINodeEquiv<G_AVGCEILU, avgceilu>; +def : GINodeEquiv<G_AVGFLOORS, avgfloors>; +def : GINodeEquiv<G_AVGCEILS, avgceils>; + def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; def : GINodeEquiv<G_PREFETCH, AArch64Prefetch>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 05a431312472e..1d865e3cd85aa 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1825,6 +1825,14 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return LowerBinOp(TargetOpcode::G_ABDS); case Intrinsic::aarch64_neon_uabd: return LowerBinOp(TargetOpcode::G_ABDU); + case Intrinsic::aarch64_neon_uhadd: + return LowerBinOp(AArch64::G_AVGFLOORU); + case Intrinsic::aarch64_neon_urhadd: + return LowerBinOp(AArch64::G_AVGCEILU); + case Intrinsic::aarch64_neon_shadd: + return LowerBinOp(AArch64::G_AVGFLOORS); + case Intrinsic::aarch64_neon_srhadd: + return LowerBinOp(AArch64::G_AVGCEILS); case Intrinsic::aarch64_neon_abs: { // Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)}); diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll index f900f0209a108..a6fbaf01c5476 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=aarch64 < %s | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) @@ -7,11 +8,20 @@ declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: haddu_zext: -; CHECK: // %bb.0: -; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: haddu_zext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: haddu_zext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) { } define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: rhaddu_zext: -; CHECK: 
// %bb.0: -; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: rhaddu_zext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: rhaddu_zext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) { } define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: hadds_zext: -; CHECK: // %bb.0: -; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: hadds_zext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: hadds_zext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) { } define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: shaddu_zext: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shaddu_zext: +; CHECK-SD: // 
%bb.0: +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shaddu_zext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = zext <8 x i8> %a0 to <8 x i16> %x1 = zext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) { ; ; negative tests define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: haddu_sext: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: bic v0.8h, #254, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: haddu_sext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: haddu_sext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> %x1 = sext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) { } define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: urhadd_sext: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h -; 
CHECK-NEXT: bic v0.8h, #254, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: urhadd_sext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: urhadd_sext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> %x1 = sext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) { } define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: hadds_sext: -; CHECK: // %bb.0: -; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: bic v0.8h, #254, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: hadds_sext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: hadds_sext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> %x1 = sext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) { } define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: shaddu_sext: -; CHECK: // %bb.0: -; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: sshll v0.8h, 
v0.8b, #0 -; CHECK-NEXT: bic v0.8h, #254, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shaddu_sext: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shaddu_sext: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8 +; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> %x1 = sext <8 x i8> %a1 to <8 x i16> %hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) %res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511> ret <8 x i16> %res } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll index fb909fec90434..136ac8b0a2aa1 100644 --- a/llvm/test/CodeGen/AArch64/freeze.ll +++ b/llvm/test/CodeGen/AArch64/freeze.ll @@ -3,10 +3,6 @@ ; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_uhadd -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd %struct.T = type { i32, i32 } @@ -435,13 +431,23 @@ define <8 x i16> @freeze_abds(<8 x i16> %a, <8 x i16> %b) { } define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: freeze_uhadd: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.8h, #15 -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and
v1.16b, v1.16b, v2.16b -; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: freeze_uhadd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.8h, #15 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: freeze_uhadd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.8h, #15 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: movi v2.8h, #31 +; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %m0 = and <8 x i16> %a0, splat (i16 15) %m1 = and <8 x i16> %a1, splat (i16 15) %avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1) @@ -451,13 +457,23 @@ define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) { } define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: freeze_urhadd: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.8h, #15 -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: freeze_urhadd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.8h, #15 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: freeze_urhadd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.8h, #15 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: movi v2.8h, #31 +; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %m0 = and <8 x i16> %a0, splat (i16 15) %m1 = and <8 x i16> %a1, splat (i16 15) %avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1) @@ -467,12 +483,21 @@ define <8 x i16> @freeze_urhadd(<8 x 
i16> %a0, <8 x i16> %a1) { } define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) { -; CHECK-LABEL: freeze_shadd: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshr v1.8h, v1.8h, #8 -; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: freeze_shadd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8 +; CHECK-SD-NEXT: shadd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: freeze_shadd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8 +; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8 +; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8 +; CHECK-GI-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> %x1 = ashr <8 x i16> %a1, splat (i16 8) %avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1) @@ -483,12 +508,21 @@ define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) { } define <8 x i16> @freeze_srhadd(<8 x i8> %a0, <8 x i16> %a1) { -; CHECK-LABEL: freeze_srhadd: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshr v1.8h, v1.8h, #8 -; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: freeze_srhadd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8 +; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: freeze_srhadd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8 +; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8 +; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8 +; CHECK-GI-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> %x1 = ashr <8 x i16> %a1, splat (i16 8) %avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)