[AArch64][GlobalISel] Added support for hadd family of intrinsics #163985
Conversation
@llvm/pr-subscribers-backend-aarch64
Author: Joshua Rodriguez (JoshdRod)
Changes: GlobalISel now selects the hadd family of intrinsics, without falling back to SDAG.
Full diff: https://github.com/llvm/llvm-project/pull/163985.diff
4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index fe8419301b306..187ef00eebd3c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -239,6 +239,30 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_UHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_URHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_SHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_SRHADD : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -286,6 +310,11 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;
+def : GINodeEquiv<G_UHADD, avgflooru>;
+def : GINodeEquiv<G_URHADD, avgceilu>;
+def : GINodeEquiv<G_SHADD, avgfloors>;
+def : GINodeEquiv<G_SRHADD, avgceils>;
+
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 05a431312472e..d2abdd0662b60 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1825,6 +1825,14 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDS);
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
+ case Intrinsic::aarch64_neon_uhadd:
+ return LowerBinOp(AArch64::G_UHADD);
+ case Intrinsic::aarch64_neon_urhadd:
+ return LowerBinOp(AArch64::G_URHADD);
+ case Intrinsic::aarch64_neon_shadd:
+ return LowerBinOp(AArch64::G_SHADD);
+ case Intrinsic::aarch64_neon_srhadd:
+ return LowerBinOp(AArch64::G_SRHADD);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index f900f0209a108..a6fbaf01c5476 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
@@ -7,11 +8,20 @@ declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: rhaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: rhaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: rhaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_zext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_zext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_zext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; ; negative tests
define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: haddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: haddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: haddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: urhadd_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: urhadd_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: urhadd_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: hadds_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: hadds_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: hadds_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
}
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
-; CHECK-LABEL: shaddu_sext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shaddu_sext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shaddu_sext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
ret <8 x i16> %res
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index fb909fec90434..136ac8b0a2aa1 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -3,10 +3,6 @@
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; CHECK-GI: warning: Instruction selection used fallback path for freeze_v2i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_uhadd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_urhadd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_shadd
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for freeze_srhadd
%struct.T = type { i32, i32 }
@@ -435,13 +431,23 @@ define <8 x i16> @freeze_abds(<8 x i16> %a, <8 x i16> %b) {
}
define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_uhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_uhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_uhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
@@ -451,13 +457,23 @@ define <8 x i16> @freeze_uhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_urhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_urhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_urhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: movi v2.8h, #31
+; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%m0 = and <8 x i16> %a0, splat (i16 15)
%m1 = and <8 x i16> %a1, splat (i16 15)
%avg = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %m0, <8 x i16> %m1)
@@ -467,12 +483,21 @@ define <8 x i16> @freeze_urhadd(<8 x i16> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_shadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_shadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_shadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
@@ -483,12 +508,21 @@ define <8 x i16> @freeze_shadd(<8 x i8> %a0, <8 x i16> %a1) {
}
define <8 x i16> @freeze_srhadd(<8 x i8> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: freeze_srhadd:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshr v1.8h, v1.8h, #8
-; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: freeze_srhadd:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_srhadd:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshr v1.8h, v1.8h, #8
+; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shl v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: sshr v0.8h, v0.8h, #8
+; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = ashr <8 x i16> %a1, splat (i16 8)
%avg = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
I think we should probably reuse the same names as are used in SDAG. As much as I dislike them, there are other "hadd" instructions on other architectures that do something a little different IIRC.
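A minimal sketch of what the rename might look like, assuming the pseudos simply adopt the SDAG node names (avgflooru and friends) that the GINodeEquiv mappings in the diff already reference; the later revision quoted further down does define G_AVGFLOORU, but the exact bodies here are illustrative rather than taken from the final patch:

// Renamed to match SDAG's avgflooru node; body unchanged from G_UHADD above.
def G_AVGFLOORU : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type1:$src2);
  let hasSideEffects = 0;
}
// G_AVGCEILU, G_AVGFLOORS and G_AVGCEILS would follow the same pattern, and the
// equivalences become e.g. def : GINodeEquiv<G_AVGFLOORU, avgflooru>;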
GlobalISel now selects the uhadd intrinsic, without falling back to SDAG. Note that GlobalISel-generated code involving uhadd seems to be inefficient when compared to SDAG.
GlobalISel now selects the urhadd intrinsic, without falling back to SDAG. Note that GlobalISel-generated code involving urhadd seems to be inefficient when compared to SDAG.
GlobalISel now selects the shadd intrinsic, without falling back to SDAG. Note that GlobalISel-generated code involving shadd seems to be inefficient when compared to SDAG.
GlobalISel now selects the srhadd intrinsic, without falling back to SDAG. Note that GlobalISel-generated code involving srhadd seems to be inefficient when compared to SDAG.
… SDAG and GISel. Note that GlobalISel-generated code involving the hadd family of intrinsics seems to be inefficient when compared to SDAG.
7998beb to 75539ff
  let hasSideEffects = 0;
}

def G_AVGFLOORU : AArch64GenericInstruction {
Can we make these generic - add these to the same places as G_ABDU. We will need them eventually when we add combines for them in the same way as SDAG.
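A rough sketch of what a generic declaration could look like in llvm/include/llvm/Target/GenericOpcodes.td, assuming it sits next to G_ABDS/G_ABDU and mirrors their form; the isCommutable flag and the semantics comment are assumptions here, not taken from an actual patch:

// Generic unsigned halving add: floor((a + b) / 2), computed without
// intermediate overflow, mirroring SDAG's avgflooru node.
def G_AVGFLOORU : GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type0:$src2);
  let hasSideEffects = false;
  let isCommutable = true;
}
// G_AVGCEILU, G_AVGFLOORS and G_AVGCEILS would be declared the same way, and the
// AArch64 legalizer could then lower the intrinsics to TargetOpcode::G_AVGFLOORU
// etc. instead of the target-specific AArch64::G_UHADD pseudos.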
GlobalISel now selects the hadd family of intrinsics, without falling back to SDAG.