Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,30 @@ def G_USDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}

// Unsigned floor-average of two vectors. Produced when legalizing the
// llvm.aarch64.neon.uhadd intrinsic and paired with the avgflooru node via
// GINodeEquiv. This is a plain binary op: both sources have the same type as
// the result, so every operand is constrained to type0.
// TODO(review): consider making the G_AVG* opcodes target-independent generic
// opcodes (added in the same places as G_ABDU); they will be needed once
// combines equivalent to the SelectionDAG ones are added.
def G_AVGFLOORU : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type0:$src2);
  let hasSideEffects = 0;
}

// Unsigned ceiling-average of two vectors. Produced when legalizing the
// llvm.aarch64.neon.urhadd intrinsic and paired with the avgceilu node via
// GINodeEquiv. Both sources share the result type, so every operand is
// constrained to type0.
def G_AVGCEILU : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type0:$src2);
  let hasSideEffects = 0;
}

// Signed floor-average of two vectors. Produced when legalizing the
// llvm.aarch64.neon.shadd intrinsic and paired with the avgfloors node via
// GINodeEquiv. Both sources share the result type, so every operand is
// constrained to type0.
def G_AVGFLOORS : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type0:$src2);
  let hasSideEffects = 0;
}

// Signed ceiling-average of two vectors. Produced when legalizing the
// llvm.aarch64.neon.srhadd intrinsic and paired with the avgceils node via
// GINodeEquiv. Both sources share the result type, so every operand is
// constrained to type0.
def G_AVGCEILS : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$src1, type0:$src2);
  let hasSideEffects = 0;
}

// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
Expand Down Expand Up @@ -286,6 +310,11 @@ def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
def : GINodeEquiv<G_USDOT, AArch64usdot>;

// Pair each AArch64 averaging generic opcode with its SelectionDAG avg* node
// (unsigned/signed, floor/ceil).
// NOTE(review): presumably this lets existing SelectionDAG patterns for these
// nodes be reused for GlobalISel selection - confirm against the importer.
def : GINodeEquiv<G_AVGFLOORU, avgflooru>;
def : GINodeEquiv<G_AVGCEILU, avgceilu>;
def : GINodeEquiv<G_AVGFLOORS, avgfloors>;
def : GINodeEquiv<G_AVGCEILS, avgceils>;

def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1825,6 +1825,14 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerBinOp(TargetOpcode::G_ABDS);
case Intrinsic::aarch64_neon_uabd:
return LowerBinOp(TargetOpcode::G_ABDU);
case Intrinsic::aarch64_neon_uhadd:
return LowerBinOp(AArch64::G_AVGFLOORU);
case Intrinsic::aarch64_neon_urhadd:
return LowerBinOp(AArch64::G_AVGCEILU);
case Intrinsic::aarch64_neon_shadd:
return LowerBinOp(AArch64::G_AVGFLOORS);
case Intrinsic::aarch64_neon_srhadd:
return LowerBinOp(AArch64::G_AVGCEILS);
case Intrinsic::aarch64_neon_abs: {
// Lower the intrinsic to G_ABS.
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
Expand Down
171 changes: 123 additions & 48 deletions llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)

define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: haddu_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: haddu_zext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: haddu_zext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
Expand All @@ -20,11 +30,20 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}

define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: rhaddu_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: rhaddu_zext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: urhadd v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: rhaddu_zext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
Expand All @@ -33,11 +52,20 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
}

define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: hadds_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ret
; CHECK-SD-LABEL: hadds_zext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: uhadd v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadds_zext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
Expand All @@ -46,12 +74,21 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
}

define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: shaddu_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shaddu_zext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-SD-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shaddu_zext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = zext <8 x i8> %a0 to <8 x i16>
%x1 = zext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
Expand All @@ -62,13 +99,22 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
; ; negative tests

define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: haddu_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
; CHECK-SD-LABEL: haddu_sext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-SD-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: haddu_sext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: uhadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
Expand All @@ -77,13 +123,22 @@ define <8 x i16> @haddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
}

define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: urhadd_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
; CHECK-SD-LABEL: urhadd_sext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-SD-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: urhadd_sext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: urhadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
Expand All @@ -92,12 +147,21 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) {
}

define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: hadds_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
; CHECK-SD-LABEL: hadds_sext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: shadd v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: hadds_sext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: shadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
Expand All @@ -106,15 +170,26 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) {
}

; Sign-extends both <8 x i8> inputs to <8 x i16>, calls the srhadd intrinsic
; on them and masks each lane of the result with 511.
; NOTE(review): the function name says "shaddu", but the intrinsic called is
; llvm.aarch64.neon.srhadd - consider renaming for clarity.
define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) {
; CHECK-LABEL: shaddu_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: bic v0.8h, #254, lsl #8
; CHECK-NEXT: ret
; CHECK-SD-LABEL: shaddu_sext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: srhadd v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: bic v0.8h, #254, lsl #8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shaddu_sext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mvni v2.8h, #254, lsl #8
; CHECK-GI-NEXT: srhadd v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: ret
%x0 = sext <8 x i8> %a0 to <8 x i16>
%x1 = sext <8 x i8> %a1 to <8 x i16>
%hadd = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x0, <8 x i16> %x1)
%res = and <8 x i16> %hadd, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
ret <8 x i16> %res
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
Loading
Loading