Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,12 @@ class CombinerHelper {
bool matchRotateOutOfRange(MachineInstr &MI) const;
void applyRotateOutOfRange(MachineInstr &MI) const;

/// Match a G_BUILD_VECTOR whose first half of sources are, in order, all of
/// the results of a single G_UNMERGE_VALUES and whose second half of sources
/// are all G_IMPLICIT_DEF. On success \p UnmergeSrc is set to the source
/// register of that G_UNMERGE_VALUES.
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &UnmergeSrc) const;
/// Replace the matched G_BUILD_VECTOR \p MI with a G_CONCAT_VECTORS of
/// \p UnmergeSrc and an undef value of the same type as \p UnmergeSrc, then
/// erase \p MI.
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B,
Register &UnmergeSrc) const;

bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;

Expand Down
12 changes: 11 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule <
[{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;

// Transform
//   build_vector(unmerge(src, 0), ..., unmerge(src, n), undef, ..., undef)
//   => concat_vectors(src, undef)
// The pattern itself accepts any G_BUILD_VECTOR as the root; the operand
// checks are performed in C++ by matchCombineBuildUnmerge, which hands the
// unmerge source register to the apply step through $unmergeSrc.
def combine_build_unmerge : GICombineRule<
(defs root:$root, register_matchinfo:$unmergeSrc),
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
[{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
(apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
>;

def merge_combines: GICombineGroup<[
unmerge_anyext_build_vector,
unmerge_merge,
Expand All @@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[
unmerge_dead_to_trunc,
unmerge_zext_to_zext,
merge_of_x_and_undef,
merge_of_x_and_zero
merge_of_x_and_zero,
combine_build_unmerge
]>;

// Under certain conditions, transform:
Expand Down
84 changes: 84 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
Expand Down Expand Up @@ -3463,6 +3464,89 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
isConstTrueVal(TLI, Cst, IsVector, IsFP);
}

// This pattern aims to match the following shape to avoid extra mov
// instructions:
//   %x0, %x1 = G_UNMERGE_VALUES %src
//   %undef   = G_IMPLICIT_DEF
//   %dst     = G_BUILD_VECTOR %x0, %x1, %undef, %undef
// ->
//   %dst     = G_CONCAT_VECTORS %src, undef
//
// The first half of the build_vector sources must be every result of one
// G_UNMERGE_VALUES, in result order, and the second half must all be
// G_IMPLICIT_DEF. The unmerge source must itself be a vector, because
// G_CONCAT_VECTORS only accepts vector sources.
//
// Returns true on a match and stores the unmerge source register in
// UnmergeSrc; UnmergeSrc is left untouched when the match fails.
bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              Register &UnmergeSrc) const {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // Operand 0 is the def; every operand after it is a source element.
  unsigned BuildUseCount = MI.getNumOperands() - 1;

  // The sources must split evenly into an "unmerge" half and an "undef" half.
  if (BuildUseCount == 0 || BuildUseCount % 2 != 0)
    return false;

  unsigned NumUnmerge = BuildUseCount / 2;

  // Check the first operand is an unmerge. getDefIgnoringCopies yields a null
  // pointer when it cannot find a defining instruction, so guard against that
  // before looking at the opcode.
  MachineInstr *MaybeUnmerge =
      getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  if (!MaybeUnmerge ||
      MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
    return false;

  // Check that all of the unmerged values are used. Doing this before the
  // loop below also guarantees that operands [0, NumUnmerge) of the unmerge
  // are exactly its defs.
  if (MaybeUnmerge->getNumDefs() != NumUnmerge)
    return false;

  // The value being unmerged is the last operand of G_UNMERGE_VALUES.
  Register SrcReg =
      MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg();

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LLT UnmergeSrcTy = MRI.getType(SrcReg);

  // A scalar can be unmerged into smaller scalars, but it cannot be used as a
  // G_CONCAT_VECTORS source.
  if (!UnmergeSrcTy.isVector())
    return false;

  // Ensure we only generate legal instructions post-legalizer.
  // G_CONCAT_VECTORS has two type indices: the result and the (shared) source
  // type.
  if (!IsPreLegalize &&
      !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
    return false;

  // Check that all of the operands before the midpoint come from the same
  // unmerge and are in the same order as they are used in the build_vector.
  for (unsigned I = 0; I < NumUnmerge; ++I) {
    Register MaybeUnmergeReg = MI.getOperand(I + 1).getReg();
    MachineInstr *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI);

    if (!Unmerge || Unmerge != MaybeUnmerge)
      return false;

    // The I-th build_vector source must be the I-th unmerge result itself.
    if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
      return false;
  }

  // Check that all of the operands after the mid point are undefs.
  for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
    MachineInstr *Undef =
        getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);

    if (!Undef || Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
      return false;
  }

  UnmergeSrc = SrcReg;

  return true;
}

// Rewrites the matched G_BUILD_VECTOR as a G_CONCAT_VECTORS of the unmerge
// source and an undef value of the same type, then deletes the build_vector.
// UnmergeSrc is the register recorded by matchCombineBuildUnmerge.
void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B,
                                              Register &UnmergeSrc) const {
  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");

  // Position the builder at the build_vector and take over its debug location.
  B.setInstrAndDebugLoc(MI);

  // The second concat source is undef and shares the type of the first.
  const LLT SrcTy = MRI.getType(UnmergeSrc);
  auto UndefPad = B.buildUndef(SrcTy);

  // Define the build_vector's result directly from the concat.
  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefPad.getReg(0)});

  MI.eraseFromParent();
}

// This combine tries to reduce the number of scalarised G_TRUNC instructions by
// using vector truncates instead
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -890,4 +890,4 @@ namespace llvm {
// Returns a newly allocated AArch64PostLegalizerCombiner as a FunctionPass,
// forwarding IsOptNone to its constructor.
// NOTE(review): ownership of the returned pass presumably transfers to the
// caller / pass manager — confirm.
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
return new AArch64PostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
} // end namespace llvm
18 changes: 5 additions & 13 deletions llvm/test/CodeGen/AArch64/fptrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -345,19 +345,11 @@ entry:
}

define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fptrunc_v2f32_v2f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c = fptrunc <2 x float> %a to <2 x half>
ret <2 x half> %c
Expand Down
48 changes: 12 additions & 36 deletions llvm/test/CodeGen/AArch64/itofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i64> %a to <2 x half>
Expand Down Expand Up @@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i64> %a to <2 x half>
Expand Down Expand Up @@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i32> %a to <2 x half>
Expand All @@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i32> %a to <2 x half>
Expand Down Expand Up @@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i16> %a to <2 x half>
Expand All @@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i16> %a to <2 x half>
Expand Down Expand Up @@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
Expand Down Expand Up @@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:
Expand Down
54 changes: 14 additions & 40 deletions llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) {
}

define void @undef_hi2_v4i16(<2 x i16> %arg0) {
; GFX8-SDAG-LABEL: undef_hi2_v4i16:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: ;;#ASMSTART
; GFX8-SDAG-NEXT: ; use v[0:1]
; GFX8-SDAG-NEXT: ;;#ASMEND
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: undef_hi2_v4i16:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8-GISEL-NEXT: ;;#ASMSTART
; GFX8-GISEL-NEXT: ; use v[0:1]
; GFX8-GISEL-NEXT: ;;#ASMEND
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX8-LABEL: undef_hi2_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4i16:
; GFX9: ; %bb.0:
Expand All @@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
}

define void @undef_hi2_v4f16(<2 x half> %arg0) {
; GFX8-SDAG-LABEL: undef_hi2_v4f16:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: ;;#ASMSTART
; GFX8-SDAG-NEXT: ; use v[0:1]
; GFX8-SDAG-NEXT: ;;#ASMEND
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: undef_hi2_v4f16:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8-GISEL-NEXT: ;;#ASMSTART
; GFX8-GISEL-NEXT: ; use v[0:1]
; GFX8-GISEL-NEXT: ;;#ASMEND
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX8-LABEL: undef_hi2_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4f16:
; GFX9: ; %bb.0:
Expand All @@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX8: {{.*}}