Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,12 @@ class CombinerHelper {
bool matchRotateOutOfRange(MachineInstr &MI) const;
void applyRotateOutOfRange(MachineInstr &MI) const;

/// Match a G_BUILD_VECTOR whose first half of sources are the results of a
/// single G_UNMERGE_VALUES (in their original order) and whose second half
/// of sources are all G_IMPLICIT_DEF. On a match, \p UnmergeSrc receives the
/// source register of that G_UNMERGE_VALUES.
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &UnmergeSrc) const;
/// Replace the matched G_BUILD_VECTOR \p MI with a G_CONCAT_VECTORS of
/// \p UnmergeSrc and a freshly built undef value of the same type, then erase
/// \p MI.
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B,
Register &UnmergeSrc) const;

bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;

Expand Down
12 changes: 11 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule <
[{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;

// Transform build_vector(unmerge(src, 0), ... unmerge(src, n), undef, ..., undef)
// => concat_vectors(src, undef)
//
// The rule roots on any G_BUILD_VECTOR; all structural checks are done in
// Helper.matchCombineBuildUnmerge, which requires that the first half of the
// build_vector sources are the results of one G_UNMERGE_VALUES in their
// original order and that the second half are all G_IMPLICIT_DEF. The matched
// unmerge source register is passed to the apply step through $unmergeSrc.
def combine_build_unmerge : GICombineRule<
(defs root:$root, register_matchinfo:$unmergeSrc),
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
[{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
(apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
>;

def merge_combines: GICombineGroup<[
unmerge_anyext_build_vector,
unmerge_merge,
Expand All @@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[
unmerge_dead_to_trunc,
unmerge_zext_to_zext,
merge_of_x_and_undef,
merge_of_x_and_zero
merge_of_x_and_zero,
combine_build_unmerge
]>;

// Under certain conditions, transform:
Expand Down
85 changes: 84 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
Expand Down Expand Up @@ -3463,6 +3464,88 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
isConstTrueVal(TLI, Cst, IsVector, IsFP);
}

// Match the following shape so that the build vector can be replaced by a
// single concat, avoiding extra per-lane mov instructions:
//   G_BUILD_VECTOR(
//     G_UNMERGE_VALUES(src, 0)
//     G_UNMERGE_VALUES(src, 1)
//     G_IMPLICIT_DEF
//     G_IMPLICIT_DEF
//   )
//   ->
//   G_CONCAT_VECTORS(
//     src,
//     undef
//   )
//
// Requirements checked here:
//   * the build vector has an even number of sources;
//   * the first half are exactly the results of one G_UNMERGE_VALUES, used in
//     the same order in which the unmerge defines them;
//   * the unmerge source is itself a vector (G_CONCAT_VECTORS cannot take
//     scalar operands);
//   * the second half are all G_IMPLICIT_DEF (looking through copies);
//   * after legalization, both instructions emitted by the apply step are
//     legal.
//
// On success \p UnmergeSrc is set to the unmerge's source register; it is left
// untouched when the match fails.
bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              Register &UnmergeSrc) const {
  auto &BV = cast<GBuildVector>(MI);

  unsigned BuildUseCount = BV.getNumSources();
  if (BuildUseCount % 2 != 0)
    return false;

  unsigned NumUnmerge = BuildUseCount / 2;

  auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);

  // The first operand must come from an unmerge that defines exactly as many
  // values as the low half of the build vector consumes, so that every
  // unmerged value ends up being used.
  if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
    return false;

  Register SrcReg = Unmerge->getSourceReg();

  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  LLT UnmergeSrcTy = MRI.getType(SrcReg);

  // G_UNMERGE_VALUES may also split a wide scalar into narrower scalars. Such
  // a source cannot be an operand of G_CONCAT_VECTORS, so reject it rather
  // than build invalid MIR in the apply step.
  if (!UnmergeSrcTy.isVector() ||
      UnmergeSrcTy.getElementType() != DstTy.getElementType())
    return false;

  // Check that all of the operands before the midpoint come from the same
  // unmerge and are in the same order as they are used in the build_vector.
  for (unsigned I = 0; I < NumUnmerge; ++I) {
    Register MaybeUnmergeReg = BV.getSourceReg(I);

    // Unmerge is known to be non-null here, so this comparison also rejects
    // sources that are not defined by any unmerge.
    if (getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI) != Unmerge)
      return false;

    if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg)
      return false;
  }

  // Check that all of the operands after the midpoint are undefs.
  for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
    auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);

    // The lookup can fail to find a defining instruction; treat that as a
    // non-match instead of dereferencing a null pointer.
    if (!Undef || Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
      return false;
  }

  // Ensure we only generate legal instructions post-legalizer. The apply step
  // emits both an undef of the source type and the concat itself.
  if (!IsPreLegalize &&
      (!isLegal({TargetOpcode::G_IMPLICIT_DEF, {UnmergeSrcTy}}) ||
       !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}})))
    return false;

  UnmergeSrc = SrcReg;
  return true;
}

// Rewrite the matched G_BUILD_VECTOR \p MI as a G_CONCAT_VECTORS whose first
// operand is \p UnmergeSrc and whose second operand is a new undef value of
// the same type. \p MI is erased afterwards.
void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B,
                                              Register &UnmergeSrc) const {
  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");

  // Insert the replacement at MI's position, reusing its debug location.
  B.setInstrAndDebugLoc(MI);

  // The padding half has the same type as the half being preserved.
  const LLT SrcTy = MRI.getType(UnmergeSrc);
  auto UndefHalf = B.buildUndef(SrcTy);
  Register UndefReg = UndefHalf.getReg(0);

  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefReg});

  MI.eraseFromParent();
}

// This combine tries to reduce the number of scalarised G_TRUNC instructions by
// using vector truncates instead
//
Expand Down Expand Up @@ -8426,4 +8509,4 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI,
}

return false;
}
}
18 changes: 5 additions & 13 deletions llvm/test/CodeGen/AArch64/fptrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -345,19 +345,11 @@ entry:
}

define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fptrunc_v2f32_v2f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c = fptrunc <2 x float> %a to <2 x half>
ret <2 x half> %c
Expand Down
48 changes: 12 additions & 36 deletions llvm/test/CodeGen/AArch64/itofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i64> %a to <2 x half>
Expand Down Expand Up @@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i64> %a to <2 x half>
Expand Down Expand Up @@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i32> %a to <2 x half>
Expand All @@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i32> %a to <2 x half>
Expand Down Expand Up @@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i16> %a to <2 x half>
Expand All @@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i16> %a to <2 x half>
Expand Down Expand Up @@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
Expand Down Expand Up @@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:
Expand Down
54 changes: 14 additions & 40 deletions llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) {
}

define void @undef_hi2_v4i16(<2 x i16> %arg0) {
; GFX8-SDAG-LABEL: undef_hi2_v4i16:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: ;;#ASMSTART
; GFX8-SDAG-NEXT: ; use v[0:1]
; GFX8-SDAG-NEXT: ;;#ASMEND
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: undef_hi2_v4i16:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8-GISEL-NEXT: ;;#ASMSTART
; GFX8-GISEL-NEXT: ; use v[0:1]
; GFX8-GISEL-NEXT: ;;#ASMEND
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX8-LABEL: undef_hi2_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4i16:
; GFX9: ; %bb.0:
Expand All @@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
}

define void @undef_hi2_v4f16(<2 x half> %arg0) {
; GFX8-SDAG-LABEL: undef_hi2_v4f16:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT: ;;#ASMSTART
; GFX8-SDAG-NEXT: ; use v[0:1]
; GFX8-SDAG-NEXT: ;;#ASMEND
; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: undef_hi2_v4f16:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8-GISEL-NEXT: ;;#ASMSTART
; GFX8-GISEL-NEXT: ; use v[0:1]
; GFX8-GISEL-NEXT: ;;#ASMEND
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
; GFX8-LABEL: undef_hi2_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: undef_hi2_v4f16:
; GFX9: ; %bb.0:
Expand All @@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX8: {{.*}}