Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,12 @@ class CombinerHelper {
bool matchRotateOutOfRange(MachineInstr &MI) const;
void applyRotateOutOfRange(MachineInstr &MI) const;

/// Match a G_BUILD_VECTOR whose first half of source operands are, in order,
/// the results of a single G_UNMERGE_VALUES and whose second half are all
/// G_IMPLICIT_DEF. On a successful match \p UnmergeSrc is set to the register
/// that the G_UNMERGE_VALUES reads.
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &UnmergeSrc) const;
/// Replace the matched G_BUILD_VECTOR \p MI with a G_CONCAT_VECTORS of
/// \p UnmergeSrc and an undef value of the same type, then erase \p MI.
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B,
Register &UnmergeSrc) const;

bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const;

Expand Down
12 changes: 11 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule <
[{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>;

// Transform
//   build_vector(unmerge(src, 0), ..., unmerge(src, n), undef, ..., undef)
// into
//   concat_vectors(src, undef)
// The pattern itself accepts any G_BUILD_VECTOR as the root; the C++ matcher
// inspects the operands and, on success, stores the unmerge source register
// in $unmergeSrc for the apply step.
def combine_build_unmerge : GICombineRule<
(defs root:$root, register_matchinfo:$unmergeSrc),
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
[{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]),
(apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }])
>;

def merge_combines: GICombineGroup<[
unmerge_anyext_build_vector,
unmerge_merge,
Expand All @@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[
unmerge_dead_to_trunc,
unmerge_zext_to_zext,
merge_of_x_and_undef,
merge_of_x_and_zero
merge_of_x_and_zero,
combine_build_unmerge
]>;

// Under certain conditions, transform:
Expand Down
85 changes: 85 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3463,6 +3463,91 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
isConstTrueVal(TLI, Cst, IsVector, IsFP);
}

// This pattern aims to match the following shape to avoid extra mov
// instructions
// G_BUILD_VECTOR(
//   G_UNMERGE_VALUES(src, 0)
//   G_UNMERGE_VALUES(src, 1)
//   G_IMPLICIT_DEF
//   G_IMPLICIT_DEF
// )
// ->
// G_CONCAT_VECTORS(
//   src,
//   undef
// )
//
// Returns true when MI has exactly this shape and `src` is a vector; in that
// case UnmergeSrc is set to `src`. UnmergeSrc is left untouched when the match
// fails.
bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              Register &UnmergeSrc) const {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);

  // Operand 0 is the def; everything after it is a source element.
  const unsigned BuildUseCount = MI.getNumOperands() - 1;

  // The sources must split evenly into an "unmerge" half and an "undef" half.
  if (BuildUseCount == 0 || BuildUseCount % 2 != 0)
    return false;

  const unsigned NumUnmerge = BuildUseCount / 2;

  // Check the first operand is an unmerge. getDefIgnoringCopies may return
  // null when no definition can be found, so guard before dereferencing.
  const MachineInstr *MaybeUnmerge =
      getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  if (!MaybeUnmerge ||
      MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
    return false;

  // Check that all of the unmerged values are used. Doing this up front also
  // guarantees that operand I of the unmerge is a def for every I examined in
  // the loop below.
  const unsigned UnmergeEltCount = MaybeUnmerge->getNumDefs();
  if (UnmergeEltCount < 2 || UnmergeEltCount != NumUnmerge)
    return false;

  // Unmerge should only use one register, which is its last operand.
  const Register SrcReg =
      MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg();

  // G_CONCAT_VECTORS only accepts vector sources, so a scalar that was split
  // into smaller scalars cannot be fed to it.
  if (!MRI.getType(SrcReg).isVector())
    return false;

  // Check that the resultant concat will be legal.
  // NOTE(review): the 64/128-bit restriction is hard-coded here; it looks like
  // it encodes a specific target's vector register widths. A legality query
  // would be more general - confirm before relying on this for other targets.
  const unsigned UnmergeEltSize =
      MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits();
  const unsigned UnmergeTotalSize = UnmergeEltSize * UnmergeEltCount;
  if (UnmergeTotalSize != 64 && UnmergeTotalSize != 128)
    return false;

  // Check that all of the operands before the midpoint come from the same
  // unmerge and are in the same order as they are used in the build_vector
  for (unsigned I = 0; I < NumUnmerge; ++I) {
    const Register EltReg = MI.getOperand(I + 1).getReg();

    if (getDefIgnoringCopies(EltReg, MRI) != MaybeUnmerge)
      return false;

    if (MaybeUnmerge->getOperand(I).getReg() != EltReg)
      return false;
  }

  // Check that all of the operands after the mid point are undefs.
  for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
    const MachineInstr *Undef =
        getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI);

    if (!Undef || Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
      return false;
  }

  UnmergeSrc = SrcReg;
  return true;
}

// Rewrites the matched G_BUILD_VECTOR as a G_CONCAT_VECTORS of UnmergeSrc and
// an undef value of the same type, writing the result to MI's destination
// operand, and then removes MI.
void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
                                              MachineRegisterInfo &MRI,
                                              MachineIRBuilder &B,
                                              Register &UnmergeSrc) const {
  assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");

  // New instructions are inserted at MI and take its debug location.
  B.setInstrAndDebugLoc(MI);

  // The upper half of the result is undefined, so pair the source with an
  // undef of identical type.
  const auto SrcTy = MRI.getType(UnmergeSrc);
  auto UndefHalf = B.buildUndef(SrcTy);
  Register UndefReg = UndefHalf.getReg(0);

  B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefReg});

  MI.eraseFromParent();
}

// This combine tries to reduce the number of scalarised G_TRUNC instructions by
// using vector truncates instead
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include <set>

#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
Expand Down Expand Up @@ -890,4 +891,4 @@ namespace llvm {
/// Factory for the AArch64 post-legalizer combiner pass. \p IsOptNone is
/// forwarded unchanged to the pass constructor.
FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) {
  auto *Combiner = new AArch64PostLegalizerCombiner(IsOptNone);
  return Combiner;
}
} // end namespace llvm
} // end namespace llvm
18 changes: 5 additions & 13 deletions llvm/test/CodeGen/AArch64/fptrunc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -345,19 +345,11 @@ entry:
}

define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fptrunc_v2f32_v2f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c = fptrunc <2 x float> %a to <2 x half>
ret <2 x half> %c
Expand Down
48 changes: 12 additions & 36 deletions llvm/test/CodeGen/AArch64/itofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i64> %a to <2 x half>
Expand Down Expand Up @@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d
; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i64> %a to <2 x half>
Expand Down Expand Up @@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i32> %a to <2 x half>
Expand All @@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-NOFP16-GI: // %bb.0: // %entry
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16:
; CHECK-FP16-GI: // %bb.0: // %entry
; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i32> %a to <2 x half>
Expand Down Expand Up @@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = sitofp <2 x i16> %a to <2 x half>
Expand All @@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
entry:
%c = uitofp <2 x i16> %a to <2 x half>
Expand Down Expand Up @@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16:
Expand Down Expand Up @@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff
; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s
; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0]
; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1]
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s
; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NOFP16-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16:
Expand Down