7 changes: 4 additions & 3 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -388,9 +388,6 @@ class CombinerHelper {
/// Transform anyext(trunc(x)) to x.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);

/// Transform zext(trunc(x)) to x.
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg);

/// Transform trunc (shl x, K) to shl (trunc x), K
/// if K < VT.getScalarSizeInBits().
///
@@ -918,6 +915,10 @@ class CombinerHelper {
bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo);

/// Transform zext of truncate to x or and(x, mask).
bool matchCombineZextTrunc(const MachineInstr &ZextMI,
const MachineInstr &TruncMI, BuildFnTy &MatchInfo);

private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
22 changes: 11 additions & 11 deletions llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -758,15 +758,6 @@ def anyext_trunc_fold: GICombineRule <
(apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;

// Fold (zext (trunc x)) -> x if the source type is same as the destination type
// and truncated bits are known to be zero.
def zext_trunc_fold: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
(match (wip_match_opcode G_ZEXT):$root,
[{ return Helper.matchCombineZextTrunc(*${root}, ${matchinfo}); }]),
(apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;

def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector<Register, 4>">;
def not_cmp_fold : GICombineRule<
(defs root:$d, not_cmp_fold_matchinfo:$info),
@@ -1791,6 +1782,15 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <

def integer_of_truncate : integer_of_opcode<G_TRUNC>;

/// Transform zext of truncate to x or and(x, mask).
def zext_of_truncate : GICombineRule <
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_TRUNC $trunc, $src):$TruncMI,
(G_ZEXT $root, $trunc):$ZextMI,
[{ return Helper.matchCombineZextTrunc(*${ZextMI}, *${TruncMI}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${ZextMI}, ${matchinfo}); }])>;


def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
@@ -1812,7 +1812,8 @@ def cast_combines: GICombineGroup<[
narrow_binop_and,
narrow_binop_or,
narrow_binop_xor,
integer_of_truncate
integer_of_truncate,
zext_of_truncate
]>;

def canonicalize_icmp : GICombineRule<
@@ -1869,7 +1870,6 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,

def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
zext_trunc_fold,
sext_inreg_to_zext_inreg]>;

def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
4 changes: 3 additions & 1 deletion llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -333,8 +333,10 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,

// For vectors, CSE the element only for now.
LLT Ty = Res.getLLTTy(*getMRI());
if (Ty.isVector())
if (Ty.isFixedVector())
Author:
Crash in combine-with-flags.mir. Blame me.

return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val));
if (Ty.isScalableVector())
return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
Comment on lines +336 to +339
Contributor:
This API is a mess. I would expect G_SPLAT_VECTOR to just handle fixed vectors.

Author:
G_BUILD_VECTOR is for fixed-length vectors. G_SPLAT_VECTOR is for scalable vectors. It takes a register and implicitly broadcasts it over the scalable vector.

Contributor:
Yes, but I don't understand why it would be that way (or why this would be an implementation detail users of the MachineIRBuilder would need to concern themselves with).

Author:
The underlying issue is that we never taught buildConstant about scalable vectors/types.
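
For context, a minimal sketch of the dispatch this hunk introduces into CSEMIRBuilder::buildConstant. The method and builder calls are taken from the diff; the surrounding CSE profiling is elided and the comments are mine:

```cpp
MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
                                                 const ConstantInt &Val) {
  // For vectors, CSE the element only for now.
  LLT Ty = Res.getLLTTy(*getMRI());
  // Fixed-length vector: materialize the scalar constant once, then emit an
  // explicit G_BUILD_VECTOR that repeats it for every element.
  if (Ty.isFixedVector())
    return buildSplatBuildVector(Res, buildConstant(Ty.getElementType(), Val));
  // Scalable vector: the element count is a runtime multiple, so emit a
  // G_SPLAT_VECTOR, which takes one scalar register and broadcasts it.
  if (Ty.isScalableVector())
    return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
  // ...scalar path (FoldingSetNodeID profiling etc.) continues below.
}
```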


FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
14 changes: 0 additions & 14 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2526,20 +2526,6 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
}

bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
if (mi_match(SrcReg, MRI,
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
Author:
Here is the test in episode I of this combine.

unsigned DstSize = DstTy.getScalarSizeInBits();
unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
}
return false;
}

static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
const unsigned TruncSize = TruncTy.getScalarSizeInBits();
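As context for the deletion above: the old zext_trunc_fold only fired when KnownBits could prove that the bits dropped by the trunc were already zero in the source. A minimal illustration of that condition, using the real llvm::KnownBits API but with a hand-constructed known-bits value standing in for analysis results:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

int main() {
  // Model x:s64 whose top 32 bits are known zero,
  // e.g. x = G_AND y, 0xFFFFFFFF.
  KnownBits Known(64);
  Known.Zero = APInt::getHighBitsSet(/*numBits=*/64, /*hiBitsSet=*/32);

  // zext(s64) of trunc(s32) of x: the trunc drops DstSize - SrcSize = 32
  // high bits, and the zext refills them with zeros.
  unsigned DstSize = 64; // result width of the G_ZEXT
  unsigned SrcSize = 32; // result width of the G_TRUNC
  bool CanFoldToX = Known.countMinLeadingZeros() >= DstSize - SrcSize;
  // CanFoldToX == true: the zeros the zext writes back equal the bits the
  // trunc dropped, so zext(trunc(x)) == x.
  return CanFoldToX ? 0 : 1;
}
```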
91 changes: 91 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -359,3 +359,94 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
return false;
}
}

bool CombinerHelper::matchCombineZextTrunc(const MachineInstr &ZextMI,
const MachineInstr &TruncMI,
BuildFnTy &MatchInfo) {
const GZext *Zext = cast<GZext>(&ZextMI);
const GTrunc *Trunc = cast<GTrunc>(&TruncMI);

Register Dst = Zext->getReg(0);
Register Mid = Zext->getSrcReg();
Register Src = Trunc->getSrcReg();

LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);

if (!MRI.hasOneNonDBGUse(Mid))
return false;

unsigned DstSize = DstTy.getScalarSizeInBits();
unsigned MidSize = MRI.getType(Mid).getScalarSizeInBits();
unsigned SrcSize = SrcTy.getScalarSizeInBits();

// Are the truncated bits known to be zero?
if (DstTy == SrcTy &&
(KB->getKnownBits(Src).countMinLeadingZeros() >= DstSize - MidSize)) {
MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, Src); };
return true;
}
Comment on lines +385 to +390
Contributor:
It seems a bit arbitrary that you only check for the masked-off bits being zero in the SrcSize == DstSize case, since in all three cases the AND could be avoided if they're known to be zero.

As an alternative, why not remove this code and leave it to a later AND combine to remove the AND if it can prove it is redundant?

Author:
Firstly, this was ported from the original combine. I would assume that it is more powerful than the combines below.

Maybe I misunderstand, but for the and to be redundant either the zext or trunc must be a no-op.

Author:
I believe the mask is never zero.

Author:
For the "SrcSize == DstSize" case, the mask is:
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
I don't believe that it could be zero.

Contributor:
> Maybe I misunderstand, but for the and to be redundant either the zext or trunc must be a no-op.

zext and trunc are never no-ops since they always change the bit width.

> I believe the mask is never zero.

I'm not talking about the mask being zero; I'm talking about being able to prove that the masked-off bits of the other value are zero. I.e. (x AND mask) can be combined to x if you can prove (with KnownBits) that every zero bit in mask is also zero in x. That's what the code above is doing for the DstSize == SrcSize case, but there is almost certainly a separate combine that will run afterwards and do it anyway, and do it for all three cases.

Author:
For modularity, I would prefer to leave it to the other combine.

Author:
As I said, the first combine comes from episode I of this combine. The and comes from InstCombine:

// If the sizes are just right we can convert this into a logical
// 'and', which will be much cheaper than the pair of casts.
// If we're actually extending zero bits, then if
//   SrcSize < DstSize: zext(Src & mask)
//   SrcSize == DstSize: Src & mask
//   SrcSize > DstSize: trunc(Src) & mask

if (DstSize == SrcSize) {
// Src & mask.

if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {DstTy}}) ||
!isConstantLegalOrBeforeLegalizer(DstTy))
return false;

// build mask.
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));

MatchInfo = [=](MachineIRBuilder &B) {
auto Mask = B.buildConstant(DstTy, AndValue);
B.buildAnd(Dst, Src, Mask);
};
return true;
}

// if (SrcSize < DstSize) {
// // zext(Src & mask).
//
// if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {SrcTy}}) ||
// !isConstantLegalOrBeforeLegalizer(SrcTy) ||
// !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}}))
// return false;
//
// APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
//
// MatchInfo = [=](MachineIRBuilder &B) {
// auto Mask = B.buildConstant(SrcTy, AndValue);
// auto And = B.buildAnd(SrcTy, Src, Mask);
// B.buildZExt(Dst, And);
// };
// return true;
// }

// if (SrcSize > DstSize) {
// // trunc(Src) & mask.
//
// if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, {DstTy}}) ||
// !isConstantLegalOrBeforeLegalizer(DstTy) ||
// !isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}}))
// return false;
//
// APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
//
// MatchInfo = [=](MachineIRBuilder &B) {
// auto Mask = B.buildConstant(DstTy, AndValue);
// auto Trunc = B.buildTrunc(DstTy, Src);
// B.buildAnd(Dst, Trunc, Mask);
// };
// return true;
// }

return false;
}
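
A concrete instance of the SrcSize == DstSize case implemented above, matching the updated MIR tests below; a minimal sketch using the real APInt API, with the main() scaffolding added for illustration:

```cpp
#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  // zext(s64 (trunc(s32 x:s64))): SrcSize == DstSize == 64, MidSize == 32.
  unsigned SrcSize = 64, MidSize = 32;

  // The mask keeps exactly the bits that survive the trunc/zext round trip.
  APInt AndValue = APInt::getLowBitsSet(SrcSize, MidSize);

  // 0x00000000FFFFFFFF == 4294967295: the G_CONSTANT the updated
  // combine-with-flags.mir checks expect next to the new G_AND.
  assert(AndValue == APInt(64, 0xFFFFFFFFULL));
  return 0;
}
```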
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -168,6 +168,6 @@ def AMDGPUPostLegalizerCombiner: GICombiner<
def AMDGPURegBankCombiner : GICombiner<
"AMDGPURegBankCombinerImpl",
[unmerge_merge, unmerge_cst, unmerge_undef,
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
int_minmax_to_med3, ptr_add_immed_chain,
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
}
@@ -49,8 +49,8 @@ body: |
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
; CHECK-NEXT: %zext:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK-NEXT: %zext:_(s64) = G_AND %arg1, [[C]]
; CHECK-NEXT: $x0 = COPY %zext(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%arg1:_(s64) = COPY $x0
21 changes: 13 additions & 8 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-with-flags.mir
@@ -10,7 +10,9 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: $x1 = COPY [[COPY]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: $x1 = COPY [[AND]](s64)
%0:_(s64) = COPY $x0
%2:_(s32) = nuw G_TRUNC %0
%3:_(s64) = G_ZEXT %2
@@ -25,9 +27,9 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = nsw G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK-NEXT: $x1 = COPY [[ZEXT]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: $x1 = COPY [[AND]](s64)
%0:_(s64) = COPY $x0
%2:_(s32) = nsw G_TRUNC %0
%3:_(s64) = G_ZEXT %2
@@ -42,9 +44,9 @@ body: |
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
; CHECK-NEXT: $x1 = COPY [[ZEXT]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: $x1 = COPY [[AND]](s64)
%0:_(s64) = COPY $x0
%2:_(s32) = G_TRUNC %0
%3:_(s64) = G_ZEXT %2
@@ -300,7 +302,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: %sv0:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[COPY]](s64)
; CHECK-NEXT: $z0 = COPY %sv0(<vscale x 2 x s64>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295
; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[C]](s64)
; CHECK-NEXT: %z:_(<vscale x 2 x s64>) = G_AND %sv0, [[SPLAT_VECTOR]]
; CHECK-NEXT: $z0 = COPY %z(<vscale x 2 x s64>)
%0:_(s64) = COPY $x0
%1:_(s64) = COPY $x1
%sv0:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %0:_(s64)
@@ -165,9 +165,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load (s8))
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SEXTLOAD]](s32)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SEXTLOAD]], [[C]]
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
; CHECK-NEXT: $w1 = COPY [[SEXTLOAD]](s32)
%0:_(p0) = COPY $x0
%1:_(s8) = G_LOAD %0 :: (load (s8))