Skip to content

Commit 4ff97cf

Browse files
committed
[AIEX] Peel stores from memsets to reach alignment
We can peel some stores to reach alignment on memsets. The alignment of the base pointer can be evaluated using other uses of the pointer.
1 parent cd8d52e commit 4ff97cf

File tree

4 files changed

+361
-354
lines changed

4 files changed

+361
-354
lines changed

llvm/lib/Target/AIE/AIECombine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,12 @@ def combine_align_memset : GICombineRule<
228228
[{ return matchAlignMemset(*${root}, MRI, (const AIEBaseInstrInfo &)B.getTII(), Observer, ${matchinfo}); }]),
229229
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
230230

231+
// Peel leading narrow stores off a G_MEMSET so that the remaining memset
// becomes aligned. The matcher builds the rewrite into $matchinfo; the root
// instruction is not erased by the apply step.
def combine_peel_memset : GICombineRule<
  (defs root:$root, build_fn_matchinfo:$matchinfo),
  (match (wip_match_opcode G_MEMSET): $root,
         [{
           return matchPeelMemset(*${root}, MRI,
                                  (const AIEBaseInstrInfo &)B.getTII(),
                                  Observer, ${matchinfo});
         }]),
  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
236+
231237
// AIE-specific combines (currently shared by AIE2 and AIE2P).
232238
def aie_additional_combines : GICombineGroup<[
233239
combine_unpad_vector,
@@ -247,7 +253,8 @@ def aie_additional_combines : GICombineGroup<[
247253
combine_trunc_ext,
248254
combine_load_const,
249255
combine_phi_undef,
250-
combine_align_memset
256+
combine_align_memset,
257+
combine_peel_memset
251258
]>;
252259

253260
// AIE2P-specific combines.

llvm/lib/Target/AIE/AIECombinerHelper.cpp

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/CodeGen/TargetOpcodes.h"
2929
#include "llvm/IR/IntrinsicsAIE2.h"
3030
#include "llvm/IR/IntrinsicsAIE2P.h"
31+
#include "llvm/Support/Alignment.h"
3132
#include "llvm/Support/ErrorHandling.h"
3233
#include <optional>
3334

@@ -3854,3 +3855,177 @@ bool llvm::matchAlignMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
38543855

38553856
return true;
38563857
}
3858+
3859+
static std::optional<std::pair<Register, int64_t>>
3860+
getPtrAndConstantOffset(const MachineInstr *MI, unsigned PointerIndex,
3861+
MachineRegisterInfo &MRI) {
3862+
assert(MI->getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
3863+
3864+
const Register OffsetReg = MI->getOperand(2).getReg();
3865+
const auto Cst = getIConstantVRegValWithLookThrough(OffsetReg, MRI);
3866+
3867+
if (Cst)
3868+
return std::make_pair(MI->getOperand(PointerIndex).getReg(),
3869+
Cst->Value.getSExtValue());
3870+
3871+
return std::nullopt;
3872+
}
3873+
3874+
/// Returns true if \p Value is a multiple of \p TargetAlign.
///
/// \tparam TargetAlign Alignment in bytes; must be a non-zero power of two
///                     (checked at compile time).
/// Implemented with plain bit arithmetic so that the function is genuinely
/// usable in constant expressions.
template <uint64_t TargetAlign> constexpr bool matchAlignment(uint64_t Value) {
  static_assert(TargetAlign != 0 && (TargetAlign & (TargetAlign - 1)) == 0,
                "TargetAlign must be a non-zero power of two");
  // For a power of two, the low bits are exactly the remainder.
  return (Value & (TargetAlign - 1)) == 0;
}
3877+
3878+
/// Try to prove that \p BasePtr is word (4-byte) aligned by looking at its
/// other uses.
///
/// The pointer is considered word-aligned if either
///  - a load/store addresses memory directly through \p BasePtr with a
///    word-aligned memory operand, or
///  - a G_PTR_ADD adds a constant multiple of 4 to \p BasePtr and a load/store
///    addresses memory through that derived pointer with a word-aligned
///    memory operand.
/// One such user is enough.
///
/// \returns true if such a use was found, false if nothing could be proven.
static bool isBasePointerWordAligned(Register BasePtr,
                                     MachineRegisterInfo &MRI) {

  // True if UseMI is a load/store whose *address* operand is Ptr and whose
  // memory operand is word-aligned. A store that merely stores Ptr as data
  // says nothing about the alignment of Ptr itself and must be rejected.
  auto IsWordAlignedAccessThrough = [](const MachineInstr &UseMI,
                                       Register Ptr) {
    const auto *LdSt = dyn_cast<GLoadStore>(&UseMI);
    if (!LdSt || LdSt->getPointerReg() != Ptr)
      return false;
    if (UseMI.memoperands_empty())
      return false;
    const MachineMemOperand *MMO = UseMI.memoperands().front();
    return matchAlignment<4>(MMO->getAlign().value());
  };

  for (const MachineInstr &MI : MRI.use_instructions(BasePtr)) {
    if (MI.getOpcode() != TargetOpcode::G_PTR_ADD) {
      if (IsWordAlignedAccessThrough(MI, BasePtr))
        return true;
      continue;
    }

    // Only a G_PTR_ADD that uses BasePtr as its base is of interest.
    if (MI.getOperand(1).getReg() != BasePtr)
      continue;

    // Derived pointer (operand 0) and the constant added to BasePtr. The
    // offset must keep word alignment, otherwise the alignment of the derived
    // pointer tells us nothing about BasePtr.
    const auto RegAndOffset = getPtrAndConstantOffset(&MI, 0, MRI);
    if (!RegAndOffset || !matchAlignment<4>(RegAndOffset->second))
      continue;

    // If one user is aligned, it is enough for us.
    const Register DerivedPtr = RegAndOffset->first;
    if (any_of(MRI.use_instructions(DerivedPtr),
               [&](const MachineInstr &UseMI) {
                 return IsWordAlignedAccessThrough(UseMI, DerivedPtr);
               }))
      return true;
  }

  return false;
}
3908+
3909+
// We try to align MEMSETs by peeling out some stores.
3910+
// To be effective, we need at least 3 bytes here:
3911+
// If we are byte-aligned we can generate
3912+
// - G_STORE s8
3913+
// - G_STORE s16
3914+
// - G_MEMSET (... n - 3 ...)
3915+
// If we are short-aligned we can generate
3916+
// - G_STORE s16
3917+
// - G_MEMSET (... n - 2 ...)
3918+
bool llvm::matchPeelMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
3919+
const AIEBaseInstrInfo &TII,
3920+
GISelChangeObserver &Observer,
3921+
BuildFnTy &MatchInfo) {
3922+
assert(MI.getOpcode() == TargetOpcode::G_MEMSET && "Expected a G_MEMSET");
3923+
3924+
if (!MemsetOptimizations)
3925+
return false;
3926+
3927+
MachineMemOperand *MMO = MI.memoperands().front();
3928+
3929+
if (!MMO)
3930+
return false;
3931+
const Align MMOAlign = MMO->getAlign();
3932+
3933+
// If it is already aligned we have nothing to do.
3934+
if (matchAlignment<4>(MMOAlign.value()))
3935+
return false;
3936+
3937+
const Register SizeReg = MI.getOperand(2).getReg();
3938+
3939+
const auto Cst = getIConstantVRegValWithLookThrough(SizeReg, MRI);
3940+
if (!Cst)
3941+
return false;
3942+
const int64_t Size = Cst->Value.getSExtValue();
3943+
3944+
Register PtrReg = MI.getOperand(0).getReg();
3945+
const Register DataReg = MI.getOperand(1).getReg();
3946+
const auto CstInit = getIConstantVRegValWithLookThrough(DataReg, MRI);
3947+
if (!CstInit)
3948+
return false;
3949+
const uint64_t Initializer = CstInit->Value.getZExtValue();
3950+
3951+
unsigned Offset = 0;
3952+
const MachineInstr *DefPtrReg = MRI.getVRegDef(PtrReg);
3953+
if (DefPtrReg->getOpcode() == TargetOpcode::G_PTR_ADD) {
3954+
const auto RegAndOffset = getPtrAndConstantOffset(DefPtrReg, 1, MRI);
3955+
3956+
if (!RegAndOffset)
3957+
return false;
3958+
3959+
Offset = RegAndOffset->second;
3960+
PtrReg = RegAndOffset->first;
3961+
}
3962+
3963+
// Next step is to prove that the base pointer is word-aligned.
3964+
// As we cannot assume, we can search for aligned uses of the base pointer.
3965+
if (!isBasePointerWordAligned(PtrReg, MRI))
3966+
return false;
3967+
3968+
MatchInfo = [=, &MI, &MRI, &Observer](MachineIRBuilder &B) {
3969+
auto &MF = B.getMF();
3970+
3971+
auto BuildPADD = [&](int64_t CurrentOffset) {
3972+
Register NewPtrReg = MRI.cloneVirtualRegister(PtrReg);
3973+
Register OffsetReg =
3974+
B.buildConstant(LLT::scalar(20), CurrentOffset).getReg(0);
3975+
Observer.createdInstr(*B.buildInstr(TargetOpcode::G_PTR_ADD)
3976+
.addDef(NewPtrReg)
3977+
.addReg(PtrReg)
3978+
.addReg(OffsetReg));
3979+
return NewPtrReg;
3980+
};
3981+
3982+
auto BuildMMO = [&](LocationSize Size, Align A) {
3983+
MachineMemOperand *NewMMO = MF.getMachineMemOperand(
3984+
MMO->getPointerInfo(), MMO->getFlags(), Size, A);
3985+
return NewMMO;
3986+
};
3987+
3988+
int64_t PeelOffset = Offset;
3989+
// If offset is aligned, just fix the alignment.
3990+
if (!matchAlignment<4>(PeelOffset)) {
3991+
// If not short-aligned, align to the next short boundary (1 byte).
3992+
if (!matchAlignment<2>(PeelOffset)) {
3993+
B.buildStore(DataReg, BuildPADD(PeelOffset),
3994+
*BuildMMO(MMO->getSize(), Align(1)));
3995+
PeelOffset++;
3996+
}
3997+
3998+
// If we are short-aligned, but still not word aligned, align to the
3999+
// next word boundary (2 bytes more).
4000+
if (!matchAlignment<4>(PeelOffset)) {
4001+
// Store the next two bytes to align to the next word boundary.
4002+
Register DataRegAdjustedToS16 =
4003+
B.buildConstant(LLT::scalar(16), (Initializer << 8) | Initializer)
4004+
.getReg(0);
4005+
B.buildStore(DataRegAdjustedToS16, BuildPADD(PeelOffset),
4006+
*BuildMMO(2, Align(2)));
4007+
PeelOffset += 2;
4008+
}
4009+
}
4010+
4011+
const unsigned NewSize = Size - (PeelOffset - Offset);
4012+
4013+
// No bytes left to memset.
4014+
if (NewSize == 0) {
4015+
MI.eraseFromParent();
4016+
return;
4017+
}
4018+
4019+
const int64_t MemsetOffset = PeelOffset;
4020+
assert(matchAlignment<4>(MemsetOffset) && "Memset still unaligned?");
4021+
// Now, what remains is aligned, we just need to fix Offset, Size and MMO.
4022+
MachineMemOperand *NewMMOMemSet = BuildMMO(MMO->getSize(), Align(4));
4023+
MI.dropMemRefs(MF); // Safe to drop the MMO now.
4024+
MI.addMemOperand(MF, NewMMOMemSet);
4025+
MI.getOperand(2).setReg(
4026+
B.buildConstant(LLT::scalar(20), NewSize).getReg(0));
4027+
MI.getOperand(0).setReg(BuildPADD(MemsetOffset));
4028+
};
4029+
4030+
return true;
4031+
}

llvm/lib/Target/AIE/AIECombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,9 @@ bool matchAlignMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
292292
const AIEBaseInstrInfo &TII,
293293
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
294294

295+
/// Match a G_MEMSET from which leading stores can be peeled so that the
/// remaining memset reaches alignment. On success, \p MatchInfo holds the
/// function that performs the rewrite.
bool matchPeelMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
                     const AIEBaseInstrInfo &TII,
                     GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
295298
} // namespace llvm
296299

297300
#endif

0 commit comments

Comments
 (0)