Skip to content

Commit d8581ea

Browse files
committed
[AIEX] Align stack memsets to allow vector stores
When a memset writes directly to a stack object and sets enough bytes, we can raise the alignment of that object so the memset can be lowered to vector stores.
1 parent 403b801 commit d8581ea

File tree

4 files changed

+109
-160
lines changed

4 files changed

+109
-160
lines changed

llvm/lib/Target/AIE/AIECombine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@ def combine_phi_undef : GICombineRule<
222222
[{ return matchPhiOfUndef(*${root}, MRI, Observer, ${matchinfo}); }]),
223223
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
224224

225+
// Raise the alignment of a stack object written by a G_MEMSET so that the
// memset does not have to be scalarized. The matching logic lives in
// matchAlignMemset; the apply step only runs the build function it produced
// and keeps the G_MEMSET itself in place.
def combine_align_memset : GICombineRule<
  (defs root:$root, build_fn_matchinfo:$matchinfo),
  (match (wip_match_opcode G_MEMSET): $root,
    [{ return matchAlignMemset(*${root}, MRI, static_cast<const AIEBaseInstrInfo &>(B.getTII()), Observer, ${matchinfo}); }]),
  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
230+
225231
// AIE-specific combines (currently shared by AIE2 and AIE2P).
226232
def aie_additional_combines : GICombineGroup<[
227233
combine_unpad_vector,
@@ -240,7 +246,8 @@ def aie_additional_combines : GICombineGroup<[
240246
combine_concat_unmerge_phis,
241247
combine_trunc_ext,
242248
combine_load_const,
243-
combine_phi_undef
249+
combine_phi_undef,
250+
combine_align_memset
244251
]>;
245252

246253
// AIE2P-specific combines.

llvm/lib/Target/AIE/AIECombinerHelper.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
2121
#include "llvm/CodeGen/GlobalISel/Utils.h"
2222
#include "llvm/CodeGen/MachineBasicBlock.h"
23+
#include "llvm/CodeGen/MachineFrameInfo.h"
2324
#include "llvm/CodeGen/MachineInstr.h"
2425
#include "llvm/CodeGen/MachineInstrBuilder.h"
2526
#include "llvm/CodeGen/MachineRegionInfo.h"
@@ -60,6 +61,10 @@ cl::opt<bool> CombineVecShiftByZero(
6061
"aie-combine-vec-shift-by-zero", cl::init(true), cl::Hidden,
6162
cl::desc("Combine vectors shift by zero into copies."));
6263

64+
// Hidden switch, enabled by default, that gates the memset combines:
// matchAlignMemset bails out immediately when it is turned off.
cl::opt<bool> MemsetOptimizations(
    "aie-optimize-memsets", cl::Hidden, cl::init(true),
    cl::desc("Apply memset optimizations (peeling/align/etc.)."));
67+
6368
static unsigned getNumMaskUndefs(const ArrayRef<int> &Mask,
6469
unsigned StartIndex) {
6570
unsigned Count = 0;
@@ -3787,3 +3792,65 @@ bool llvm::matchPhiOfUndef(MachineInstr &Phi, MachineRegisterInfo &MRI,
37873792

37883793
return true;
37893794
}
3795+
3796+
// If we have enough bytes set on a stack memset, we can simply
3797+
// align this stack object to avoid store scalarization during
3798+
// legalization.
3799+
bool llvm::matchAlignMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
3800+
const AIEBaseInstrInfo &TII,
3801+
GISelChangeObserver &Observer,
3802+
BuildFnTy &MatchInfo) {
3803+
assert(MI.getOpcode() == TargetOpcode::G_MEMSET && "Expected a G_MEMSET");
3804+
3805+
if (!MemsetOptimizations)
3806+
return false;
3807+
3808+
// Try to keep alignment increase as minimum.
3809+
const unsigned BasicVectorByteSize = TII.getBasicVecRegSize() / 8;
3810+
// Half vectors are also supported.
3811+
const unsigned HalfVectorByteSize = BasicVectorByteSize / 2;
3812+
3813+
const Register CountReg = MI.getOperand(2).getReg();
3814+
auto Cst = getIConstantVRegValWithLookThrough(CountReg, MRI);
3815+
if (!Cst)
3816+
return false;
3817+
const unsigned ByteCount = Cst->Value.getZExtValue();
3818+
3819+
// Can we fill, at least, half of a basic vector?
3820+
if (ByteCount < HalfVectorByteSize)
3821+
return false;
3822+
3823+
const Register PtrReg = MI.getOperand(0).getReg();
3824+
const MachineInstr *DefDataInst = MRI.getUniqueVRegDef(PtrReg);
3825+
3826+
if (DefDataInst->getOpcode() != TargetOpcode::G_FRAME_INDEX)
3827+
return false;
3828+
3829+
if (MI.memoperands_empty())
3830+
return false;
3831+
MachineMemOperand *MMO = MI.memoperands().front();
3832+
3833+
const int FrameIndex = DefDataInst->getOperand(1).getIndex();
3834+
3835+
const Align OptimalAlign =
3836+
Align(ByteCount < BasicVectorByteSize ? HalfVectorByteSize
3837+
: BasicVectorByteSize);
3838+
const Align MMOAlign = MMO->getAlign();
3839+
3840+
if (MMOAlign == OptimalAlign)
3841+
return false;
3842+
3843+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
3844+
MachineFunction *MF = MI.getMF();
3845+
MachineFrameInfo &MFI = MF->getFrameInfo();
3846+
MFI.setObjectAlignment(FrameIndex, OptimalAlign);
3847+
const LocationSize Size = MMO->getSize();
3848+
MI.dropMemRefs(*MF);
3849+
MI.addMemOperand(*MF,
3850+
MF->getMachineMemOperand(
3851+
MachinePointerInfo::getFixedStack(*MF, FrameIndex),
3852+
MachineMemOperand::MOStore, Size, OptimalAlign));
3853+
};
3854+
3855+
return true;
3856+
}

llvm/lib/Target/AIE/AIECombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,10 @@ bool matchPhiBitcast(MachineInstr &Phi, MachineRegisterInfo &MRI,
288288
bool matchPhiOfUndef(MachineInstr &MI, MachineRegisterInfo &MRI,
289289
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
290290

291+
/// Match a G_MEMSET whose destination is defined by a G_FRAME_INDEX and whose
/// constant byte count fills at least half of a basic vector register (as
/// reported by \p TII). On success, \p MatchInfo is set to a function that
/// updates the alignment of the frame object and of the memset's memory
/// operand, and true is returned; the G_MEMSET itself is kept.
bool matchAlignMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
292+
const AIEBaseInstrInfo &TII,
293+
GISelChangeObserver &Observer, BuildFnTy &MatchInfo);
294+
291295
} // namespace llvm
292296

293297
#endif

0 commit comments

Comments
 (0)