20 | 20 | #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" |
21 | 21 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
22 | 22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 23 | +#include "llvm/CodeGen/MachineFrameInfo.h" |
23 | 24 | #include "llvm/CodeGen/MachineInstr.h" |
24 | 25 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
25 | 26 | #include "llvm/CodeGen/MachineRegionInfo.h" |
@@ -60,6 +61,10 @@ cl::opt<bool> CombineVecShiftByZero( |
60 | 61 | "aie-combine-vec-shift-by-zero", cl::init(true), cl::Hidden, |
61 | 62 | cl::desc("Combine vectors shift by zero into copies.")); |
62 | 63 |
| 64 | +cl::opt<bool> MemsetOptimizations( |
| 65 | + "aie-optimize-memsets", cl::init(true), cl::Hidden, |
| 66 | + cl::desc("Apply memset optimizations (peeling/align/etc.).")); |
| 67 | + |
63 | 68 | static unsigned getNumMaskUndefs(const ArrayRef<int> &Mask, |
64 | 69 | unsigned StartIndex) { |
65 | 70 | unsigned Count = 0; |
@@ -3787,3 +3792,65 @@ bool llvm::matchPhiOfUndef(MachineInstr &Phi, MachineRegisterInfo &MRI, |
3787 | 3792 |
3788 | 3793 | return true; |
3789 | 3794 | } |
| 3795 | + |
| 3796 | +// If a memset on a stack object covers enough bytes, we can simply |
| 3797 | +// align the stack object to avoid store scalarization during |
| 3798 | +// legalization. |
| 3799 | +bool llvm::matchAlignMemset(MachineInstr &MI, MachineRegisterInfo &MRI, |
| 3800 | + const AIEBaseInstrInfo &TII, |
| 3801 | + GISelChangeObserver &Observer, |
| 3802 | + BuildFnTy &MatchInfo) { |
| 3803 | + assert(MI.getOpcode() == TargetOpcode::G_MEMSET && "Expected a G_MEMSET"); |
| 3804 | + |
| 3805 | + if (!MemsetOptimizations) |
| 3806 | + return false; |
| 3807 | + |
| 3808 | +  // Try to keep the alignment increase to a minimum. |
| 3809 | + const unsigned BasicVectorByteSize = TII.getBasicVecRegSize() / 8; |
| 3810 | + // Half vectors are also supported. |
| 3811 | + const unsigned HalfVectorByteSize = BasicVectorByteSize / 2; |
| 3812 | + |
| 3813 | + const Register CountReg = MI.getOperand(2).getReg(); |
| 3814 | + auto Cst = getIConstantVRegValWithLookThrough(CountReg, MRI); |
| 3815 | + if (!Cst) |
| 3816 | + return false; |
| 3817 | + const unsigned ByteCount = Cst->Value.getZExtValue(); |
| 3818 | + |
| 3819 | +  // Can we fill at least half of a basic vector? |
| 3820 | + if (ByteCount < HalfVectorByteSize) |
| 3821 | + return false; |
| 3822 | + |
| 3823 | + const Register PtrReg = MI.getOperand(0).getReg(); |
| 3824 | + const MachineInstr *DefDataInst = MRI.getUniqueVRegDef(PtrReg); |
| 3825 | + |
| 3826 | + if (DefDataInst->getOpcode() != TargetOpcode::G_FRAME_INDEX) |
| 3827 | + return false; |
| 3828 | + |
| 3829 | + if (MI.memoperands_empty()) |
| 3830 | + return false; |
| 3831 | + MachineMemOperand *MMO = MI.memoperands().front(); |
| 3832 | + |
| 3833 | + const int FrameIndex = DefDataInst->getOperand(1).getIndex(); |
| 3834 | + |
| 3835 | + const Align OptimalAlign = |
| 3836 | + Align(ByteCount < BasicVectorByteSize ? HalfVectorByteSize |
| 3837 | + : BasicVectorByteSize); |
| 3838 | + const Align MMOAlign = MMO->getAlign(); |
| 3839 | + |
| 3840 | + if (MMOAlign == OptimalAlign) |
| 3841 | + return false; |
| 3842 | + |
| 3843 | + MatchInfo = [=, &MI](MachineIRBuilder &B) { |
| 3844 | + MachineFunction *MF = MI.getMF(); |
| 3845 | + MachineFrameInfo &MFI = MF->getFrameInfo(); |
| 3846 | + MFI.setObjectAlignment(FrameIndex, OptimalAlign); |
| 3847 | + const LocationSize Size = MMO->getSize(); |
| 3848 | + MI.dropMemRefs(*MF); |
| 3849 | + MI.addMemOperand(*MF, |
| 3850 | + MF->getMachineMemOperand( |
| 3851 | + MachinePointerInfo::getFixedStack(*MF, FrameIndex), |
| 3852 | + MachineMemOperand::MOStore, Size, OptimalAlign)); |
| 3853 | + }; |
| 3854 | + |
| 3855 | + return true; |
| 3856 | +} |