 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/IntrinsicsAIE2.h"
 #include "llvm/IR/IntrinsicsAIE2P.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <optional>

@@ -3854,3 +3855,177 @@ bool llvm::matchAlignMemset(MachineInstr &MI, MachineRegisterInfo &MRI,

   return true;
 }
+
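+/// If the offset operand of \p MI (a G_PTR_ADD) is a known constant, return
+/// the register of operand \p PointerIndex (0 = result pointer, 1 = base
+/// pointer) together with the sign-extended constant offset.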
+static std::optional<std::pair<Register, int64_t>>
+getPtrAndConstantOffset(const MachineInstr *MI, unsigned PointerIndex,
+                        MachineRegisterInfo &MRI) {
+  assert(MI->getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
+
+  const Register OffsetReg = MI->getOperand(2).getReg();
+  const auto Cst = getIConstantVRegValWithLookThrough(OffsetReg, MRI);
+
+  if (Cst)
+    return std::make_pair(MI->getOperand(PointerIndex).getReg(),
+                          Cst->Value.getSExtValue());
+
+  return std::nullopt;
+}
+
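+/// Returns true if \p Value is a multiple of \p TargetAlign bytes.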
+template <uint64_t TargetAlign> constexpr bool matchAlignment(uint64_t Value) {
+  return isAligned(Align(TargetAlign), Value);
+}
+
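+/// Try to prove that \p BasePtr is word-aligned by searching its users for a
+/// load or store whose MMO guarantees a 4-byte alignment, either directly or
+/// through a G_PTR_ADD with a word-aligned constant offset.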
+static bool isBasePointerWordAligned(Register BasePtr,
+                                     MachineRegisterInfo &MRI) {
+  auto IsBasePointerAligned = [&](const MachineInstr *MI) {
+    if (!isa<GLoadStore>(MI))
+      return false;
+    if (MI->memoperands_empty())
+      return false;
+    const MachineMemOperand *MMO = MI->memoperands().front();
+    return matchAlignment<4>(MMO->getAlign().value());
+  };
+
+  for (const MachineInstr &MI : MRI.use_instructions(BasePtr)) {
+    if (MI.getOpcode() == TargetOpcode::G_PTR_ADD) {
+      auto RegAndOffset = getPtrAndConstantOffset(&MI, 0, MRI);
+      if (!RegAndOffset || !matchAlignment<4>(RegAndOffset->second))
+        continue;
+      // If one user is aligned, that is enough for us.
+      if (any_of(MRI.use_instructions(RegAndOffset->first),
+                 [&](const MachineInstr &UseMI) {
+                   return IsBasePointerAligned(&UseMI);
+                 }))
+        return true;
+    } else if (IsBasePointerAligned(&MI)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// We try to align MEMSETs by peeling off a few leading stores.
+// To be effective, we need a set length of at least 3 bytes:
+// If we are only byte-aligned we can generate
+// - G_STORE s8
+// - G_STORE s16
+// - G_MEMSET (... n - 3 ...)
+// If we are short-aligned we can generate
+// - G_STORE s16
+// - G_MEMSET (... n - 2 ...)
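+//
+// For example, a 7-byte G_MEMSET at offset 1 from a word-aligned base becomes
+// a G_STORE s8 at offset 1, a G_STORE s16 at offset 2, and a word-aligned
+// G_MEMSET of 4 bytes at offset 4.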
+bool llvm::matchPeelMemset(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           const AIEBaseInstrInfo &TII,
+                           GISelChangeObserver &Observer,
+                           BuildFnTy &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_MEMSET && "Expected a G_MEMSET");
+
+  if (!MemsetOptimizations)
+    return false;
+
+  if (MI.memoperands_empty())
+    return false;
+  MachineMemOperand *MMO = MI.memoperands().front();
+  const Align MMOAlign = MMO->getAlign();
+
+  // If it is already word-aligned, there is nothing to do.
+  if (matchAlignment<4>(MMOAlign.value()))
+    return false;
+
+  const Register SizeReg = MI.getOperand(2).getReg();
+  const auto Cst = getIConstantVRegValWithLookThrough(SizeReg, MRI);
+  if (!Cst)
+    return false;
+  const int64_t Size = Cst->Value.getSExtValue();
+
+  // Peeling can emit up to 3 bytes of stores, so we need at least 3 bytes to
+  // work with.
+  if (Size < 3)
+    return false;
+
+  Register PtrReg = MI.getOperand(0).getReg();
+  const Register DataReg = MI.getOperand(1).getReg();
+  const auto CstInit = getIConstantVRegValWithLookThrough(DataReg, MRI);
+  if (!CstInit)
+    return false;
+  const uint64_t Initializer = CstInit->Value.getZExtValue();
+
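+  // Look through a constant-offset G_PTR_ADD on the destination so that we
+  // can reason about alignment relative to the underlying base pointer.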
+  int64_t Offset = 0;
+  const MachineInstr *DefPtrReg = MRI.getVRegDef(PtrReg);
+  if (DefPtrReg->getOpcode() == TargetOpcode::G_PTR_ADD) {
+    const auto RegAndOffset = getPtrAndConstantOffset(DefPtrReg, 1, MRI);
+    if (!RegAndOffset)
+      return false;
+
+    Offset = RegAndOffset->second;
+    PtrReg = RegAndOffset->first;
+  }
+
+  // The next step is to prove that the base pointer is word-aligned. Since we
+  // cannot simply assume it, we search for aligned uses of the base pointer.
+  if (!isBasePointerWordAligned(PtrReg, MRI))
+    return false;
+
+  MatchInfo = [=, &MI, &MRI, &Observer](MachineIRBuilder &B) {
+    auto &MF = B.getMF();
+
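+    // Materializes BasePtr + CurrentOffset as a fresh pointer register and
+    // notifies the observer about the new G_PTR_ADD.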
+    auto BuildPADD = [&](int64_t CurrentOffset) {
+      Register NewPtrReg = MRI.cloneVirtualRegister(PtrReg);
+      Register OffsetReg =
+          B.buildConstant(LLT::scalar(20), CurrentOffset).getReg(0);
+      Observer.createdInstr(*B.buildInstr(TargetOpcode::G_PTR_ADD)
+                                 .addDef(NewPtrReg)
+                                 .addReg(PtrReg)
+                                 .addReg(OffsetReg));
+      return NewPtrReg;
+    };
+
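+    // Clones the original MMO with the given size and alignment.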
+    auto BuildMMO = [&](LocationSize MemSize, Align A) {
+      MachineMemOperand *NewMMO = MF.getMachineMemOperand(
+          MMO->getPointerInfo(), MMO->getFlags(), MemSize, A);
+      return NewMMO;
+    };
+
+    int64_t PeelOffset = Offset;
+    // If the offset is already word-aligned, we only need to fix the
+    // alignment of the MMO below.
+    if (!matchAlignment<4>(PeelOffset)) {
+      // If not even short-aligned, store one byte to reach the next short
+      // boundary.
+      if (!matchAlignment<2>(PeelOffset)) {
+        B.buildStore(DataReg, BuildPADD(PeelOffset), *BuildMMO(1, Align(1)));
+        PeelOffset++;
+      }
+
+      // If we are short-aligned but still not word-aligned, store two more
+      // bytes to reach the next word boundary.
+      if (!matchAlignment<4>(PeelOffset)) {
+        Register DataRegAdjustedToS16 =
+            B.buildConstant(LLT::scalar(16), (Initializer << 8) | Initializer)
+                .getReg(0);
+        B.buildStore(DataRegAdjustedToS16, BuildPADD(PeelOffset),
+                     *BuildMMO(2, Align(2)));
+        PeelOffset += 2;
+      }
+    }
+
+    const unsigned NewSize = Size - (PeelOffset - Offset);
+
+    // No bytes left to memset.
+    if (NewSize == 0) {
+      MI.eraseFromParent();
+      return;
+    }
+
+    const int64_t MemsetOffset = PeelOffset;
+    assert(matchAlignment<4>(MemsetOffset) && "Memset still unaligned?");
+    // What remains is word-aligned; we just need to fix the pointer, size,
+    // and MMO of the G_MEMSET.
+    MachineMemOperand *NewMMOMemSet = BuildMMO(NewSize, Align(4));
+    MI.dropMemRefs(MF); // Safe to drop the old MMO now.
+    MI.addMemOperand(MF, NewMMOMemSet);
+    MI.getOperand(2).setReg(
+        B.buildConstant(LLT::scalar(20), NewSize).getReg(0));
+    MI.getOperand(0).setReg(BuildPADD(MemsetOffset));
+  };
+
+  return true;
+}