@@ -5521,11 +5521,18 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
55215521
55225522 Register PtrBase;
55235523 int64_t ConstOffset;
5524- std::tie (PtrBase, ConstOffset) =
5524+ bool IsInBounds;
5525+ std::tie (PtrBase, ConstOffset, IsInBounds) =
55255526 getPtrBaseWithConstantOffset (Root.getReg (), *MRI);
55265527
5527- if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
5528- !isFlatScratchBaseLegal (Root.getReg ())))
5528+ // Adding the offset to the base address with an immediate in a FLAT
5529+ // instruction must not change the memory aperture in which the address falls.
5530+ // Therefore we can only fold offsets from inbounds GEPs into FLAT
5531+ // instructions.
5532+ if (ConstOffset == 0 ||
5533+ (FlatVariant == SIInstrFlags::FlatScratch &&
5534+ !isFlatScratchBaseLegal (Root.getReg ())) ||
5535+ (FlatVariant == SIInstrFlags::FLAT && !IsInBounds))
55295536 return Default;
55305537
55315538 unsigned AddrSpace = (*MI->memoperands_begin ())->getAddrSpace ();
@@ -5577,7 +5584,8 @@ AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root,
55775584
55785585 // Match the immediate offset first, which canonically is moved as low as
55795586 // possible.
5580- std::tie (PtrBase, ConstOffset) = getPtrBaseWithConstantOffset (Addr, *MRI);
5587+ std::tie (PtrBase, ConstOffset, std::ignore) =
5588+ getPtrBaseWithConstantOffset (Addr, *MRI);
55815589
55825590 if (ConstOffset != 0 ) {
55835591 if (NeedIOffset &&
@@ -5760,7 +5768,8 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
57605768
57615769 // Match the immediate offset first, which canonically is moved as low as
57625770 // possible.
5763- std::tie (PtrBase, ConstOffset) = getPtrBaseWithConstantOffset (Addr, *MRI);
5771+ std::tie (PtrBase, ConstOffset, std::ignore) =
5772+ getPtrBaseWithConstantOffset (Addr, *MRI);
57645773
57655774 if (ConstOffset != 0 && isFlatScratchBaseLegal (Addr) &&
57665775 TII.isLegalFLATOffset (ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
@@ -5836,7 +5845,8 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
58365845
58375846 // Match the immediate offset first, which canonically is moved as low as
58385847 // possible.
5839- std::tie (PtrBase, ConstOffset) = getPtrBaseWithConstantOffset (Addr, *MRI);
5848+ std::tie (PtrBase, ConstOffset, std::ignore) =
5849+ getPtrBaseWithConstantOffset (Addr, *MRI);
58405850
58415851 Register OrigAddr = Addr;
58425852 if (ConstOffset != 0 &&
@@ -5942,7 +5952,8 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
59425952 const MachineInstr *RootDef = MRI->getVRegDef (Root.getReg ());
59435953 Register PtrBase;
59445954 int64_t ConstOffset;
5945- std::tie (PtrBase, ConstOffset) = getPtrBaseWithConstantOffset (VAddr, *MRI);
5955+ std::tie (PtrBase, ConstOffset, std::ignore) =
5956+ getPtrBaseWithConstantOffset (VAddr, *MRI);
59465957 if (ConstOffset != 0 ) {
59475958 if (TII.isLegalMUBUFImmOffset (ConstOffset) &&
59485959 (!STI.privateMemoryResourceIsRangeChecked () ||
@@ -6181,8 +6192,8 @@ AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const
61816192
61826193 Register PtrBase;
61836194 int64_t Offset;
6184- std::tie (PtrBase, Offset) =
6185- getPtrBaseWithConstantOffset (Root.getReg (), *MRI);
6195+ std::tie (PtrBase, Offset, std::ignore ) =
6196+ getPtrBaseWithConstantOffset (Root.getReg (), *MRI);
61866197
61876198 if (Offset) {
61886199 if (isDSOffsetLegal (PtrBase, Offset)) {
@@ -6243,8 +6254,8 @@ AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
62436254
62446255 Register PtrBase;
62456256 int64_t Offset;
6246- std::tie (PtrBase, Offset) =
6247- getPtrBaseWithConstantOffset (Root.getReg (), *MRI);
6257+ std::tie (PtrBase, Offset, std::ignore ) =
6258+ getPtrBaseWithConstantOffset (Root.getReg (), *MRI);
62486259
62496260 if (Offset) {
62506261 int64_t OffsetValue0 = Offset;
@@ -6265,22 +6276,25 @@ AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
62656276}
62666277
62676278// / If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return
6268- // / the base value with the constant offset. There may be intervening copies
6269- // / between \p Root and the identified constant. Returns \p Root, 0 if this does
6270- // / not match the pattern.
6271- std::pair<Register, int64_t >
6279+ // / the base value with the constant offset, and if the offset computation is
6280+ // / known to be inbounds. There may be intervening copies between \p Root and
6281+ // / the identified constant. Returns \p Root, 0, false if this does not match
6282+ // / the pattern.
6283+ std::tuple<Register, int64_t , bool >
62726284AMDGPUInstructionSelector::getPtrBaseWithConstantOffset (
6273- Register Root, const MachineRegisterInfo &MRI) const {
6285+ Register Root, const MachineRegisterInfo &MRI) const {
62746286 MachineInstr *RootI = getDefIgnoringCopies (Root, MRI);
62756287 if (RootI->getOpcode () != TargetOpcode::G_PTR_ADD)
6276- return {Root, 0 };
6288+ return {Root, 0 , false };
62776289
62786290 MachineOperand &RHS = RootI->getOperand (2 );
62796291 std::optional<ValueAndVReg> MaybeOffset =
62806292 getIConstantVRegValWithLookThrough (RHS.getReg (), MRI);
62816293 if (!MaybeOffset)
6282- return {Root, 0 };
6283- return {RootI->getOperand (1 ).getReg (), MaybeOffset->Value .getSExtValue ()};
6294+ return {Root, 0 , false };
6295+ bool IsInBounds = RootI->getFlag (MachineInstr::MIFlag::InBounds);
6296+ return {RootI->getOperand (1 ).getReg (), MaybeOffset->Value .getSExtValue (),
6297+ IsInBounds};
62846298}
62856299
62866300static void addZeroImm (MachineInstrBuilder &MIB) {
@@ -6358,7 +6372,8 @@ AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
63586372 Register PtrBase;
63596373 int64_t Offset;
63606374
6361- std::tie (PtrBase, Offset) = getPtrBaseWithConstantOffset (Src, *MRI);
6375+ std::tie (PtrBase, Offset, std::ignore) =
6376+ getPtrBaseWithConstantOffset (Src, *MRI);
63626377 if (isUInt<32 >(Offset)) {
63636378 Data.N0 = PtrBase;
63646379 Data.Offset = Offset;
0 commit comments