Skip to content

Commit 3672489

Browse files
committed
AMDGPU: Clear offset register when using local stack area
eliminateFrameIndex won't fix up the offset register when the direct frame index reference is moved to a separate move instruction. Switch the offset to a base of 0 (which it probably should have been to begin with).
1 parent deae5e5 commit 3672489

File tree

3 files changed

+15
-8
lines changed

3 files changed

+15
-8
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
503503
#endif
504504
assert(FIOp && FIOp->isFI() && "frame index must be address operand");
505505
assert(TII->isMUBUF(MI));
506-
assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() ==
507-
MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
506+
507+
MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
508+
assert(SOffset->getReg() ==
509+
MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
508510
"should only be seeing stack pointer offset relative FrameIndex");
509511

510512
MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
@@ -513,6 +515,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
513515

514516
FIOp->ChangeToRegister(BaseReg, false);
515517
OffsetOp->setImm(NewOffset);
518+
519+
// The move materializing the base address will be an absolute stack address,
520+
// so clear the base offset.
521+
SOffset->ChangeToImmediate(0);
516522
}
517523

518524
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,

llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8
4141
; GCN-NEXT: v_add_u32_e32 v1, 0x20d0, v1
4242
; GCN-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen
4343
; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4
44-
; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], s32 offen
45-
; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], s32 offen offset:4
44+
; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
45+
; GCN-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen offset:4
4646
; GCN-NEXT: s_waitcnt vmcnt(1)
4747
; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v2, v3
4848
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -94,8 +94,8 @@ define void @func_local_stack_offset_uses_sp(i64 addrspace(1)* %out, i8 addrspac
9494
; GCN-NEXT: v_add_u32_e32 v3, 0x20d0, v3
9595
; GCN-NEXT: buffer_load_dword v4, v3, s[0:3], 0 offen
9696
; GCN-NEXT: buffer_load_dword v3, v3, s[0:3], 0 offen offset:4
97-
; GCN-NEXT: buffer_load_dword v5, v2, s[0:3], s32 offen
98-
; GCN-NEXT: buffer_load_dword v6, v2, s[0:3], s32 offen offset:4
97+
; GCN-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen
98+
; GCN-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:4
9999
; GCN-NEXT: s_sub_u32 s32, s32, 0x180000
100100
; GCN-NEXT: s_mov_b32 s33, s5
101101
; GCN-NEXT: s_waitcnt vmcnt(1)

llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,9 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
4141
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
4242
; GCN-NEXT: s_cbranch_execz BB0_2
4343
; GCN-NEXT: ; %bb.1: ; %if.then4.i
44-
; GCN-NEXT: buffer_load_dword v0, v40, s[36:39], s32 offen
45-
; GCN-NEXT: buffer_load_dword v1, v40, s[36:39], s32 offen offset:4
44+
; GCN-NEXT: s_clause 0x1
45+
; GCN-NEXT: buffer_load_dword v0, v40, s[36:39], 0 offen
46+
; GCN-NEXT: buffer_load_dword v1, v40, s[36:39], 0 offen offset:4
4647
; GCN-NEXT: s_waitcnt vmcnt(0)
4748
; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v0
4849
; GCN-NEXT: v_mul_lo_u32 v0, 0x41c64e6d, v0

0 commit comments

Comments
 (0)