Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
304 changes: 291 additions & 13 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Large diffs are not rendered by default.

30 changes: 29 additions & 1 deletion llvm/lib/Target/AMDGPU/SIFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
Register FramePtrRegScratchCopy,
const bool NeedsFrameMoves) const;
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
LiveRegUnits &LiveUnits, Register FrameReg,
Expand Down Expand Up @@ -101,6 +102,15 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
Register PreloadedPrivateBufferReg, Register ScratchRsrcReg,
Register ScratchWaveOffsetReg) const;

void emitPrologueEntryCFI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const;

void emitDefCFA(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc const &DL, Register StackPtrReg,
bool AspaceAlreadyDefined,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const;

public:
bool requiresStackPointerReference(const MachineFunction &MF) const;

Expand All @@ -110,6 +120,24 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
const DebugLoc &DL, const MCCFIInstruction &CFIInst,
MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const;

/// Create a CFI index describing a spill of an SGPR to a single lane of
/// a VGPR and build a MachineInstr around it.
MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
const Register SGPR,
const Register VGPR,
const int Lane) const;
/// Create a CFI index describing a spill of an SGPR to multiple lanes of
/// VGPRs and build a MachineInstr around it.
MachineInstr *buildCFIForSGPRToVGPRSpill(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register SGPR,
ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const;
MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register Reg,
Register SGPRPair) const;
// Returns true if the function may need to reserve space on the stack for the
// CWSR trap handler.
bool mayReserveScratchForCWSR(const MachineFunction &MF) const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
declare hidden void @external_void_func_v16i32_v16i32_v4i32(<16 x i32>, <16 x i32>, <4 x i32>) #0
declare hidden void @external_void_func_byval(ptr addrspace(5) byval([16 x i32])) #0

define amdgpu_kernel void @kernel_caller_stack() {
define amdgpu_kernel void @kernel_caller_stack() #2 {
; MUBUF-LABEL: kernel_caller_stack:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s12, s17
Expand Down Expand Up @@ -57,7 +57,7 @@ define amdgpu_kernel void @kernel_caller_stack() {
ret void
}

define amdgpu_kernel void @kernel_caller_byval() {
define amdgpu_kernel void @kernel_caller_byval() #2 {
; MUBUF-LABEL: kernel_caller_byval:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_add_u32 flat_scratch_lo, s12, s17
Expand Down Expand Up @@ -213,7 +213,7 @@ define amdgpu_kernel void @kernel_caller_byval() {
ret void
}

define void @func_caller_stack() {
define void @func_caller_stack() #2 {
; MUBUF-LABEL: func_caller_stack:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -291,7 +291,7 @@ define void @func_caller_stack() {
ret void
}

define void @func_caller_byval(ptr addrspace(5) %argptr) {
define void @func_caller_byval(ptr addrspace(5) %argptr) #2 {
; MUBUF-LABEL: func_caller_byval:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -432,3 +432,4 @@ declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32,

attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #1 = { argmemonly nofree nounwind willreturn writeonly }
attributes #2 = { nounwind }
14 changes: 8 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

@gv = external addrspace(4) constant i32

define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) #0 {
; GFX9-LABEL: kernel_dynamic_stackalloc_sgpr_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s5, s[8:9], 0x0
Expand Down Expand Up @@ -63,7 +63,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
ret void
}

define void @func_dynamic_stackalloc_sgpr_align4() {
define void @func_dynamic_stackalloc_sgpr_align4() #0 {
; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -146,7 +146,7 @@ define void @func_dynamic_stackalloc_sgpr_align4() {
ret void
}

define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) #0 {
; GFX9-LABEL: kernel_dynamic_stackalloc_sgpr_align16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s5, s[8:9], 0x0
Expand Down Expand Up @@ -204,7 +204,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
ret void
}

define void @func_dynamic_stackalloc_sgpr_align16() {
define void @func_dynamic_stackalloc_sgpr_align16() #0 {
; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -287,7 +287,7 @@ define void @func_dynamic_stackalloc_sgpr_align16() {
ret void
}

define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) #0 {
; GFX9-LABEL: kernel_dynamic_stackalloc_sgpr_align32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s4, s[8:9], 0x0
Expand Down Expand Up @@ -348,7 +348,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
ret void
}

define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -445,3 +445,5 @@ define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
store i32 0, ptr addrspace(5) %alloca
ret void
}

attributes #0 = { nounwind }
10 changes: 6 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; Test the localizer did something and we don't materialize all
; constants in SGPRs in the entry block.

define amdgpu_kernel void @localize_constants(i1 %cond) {
define amdgpu_kernel void @localize_constants(i1 %cond) #0 {
; GFX9-LABEL: localize_constants:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[8:9], 0x0
Expand Down Expand Up @@ -91,7 +91,7 @@ bb2:
@gv2 = addrspace(1) global i32 poison, align 4
@gv3 = addrspace(1) global i32 poison, align 4

define amdgpu_kernel void @localize_globals(i1 %cond) {
define amdgpu_kernel void @localize_globals(i1 %cond) #0 {
; GFX9-LABEL: localize_globals:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[8:9], 0x0
Expand Down Expand Up @@ -162,7 +162,7 @@ bb2:
@static.gv2 = internal addrspace(1) global i32 poison, align 4
@static.gv3 = internal addrspace(1) global i32 poison, align 4

define void @localize_internal_globals(i1 %cond) {
define void @localize_internal_globals(i1 %cond) #0 {
; GFX9-LABEL: localize_internal_globals:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -226,7 +226,7 @@ bb2:
}

; This would crash from using the wrong insert point
define void @sink_null_insert_pt(ptr addrspace(4) %arg0) {
define void @sink_null_insert_pt(ptr addrspace(4) %arg0) #0 {
; GFX9-LABEL: sink_null_insert_pt:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -262,3 +262,5 @@ bb1:
call void null()
ret void
}

attributes #0 = { nounwind }
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

; FIXME: FunctionLoweringInfo unhelpfully doesn't preserve an
; alignment less than the stack alignment.
define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) {
define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) #1 {
; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[8:9], 0x8
Expand Down Expand Up @@ -82,7 +82,7 @@ bb.2:
; ASSUME1024: .amdhsa_private_segment_fixed_size 1040
; ASSUME1024: ; ScratchSize: 1040

define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) {
define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) #1 {
; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[8:9], 0x8
Expand Down Expand Up @@ -146,7 +146,7 @@ bb.1:
; ASSUME1024: ; ScratchSize: 1088


define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) {
define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) #1 {
; GCN-LABEL: func_non_entry_block_static_alloca_align4:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -210,7 +210,7 @@ bb.2:
ret void
}

define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) {
define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) #1 {
; GCN-LABEL: func_non_entry_block_static_alloca_align64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -272,6 +272,7 @@ bb.1:
declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; ASSUME1024: {{.*}}
Expand Down
Loading
Loading