Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,13 @@ void MCResourceInfo::assignResourceInfoExpr(
for (const Function *Callee : Callees) {
if (!Seen.insert(Callee).second)
continue;
if (!F.doesNotRecurse() && !Callee->doesNotRecurse())
continue;
MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
if (ArgExprs.size() > 1)
SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
}
MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
Sym->setVariableValue(SymVal);
Expand Down Expand Up @@ -164,6 +167,8 @@ void MCResourceInfo::gatherResourceInfo(
for (const Function *Callee : FRI.Callees) {
if (!Seen.insert(Callee).second)
continue;
if (!MF.getFunction().doesNotRecurse() && !Callee->doesNotRecurse())
continue;
if (!Callee->isDeclaration()) {
MCSymbol *calleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,4 @@ bb:
; GCN-NEXT: .set amdgpu.max_num_agpr, 32
; GCN-NEXT: .set amdgpu.max_num_sgpr, 34

attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ bb:
; GFX908: agpr_count: 0x20
; GFX908: vgpr_count: 0x20

attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" }
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,79 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
ret void
}

; GCN-LABEL: {{^}}multi_stage_recurse2:
; GCN: .set multi_stage_recurse2.num_vgpr, 41
; GCN: .set multi_stage_recurse2.num_agpr, 0
; GCN: .set multi_stage_recurse2.numbered_sgpr, 34
; GCN: .set multi_stage_recurse2.private_seg_size, 16
; GCN: .set multi_stage_recurse2.uses_vcc, 1
; GCN: .set multi_stage_recurse2.uses_flat_scratch, 0
; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, 0
; GCN: .set multi_stage_recurse2.has_recursion, 1
; GCN: .set multi_stage_recurse2.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; GCN-LABEL: {{^}}multi_stage_recurse1:
; GCN: .set multi_stage_recurse1.num_vgpr, 41
; GCN: .set multi_stage_recurse1.num_agpr, 0
; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
; GCN: .set multi_stage_recurse1.private_seg_size, 16
; GCN: .set multi_stage_recurse1.uses_vcc, 1
; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
; GCN: .set multi_stage_recurse1.has_recursion, 1
; GCN: .set multi_stage_recurse1.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; First half of a two-function mutual recursion: forwards %val to
; @multi_stage_recurse2, which in turn calls back into this function.
define void @multi_stage_recurse1(i32 %val) #2 {
call void @multi_stage_recurse2(i32 %val)
ret void
}
; Second half of the mutual recursion: forwards %val back to
; @multi_stage_recurse1, closing the call cycle between the two functions.
define void @multi_stage_recurse2(i32 %val) #2 {
call void @multi_stage_recurse1(i32 %val)
ret void
}

; GCN-LABEL: {{^}}usage_multi_stage_recurse:
; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Kernel whose only call is into the mutually recursive pair, entering it
; through @multi_stage_recurse1 with the kernel argument %n.
define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
call void @multi_stage_recurse1(i32 %n)
ret void
}

; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
; GCN: TotalNumSgprs: 49
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2052
; Kernel with three direct callees: @use_stack0 and @use_stack1 (both defined
; outside this excerpt) followed by the mutually recursive
; @multi_stage_recurse1, which receives the kernel argument %n.
define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
call void @use_stack0()
call void @use_stack1()
call void @multi_stage_recurse1(i32 %n)
ret void
}

; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,4 @@ bb:
ret void
}

attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/ipra.ll
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,6 @@ bb:
declare dso_local void @eggs()


attributes #0 = { nounwind }
attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #0 = { nounwind norecurse }
attributes #1 = { nounwind noinline norecurse "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { norecurse }
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/recursion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ define void @tail_recursive_with_stack() {
; For an arbitrary recursive call, report a large number for unknown stack
; usage for code object v4 and older
; CHECK-LABEL: {{^}}calls_recursive:
; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384, recursive.private_seg_size))
; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384))
;
; V5-LABEL: {{^}}calls_recursive:
; V5: .set calls_recursive.private_seg_size, 0+(max(recursive.private_seg_size))
; V5: .set calls_recursive.has_dyn_sized_stack, or(0, recursive.has_dyn_sized_stack)
; V5: .set calls_recursive.private_seg_size, 0
; V5: .set calls_recursive.has_dyn_sized_stack, 0
define amdgpu_kernel void @calls_recursive() {
call void @recursive()
ret void
Expand All @@ -65,22 +65,22 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() {
; in the kernel.

; CHECK-LABEL: {{^}}kernel_calls_tail_recursive:
; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384, tail_recursive.private_seg_size))
; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive:
; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size))
; V5: .set kernel_calls_tail_recursive.has_recursion, or(1, tail_recursive.has_recursion)
; V5: .set kernel_calls_tail_recursive.private_seg_size, 0
; V5: .set kernel_calls_tail_recursive.has_recursion, 1
; Kernel that does nothing but make one direct call to @tail_recursive
; (defined outside this excerpt) and return.
define amdgpu_kernel void @kernel_calls_tail_recursive() {
call void @tail_recursive()
ret void
}

; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, tail_recursive_with_stack.private_seg_size))
; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384))
;
; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack:
; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(tail_recursive_with_stack.private_seg_size))
; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, tail_recursive_with_stack.has_dyn_sized_stack)
; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0
; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, 0
define amdgpu_kernel void @kernel_calls_tail_recursive_with_stack() {
call void @tail_recursive_with_stack()
ret void
Expand Down
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/AMDGPU/recursive-mcexpr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -mcpu=gfx90a < %s | FileCheck %s

; CHECK-LABEL: {{^}}qux
; CHECK: .set qux.num_vgpr, 41
; CHECK: .set qux.num_agpr, 0
; CHECK: .set qux.numbered_sgpr, 34
; CHECK: .set qux.private_seg_size, 16
; CHECK: .set qux.uses_vcc, 1
; CHECK: .set qux.uses_flat_scratch, 0
; CHECK: .set qux.has_dyn_sized_stack, 0
; CHECK: .set qux.has_recursion, 1
; CHECK: .set qux.has_indirect_call, 0

; CHECK-LABEL: {{^}}baz
; CHECK: .set baz.num_vgpr, 42
; CHECK: .set baz.num_agpr, 0
; CHECK: .set baz.numbered_sgpr, 34
; CHECK: .set baz.private_seg_size, 16
; CHECK: .set baz.uses_vcc, 1
; CHECK: .set baz.uses_flat_scratch, 0
; CHECK: .set baz.has_dyn_sized_stack, 0
; CHECK: .set baz.has_recursion, 1
; CHECK: .set baz.has_indirect_call, 0

; CHECK-LABEL: {{^}}bar
; CHECK: .set bar.num_vgpr, 42
; CHECK: .set bar.num_agpr, 0
; CHECK: .set bar.numbered_sgpr, 34
; CHECK: .set bar.private_seg_size, 16
; CHECK: .set bar.uses_vcc, 1
; CHECK: .set bar.uses_flat_scratch, 0
; CHECK: .set bar.has_dyn_sized_stack, 0
; CHECK: .set bar.has_recursion, 1
; CHECK: .set bar.has_indirect_call, 0

; CHECK-LABEL: {{^}}foo
; CHECK: .set foo.num_vgpr, 42
; CHECK: .set foo.num_agpr, 0
; CHECK: .set foo.numbered_sgpr, 34
; CHECK: .set foo.private_seg_size, 16
; CHECK: .set foo.uses_vcc, 1
; CHECK: .set foo.uses_flat_scratch, 0
; CHECK: .set foo.has_dyn_sized_stack, 0
; CHECK: .set foo.has_recursion, 1
; CHECK: .set foo.has_indirect_call, 0

; Entry of a four-function call cycle in this test: foo -> bar -> baz -> qux -> foo.
; This function only calls @bar and returns.
define void @foo() {
entry:
call void @bar()
ret void
}

; Second link of the foo -> bar -> baz -> qux -> foo cycle; only calls @baz.
define void @bar() {
entry:
call void @baz()
ret void
}

; Third link of the foo -> bar -> baz -> qux -> foo cycle; only calls @qux.
define void @baz() {
entry:
call void @qux()
ret void
}

; Last link of the cycle: calls @foo, which closes the
; foo -> bar -> baz -> qux -> foo recursion.
define void @qux() {
entry:
call void @foo()
ret void
}

; CHECK-LABEL: {{^}}usefoo
; CHECK: .set usefoo.num_vgpr, 32
; CHECK: .set usefoo.num_agpr, 0
; CHECK: .set usefoo.numbered_sgpr, 33
; CHECK: .set usefoo.private_seg_size, 0
; CHECK: .set usefoo.uses_vcc, 1
; CHECK: .set usefoo.uses_flat_scratch, 1
; CHECK: .set usefoo.has_dyn_sized_stack, 0
; CHECK: .set usefoo.has_recursion, 1
; CHECK: .set usefoo.has_indirect_call, 0
; Kernel that enters the recursive foo/bar/baz/qux cycle through a single
; direct call to @foo.
define amdgpu_kernel void @usefoo() {
call void @foo()
ret void
}

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/sibling-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -468,5 +468,5 @@ entry:
ret <2 x i64> %ret
}

attributes #0 = { nounwind }
attributes #1 = { nounwind noinline "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #0 = { nounwind norecurse }
attributes #1 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
Loading