Skip to content
88 changes: 78 additions & 10 deletions llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,30 +91,83 @@ MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
}

// The expression should have no recursion in it. Test a (sub-)expression to
// see if it needs to be further visited, or if a recursion has been found.
// Returns true if Sym is found within Expr (i.e., Sym recurs within its own
// definition), false otherwise. Sub-expressions that still need visiting are
// appended to Exprs so the caller can drive a worklist-based walk.
static bool findSymbolInExpr(MCSymbol *Sym, const MCExpr *Expr,
                             SmallPtrSetImpl<const MCExpr *> &Exprs) {
  if (const auto *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
    const MCSymbol &SymRef = SymRefExpr->getSymbol();
    if (Sym == &SymRef)
      return true;
    // Descend into the referenced symbol's defining expression, if it has one.
    if (SymRef.isVariable())
      Exprs.insert(SymRef.getVariableValue(/*isUsed=*/false));
    return false;
  }
  if (const auto *BExpr = dyn_cast<MCBinaryExpr>(Expr)) {
    Exprs.insert(BExpr->getLHS());
    Exprs.insert(BExpr->getRHS());
    return false;
  }
  if (const auto *UExpr = dyn_cast<MCUnaryExpr>(Expr)) {
    Exprs.insert(UExpr->getSubExpr());
    return false;
  }
  if (const auto *AGVK = dyn_cast<AMDGPUMCExpr>(Expr)) {
    // Target expressions carry a variadic argument list; visit each argument.
    for (const MCExpr *E : AGVK->getArgs())
      Exprs.insert(E);
    return false;
  }
  // Remaining kinds (e.g., constants) are leaves that cannot contain Sym.
  return false;
}

// Assigns Sym (the RIK symbol for MF) the expression:
//   Kind(LocalValue, callee_0.<RIK>, ..., callee_N.<RIK>)
// omitting any callee symbol whose defining expression would (transitively)
// reference Sym itself, which would otherwise create a recursive symbol
// definition the assembler cannot resolve.
void MCResourceInfo::assignResourceInfoExpr(
    int64_t LocalValue, ResourceInfoKind RIK, AMDGPUMCExpr::VariantKind Kind,
    const MachineFunction &MF, const SmallVectorImpl<const Function *> &Callees,
    MCContext &OutContext) {
  const MCConstantExpr *LocalConstExpr =
      MCConstantExpr::create(LocalValue, OutContext);
  const MCExpr *SymVal = LocalConstExpr;
  MCSymbol *Sym = getSymbol(MF.getName(), RIK, OutContext);
  if (!Callees.empty()) {
    SmallVector<const MCExpr *, 8> ArgExprs;
    // Avoid recursive symbol assignment.
    SmallPtrSet<const Function *, 8> Seen;
    ArgExprs.push_back(LocalConstExpr);
    const Function &F = MF.getFunction();
    Seen.insert(&F);

    for (const Function *Callee : Callees) {
      if (!Seen.insert(Callee).second)
        continue;

      // Worklist of sub-expressions reachable from the callee's symbol value;
      // walked below to detect whether Sym occurs within it.
      SmallPtrSet<const MCExpr *, 8> WorkSet;
      MCSymbol *CalleeValSym = getSymbol(Callee->getName(), RIK, OutContext);
      if (CalleeValSym->isVariable())
        WorkSet.insert(CalleeValSym->getVariableValue(/*IsUsed=*/false));
      else
        // No defining expression yet, so referencing it cannot recurse.
        ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));

      // Exhaustively visit the callee's expression tree, stopping early if a
      // reference back to Sym is found.
      bool FoundRecursion = false;
      while (!WorkSet.empty() && !FoundRecursion) {
        auto It = WorkSet.begin();
        const MCExpr *Expr = *It;
        WorkSet.erase(Expr);

        FoundRecursion = findSymbolInExpr(Sym, Expr, WorkSet);
      }

      // Only reference the callee's symbol if doing so is non-recursive.
      if (CalleeValSym->isVariable() && !FoundRecursion)
        ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
    }
    // With only the local constant left (all callees dropped as recursive),
    // keep the plain constant instead of a degenerate one-arg target expr.
    if (ArgExprs.size() > 1)
      SymVal = AMDGPUMCExpr::create(Kind, ArgExprs, OutContext);
  }
  Sym->setVariableValue(SymVal);
}

Expand Down Expand Up @@ -155,6 +208,7 @@ void MCResourceInfo::gatherResourceInfo(
// The expression for private segment size should be: FRI.PrivateSegmentSize
// + max(FRI.Callees, FRI.CalleeSegmentSize)
SmallVector<const MCExpr *, 8> ArgExprs;
MCSymbol *Sym = getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext);
if (FRI.CalleeSegmentSize)
ArgExprs.push_back(
MCConstantExpr::create(FRI.CalleeSegmentSize, OutContext));
Expand All @@ -165,9 +219,24 @@ void MCResourceInfo::gatherResourceInfo(
if (!Seen.insert(Callee).second)
continue;
if (!Callee->isDeclaration()) {
MCSymbol *calleeValSym =
SmallPtrSet<const MCExpr *, 8> WorkSet;
MCSymbol *CalleeValSym =
getSymbol(Callee->getName(), RIK_PrivateSegSize, OutContext);
ArgExprs.push_back(MCSymbolRefExpr::create(calleeValSym, OutContext));
if (CalleeValSym->isVariable())
WorkSet.insert(CalleeValSym->getVariableValue(/*IsUsed=*/false));
else
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));

bool FoundRecursion = false;
while (!WorkSet.empty() && !FoundRecursion) {
auto It = WorkSet.begin();
const MCExpr *Expr = *It;
WorkSet.erase(Expr);

FoundRecursion = findSymbolInExpr(Sym, Expr, WorkSet);
}
if (CalleeValSym->isVariable() && !FoundRecursion)
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
}
}
const MCExpr *localConstExpr =
Expand All @@ -178,8 +247,7 @@ void MCResourceInfo::gatherResourceInfo(
localConstExpr =
MCBinaryExpr::createAdd(localConstExpr, transitiveExpr, OutContext);
}
getSymbol(MF.getName(), RIK_PrivateSegSize, OutContext)
->setVariableValue(localConstExpr);
Sym->setVariableValue(localConstExpr);
}

auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
Expand Down
126 changes: 126 additions & 0 deletions llvm/test/CodeGen/AMDGPU/function-resource-usage.ll
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,132 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
ret void
}

; GCN-LABEL: {{^}}multi_stage_recurse2:
; GCN: .set multi_stage_recurse2.num_vgpr, max(41, multi_stage_recurse1.num_vgpr)
; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
; GCN: NumVgprs: max(41, multi_stage_recurse1.num_vgpr)
; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
; GCN-LABEL: {{^}}multi_stage_recurse1:
; GCN: .set multi_stage_recurse1.num_vgpr, 41
; GCN: .set multi_stage_recurse1.num_agpr, 0
; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
; GCN: .set multi_stage_recurse1.private_seg_size, 16
; GCN: .set multi_stage_recurse1.uses_vcc, 1
; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
; GCN: .set multi_stage_recurse1.has_recursion, 1
; GCN: .set multi_stage_recurse1.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @multi_stage_recurse1(i32 %val) #2 {
call void @multi_stage_recurse2(i32 %val)
ret void
}
define void @multi_stage_recurse2(i32 %val) #2 {
call void @multi_stage_recurse1(i32 %val)
ret void
}

; GCN-LABEL: {{^}}usage_multi_stage_recurse:
; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
call void @multi_stage_recurse1(i32 %n)
ret void
}

; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(34, multi_stage_recurse_noattr1.numbered_sgpr)
; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
; GCN: .set multi_stage_recurse_noattr1.num_vgpr, 41
; GCN: .set multi_stage_recurse_noattr1.num_agpr, 0
; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, 34
; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @multi_stage_recurse_noattr1(i32 %val) #0 {
call void @multi_stage_recurse_noattr2(i32 %val)
ret void
}
define void @multi_stage_recurse_noattr2(i32 %val) #0 {
call void @multi_stage_recurse_noattr1(i32 %val)
ret void
}

; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
call void @multi_stage_recurse_noattr1(i32 %n)
ret void
}

; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
; GCN: TotalNumSgprs: 49
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
call void @use_stack0()
call void @use_stack1()
call void @multi_stage_recurse1(i32 %n)
ret void
}

; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
Expand Down
82 changes: 82 additions & 0 deletions llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s

; CHECK-LABEL: {{^}}qux
; CHECK: .set qux.num_vgpr, 0
; CHECK: .set qux.num_agpr, 0
; CHECK: .set qux.numbered_sgpr, 32
; CHECK: .set qux.private_seg_size, 0
; CHECK: .set qux.uses_vcc, 0
; CHECK: .set qux.uses_flat_scratch, 0
; CHECK: .set qux.has_dyn_sized_stack, 0
; CHECK: .set qux.has_recursion, 0
; CHECK: .set qux.has_indirect_call, 0
define void @qux() {
entry:
ret void
}

; CHECK-LABEL: {{^}}baz
; CHECK: .set baz.num_vgpr, max(32, qux.num_vgpr)
; CHECK: .set baz.num_agpr, max(0, qux.num_agpr)
; CHECK: .set baz.numbered_sgpr, max(34, qux.numbered_sgpr)
; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size))
; CHECK: .set baz.uses_vcc, or(0, qux.uses_vcc)
; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch)
; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack)
; CHECK: .set baz.has_recursion, or(1, qux.has_recursion)
; CHECK: .set baz.has_indirect_call, or(0, qux.has_indirect_call)
define void @baz() {
entry:
call void @qux()
ret void
}

; CHECK-LABEL: {{^}}bar
; CHECK: .set bar.num_vgpr, max(32, baz.num_vgpr, qux.num_vgpr)
; CHECK: .set bar.num_agpr, max(0, baz.num_agpr, qux.num_agpr)
; CHECK: .set bar.numbered_sgpr, max(34, baz.numbered_sgpr, qux.numbered_sgpr)
; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size, qux.private_seg_size))
; CHECK: .set bar.uses_vcc, or(0, baz.uses_vcc, qux.uses_vcc)
; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch, qux.uses_flat_scratch)
; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack, qux.has_dyn_sized_stack)
; CHECK: .set bar.has_recursion, or(1, baz.has_recursion, qux.has_recursion)
; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call, qux.has_indirect_call)
define void @bar() {
entry:
call void @baz()
call void @qux()
call void @baz()
ret void
}

; CHECK-LABEL: {{^}}foo
; CHECK: .set foo.num_vgpr, max(32, bar.num_vgpr)
; CHECK: .set foo.num_agpr, max(0, bar.num_agpr)
; CHECK: .set foo.numbered_sgpr, max(34, bar.numbered_sgpr)
; CHECK: .set foo.private_seg_size, 16+(max(bar.private_seg_size))
; CHECK: .set foo.uses_vcc, or(0, bar.uses_vcc)
; CHECK: .set foo.uses_flat_scratch, or(0, bar.uses_flat_scratch)
; CHECK: .set foo.has_dyn_sized_stack, or(0, bar.has_dyn_sized_stack)
; CHECK: .set foo.has_recursion, or(1, bar.has_recursion)
; CHECK: .set foo.has_indirect_call, or(0, bar.has_indirect_call)
define void @foo() {
entry:
call void @bar()
ret void
}

; CHECK-LABEL: {{^}}usefoo
; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr)
; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr)
; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr)
; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size))
; CHECK: .set usefoo.uses_vcc, or(0, foo.uses_vcc)
; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch)
; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack)
; CHECK: .set usefoo.has_recursion, or(1, foo.has_recursion)
; CHECK: .set usefoo.has_indirect_call, or(0, foo.has_indirect_call)
define amdgpu_kernel void @usefoo() {
call void @foo()
ret void
}

Loading
Loading