Skip to content

Commit a60c29f

Browse files
committed
Use conservative register counts for recursion, add comments, and add register usage to the introduced test.
1 parent a72d1ef commit a60c29f

File tree

3 files changed

+50
-22
lines changed

3 files changed

+50
-22
lines changed

llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,32 @@ void MCResourceInfo::assignResourceInfoExpr(
114114
MCSymbol *CalleeFnSym = TM.getSymbol(&Callee->getFunction());
115115
MCSymbol *CalleeValSym =
116116
getSymbol(CalleeFnSym->getName(), RIK, OutContext);
117+
118+
// Avoid constructing recursive definitions by detecting whether `Sym` is
119+
// found transitively within the expression assigned to `CalleeValSym`.
117120
if (!CalleeValSym->isVariable() ||
118121
!CalleeValSym->getVariableValue(/*isUsed=*/false)
119122
->isSymbolUsedInExpression(Sym)) {
120123
ArgExprs.push_back(MCSymbolRefExpr::create(CalleeValSym, OutContext));
124+
} else {
125+
// In case of recursion: make sure to use conservative register counts
126+
// (i.e., specifically for VGPR/SGPR/AGPR).
127+
switch (RIK) {
128+
default:
129+
break;
130+
case RIK_NumVGPR:
131+
ArgExprs.push_back(MCSymbolRefExpr::create(
132+
getMaxVGPRSymbol(OutContext), OutContext));
133+
break;
134+
case RIK_NumSGPR:
135+
ArgExprs.push_back(MCSymbolRefExpr::create(
136+
getMaxSGPRSymbol(OutContext), OutContext));
137+
break;
138+
case RIK_NumAGPR:
139+
ArgExprs.push_back(MCSymbolRefExpr::create(
140+
getMaxAGPRSymbol(OutContext), OutContext));
141+
break;
142+
}
121143
}
122144
}
123145
if (ArgExprs.size() > 1)
@@ -181,6 +203,8 @@ void MCResourceInfo::gatherResourceInfo(
181203
MCSymbol *CalleeValSym =
182204
getSymbol(CalleeFnSym->getName(), RIK_PrivateSegSize, OutContext);
183205

206+
// Avoid constructing recursive definitions by detecting whether `Sym`
207+
// is found transitively within the expression assigned to `CalleeValSym`.
184208
if (!CalleeValSym->isVariable() ||
185209
!CalleeValSym->getVariableValue(/*isUsed=*/false)
186210
->isSymbolUsedInExpression(Sym)) {

llvm/test/CodeGen/AMDGPU/function-resource-usage.ll

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
482482
}
483483

484484
; GCN-LABEL: {{^}}multi_stage_recurse2:
485-
; GCN: .set multi_stage_recurse2.num_vgpr, max(41, multi_stage_recurse1.num_vgpr)
485+
; GCN: .set multi_stage_recurse2.num_vgpr, max(43, multi_stage_recurse1.num_vgpr)
486486
; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
487487
; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
488488
; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
@@ -492,27 +492,29 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
492492
; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
493493
; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
494494
; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
495-
; GCN: NumVgprs: max(41, multi_stage_recurse1.num_vgpr)
495+
; GCN: NumVgprs: max(43, multi_stage_recurse1.num_vgpr)
496496
; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
497497
; GCN-LABEL: {{^}}multi_stage_recurse1:
498-
; GCN: .set multi_stage_recurse1.num_vgpr, 41
499-
; GCN: .set multi_stage_recurse1.num_agpr, 0
500-
; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
498+
; GCN: .set multi_stage_recurse1.num_vgpr, max(48, amdgpu.max_num_vgpr)
499+
; GCN: .set multi_stage_recurse1.num_agpr, max(0, amdgpu.max_num_agpr)
500+
; GCN: .set multi_stage_recurse1.numbered_sgpr, max(34, amdgpu.max_num_sgpr)
501501
; GCN: .set multi_stage_recurse1.private_seg_size, 16
502502
; GCN: .set multi_stage_recurse1.uses_vcc, 1
503503
; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
504504
; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
505505
; GCN: .set multi_stage_recurse1.has_recursion, 1
506506
; GCN: .set multi_stage_recurse1.has_indirect_call, 0
507-
; GCN: TotalNumSgprs: 38
508-
; GCN: NumVgprs: 41
507+
; GCN: TotalNumSgprs: multi_stage_recurse1.numbered_sgpr+4
508+
; GCN: NumVgprs: max(48, amdgpu.max_num_vgpr)
509509
; GCN: ScratchSize: 16
510510
define void @multi_stage_recurse1(i32 %val) #2 {
511511
call void @multi_stage_recurse2(i32 %val)
512+
call void asm sideeffect "", "~{v47}"() #0
512513
ret void
513514
}
514515
define void @multi_stage_recurse2(i32 %val) #2 {
515516
call void @multi_stage_recurse1(i32 %val)
517+
call void asm sideeffect "", "~{v42}"() #0
516518
ret void
517519
}
518520

@@ -526,8 +528,8 @@ define void @multi_stage_recurse2(i32 %val) #2 {
526528
; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
527529
; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
528530
; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
529-
; GCN: TotalNumSgprs: 40
530-
; GCN: NumVgprs: 41
531+
; GCN: TotalNumSgprs: usage_multi_stage_recurse.numbered_sgpr+6
532+
; GCN: NumVgprs: usage_multi_stage_recurse.num_vgpr
531533
; GCN: ScratchSize: 16
532534
define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
533535
call void @multi_stage_recurse1(i32 %n)
@@ -537,7 +539,7 @@ define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
537539
; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
538540
; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
539541
; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
540-
; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(34, multi_stage_recurse_noattr1.numbered_sgpr)
542+
; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(54, multi_stage_recurse_noattr1.numbered_sgpr)
541543
; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
542544
; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
543545
; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
@@ -548,24 +550,26 @@ define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
548550
; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
549551
; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
550552
; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
551-
; GCN: .set multi_stage_recurse_noattr1.num_vgpr, 41
552-
; GCN: .set multi_stage_recurse_noattr1.num_agpr, 0
553-
; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, 34
553+
; GCN: .set multi_stage_recurse_noattr1.num_vgpr, max(41, amdgpu.max_num_vgpr)
554+
; GCN: .set multi_stage_recurse_noattr1.num_agpr, max(0, amdgpu.max_num_agpr)
555+
; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, max(57, amdgpu.max_num_sgpr)
554556
; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
555557
; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
556558
; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
557559
; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
558560
; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
559561
; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
560-
; GCN: TotalNumSgprs: 38
561-
; GCN: NumVgprs: 41
562+
; GCN: TotalNumSgprs: multi_stage_recurse_noattr1.numbered_sgpr+4
563+
; GCN: NumVgprs: max(41, amdgpu.max_num_vgpr)
562564
; GCN: ScratchSize: 16
563565
define void @multi_stage_recurse_noattr1(i32 %val) #0 {
564566
call void @multi_stage_recurse_noattr2(i32 %val)
567+
call void asm sideeffect "", "~{s56}"() #0
565568
ret void
566569
}
567570
define void @multi_stage_recurse_noattr2(i32 %val) #0 {
568571
call void @multi_stage_recurse_noattr1(i32 %val)
572+
call void asm sideeffect "", "~{s53}"() #0
569573
ret void
570574
}
571575

@@ -579,8 +583,8 @@ define void @multi_stage_recurse_noattr2(i32 %val) #0 {
579583
; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
580584
; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
581585
; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
582-
; GCN: TotalNumSgprs: 40
583-
; GCN: NumVgprs: 41
586+
; GCN: TotalNumSgprs: usage_multi_stage_recurse_noattrs.numbered_sgpr+6
587+
; GCN: NumVgprs: usage_multi_stage_recurse_noattrs.num_vgpr
584588
; GCN: ScratchSize: 16
585589
define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
586590
call void @multi_stage_recurse_noattr1(i32 %n)
@@ -597,8 +601,8 @@ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
597601
; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
598602
; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
599603
; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
600-
; GCN: TotalNumSgprs: 49
601-
; GCN: NumVgprs: 41
604+
; GCN: TotalNumSgprs: multi_call_with_multi_stage_recurse.numbered_sgpr+6
605+
; GCN: NumVgprs: multi_call_with_multi_stage_recurse.num_vgpr
602606
; GCN: ScratchSize: 2052
603607
define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
604608
call void @use_stack0()

llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@
3434
; CHECK: .set bar.has_indirect_call, or(0, baz.has_indirect_call)
3535

3636
; CHECK-LABEL: {{^}}foo
37-
; CHECK: .set foo.num_vgpr, 42
38-
; CHECK: .set foo.num_agpr, 0
39-
; CHECK: .set foo.numbered_sgpr, 34
37+
; CHECK: .set foo.num_vgpr, max(42, amdgpu.max_num_vgpr)
38+
; CHECK: .set foo.num_agpr, max(0, amdgpu.max_num_agpr)
39+
; CHECK: .set foo.numbered_sgpr, max(34, amdgpu.max_num_sgpr)
4040
; CHECK: .set foo.private_seg_size, 16
4141
; CHECK: .set foo.uses_vcc, 1
4242
; CHECK: .set foo.uses_flat_scratch, 0

0 commit comments

Comments
 (0)