@@ -482,7 +482,7 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
482482}
483483
484484; GCN-LABEL: {{^}}multi_stage_recurse2:
485- ; GCN: .set multi_stage_recurse2.num_vgpr, max(41 , multi_stage_recurse1.num_vgpr)
485+ ; GCN: .set multi_stage_recurse2.num_vgpr, max(43 , multi_stage_recurse1.num_vgpr)
486486; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
487487; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
488488; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
@@ -492,27 +492,29 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
492492; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
493493; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
494494; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
495- ; GCN: NumVgprs: max(41 , multi_stage_recurse1.num_vgpr)
495+ ; GCN: NumVgprs: max(43 , multi_stage_recurse1.num_vgpr)
496496; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
497497; GCN-LABEL: {{^}}multi_stage_recurse1:
498- ; GCN: .set multi_stage_recurse1.num_vgpr, 41
499- ; GCN: .set multi_stage_recurse1.num_agpr, 0
500- ; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
498+ ; GCN: .set multi_stage_recurse1.num_vgpr, max(48, amdgpu.max_num_vgpr)
499+ ; GCN: .set multi_stage_recurse1.num_agpr, max(0, amdgpu.max_num_agpr)
500+ ; GCN: .set multi_stage_recurse1.numbered_sgpr, max(34, amdgpu.max_num_sgpr)
501501; GCN: .set multi_stage_recurse1.private_seg_size, 16
502502; GCN: .set multi_stage_recurse1.uses_vcc, 1
503503; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
504504; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
505505; GCN: .set multi_stage_recurse1.has_recursion, 1
506506; GCN: .set multi_stage_recurse1.has_indirect_call, 0
507- ; GCN: TotalNumSgprs: 38
508- ; GCN: NumVgprs: 41
507+ ; GCN: TotalNumSgprs: multi_stage_recurse1.numbered_sgpr+4
508+ ; GCN: NumVgprs: max(48, amdgpu.max_num_vgpr)
509509; GCN: ScratchSize: 16
; One half of a two-function recursion cycle: calls @multi_stage_recurse2,
; which in turn calls back into this function.
; The inline asm has an empty body and only lists v47 as clobbered; this lines
; up with the 48 expected by the multi_stage_recurse1.num_vgpr / NumVgprs
; checks above.
510510define void @multi_stage_recurse1 (i32 %val ) #2 {
511511 call void @multi_stage_recurse2 (i32 %val )
512+ call void asm sideeffect "" , "~{v47}" () #0
512513 ret void
513514}
; Other half of the recursion cycle: calls @multi_stage_recurse1, which calls
; back into this function.
; The inline asm has an empty body and only lists v42 as clobbered; this lines
; up with the 43 used as the local term in the multi_stage_recurse2.num_vgpr /
; NumVgprs max(...) checks above.
514515define void @multi_stage_recurse2 (i32 %val ) #2 {
515516 call void @multi_stage_recurse1 (i32 %val )
517+ call void asm sideeffect "" , "~{v42}" () #0
516518 ret void
517519}
518520
@@ -526,8 +528,8 @@ define void @multi_stage_recurse2(i32 %val) #2 {
526528; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
527529; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
528530; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
529- ; GCN: TotalNumSgprs: 40
530- ; GCN: NumVgprs: 41
531+ ; GCN: TotalNumSgprs: usage_multi_stage_recurse.numbered_sgpr+6
532+ ; GCN: NumVgprs: usage_multi_stage_recurse.num_vgpr
531533; GCN: ScratchSize: 16
532534define amdgpu_kernel void @usage_multi_stage_recurse (i32 %n ) #0 {
533535 call void @multi_stage_recurse1 (i32 %n )
@@ -537,7 +539,7 @@ define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
537539; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
538540; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
539541; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
540- ; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(34 , multi_stage_recurse_noattr1.numbered_sgpr)
542+ ; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(54 , multi_stage_recurse_noattr1.numbered_sgpr)
541543; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
542544; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
543545; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
@@ -548,24 +550,26 @@ define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
548550; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
549551; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
550552; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
551- ; GCN: .set multi_stage_recurse_noattr1.num_vgpr, 41
552- ; GCN: .set multi_stage_recurse_noattr1.num_agpr, 0
553- ; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, 34
553+ ; GCN: .set multi_stage_recurse_noattr1.num_vgpr, max(41, amdgpu.max_num_vgpr)
554+ ; GCN: .set multi_stage_recurse_noattr1.num_agpr, max(0, amdgpu.max_num_agpr)
555+ ; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, max(57, amdgpu.max_num_sgpr)
554556; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
555557; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
556558; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
557559; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
558560; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
559561; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
560- ; GCN: TotalNumSgprs: 38
561- ; GCN: NumVgprs: 41
562+ ; GCN: TotalNumSgprs: multi_stage_recurse_noattr1.numbered_sgpr+4
563+ ; GCN: NumVgprs: max(41, amdgpu.max_num_vgpr)
562564; GCN: ScratchSize: 16
; One half of a two-function call cycle defined with attribute group #0:
; calls @multi_stage_recurse_noattr2, which calls back into this function.
; The checks above expect has_recursion to be 0 for this function.
; The inline asm has an empty body and only lists s56 as clobbered; this lines
; up with the 57 expected by the multi_stage_recurse_noattr1.numbered_sgpr
; check above.
563565define void @multi_stage_recurse_noattr1 (i32 %val ) #0 {
564566 call void @multi_stage_recurse_noattr2 (i32 %val )
567+ call void asm sideeffect "" , "~{s56}" () #0
565568 ret void
566569}
; Other half of the #0 call cycle: calls @multi_stage_recurse_noattr1, which
; calls back into this function.
; The inline asm has an empty body and only lists s53 as clobbered; this lines
; up with the 54 used as the local term in the
; multi_stage_recurse_noattr2.numbered_sgpr max(...) check above.
567570define void @multi_stage_recurse_noattr2 (i32 %val ) #0 {
568571 call void @multi_stage_recurse_noattr1 (i32 %val )
572+ call void asm sideeffect "" , "~{s53}" () #0
569573 ret void
570574}
571575
@@ -579,8 +583,8 @@ define void @multi_stage_recurse_noattr2(i32 %val) #0 {
579583; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
580584; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
581585; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
582- ; GCN: TotalNumSgprs: 40
583- ; GCN: NumVgprs: 41
586+ ; GCN: TotalNumSgprs: usage_multi_stage_recurse_noattrs.numbered_sgpr+6
587+ ; GCN: NumVgprs: usage_multi_stage_recurse_noattrs.num_vgpr
584588; GCN: ScratchSize: 16
585589define amdgpu_kernel void @usage_multi_stage_recurse_noattrs (i32 %n ) #0 {
586590 call void @multi_stage_recurse_noattr1 (i32 %n )
@@ -597,8 +601,8 @@ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
597601; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
598602; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
599603; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
600- ; GCN: TotalNumSgprs: 49
601- ; GCN: NumVgprs: 41
604+ ; GCN: TotalNumSgprs: multi_call_with_multi_stage_recurse.numbered_sgpr+6
605+ ; GCN: NumVgprs: multi_call_with_multi_stage_recurse.num_vgpr
602606; GCN: ScratchSize: 2052
603607define amdgpu_kernel void @multi_call_with_multi_stage_recurse (i32 %n ) #0 {
604608 call void @use_stack0 ()
0 commit comments