@@ -481,6 +481,132 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
481481 ret void
482482}
483483
484+ ; GCN-LABEL: {{^}}multi_stage_recurse2:
485+ ; GCN: .set multi_stage_recurse2.num_vgpr, max(41, multi_stage_recurse1.num_vgpr)
486+ ; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr)
487+ ; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr)
488+ ; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size))
489+ ; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
490+ ; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch)
491+ ; GCN: .set multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
492+ ; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion)
493+ ; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
494+ ; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1))
495+ ; GCN: NumVgprs: max(41, multi_stage_recurse1.num_vgpr)
496+ ; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size))
497+ ; GCN-LABEL: {{^}}multi_stage_recurse1:
498+ ; GCN: .set multi_stage_recurse1.num_vgpr, 41
499+ ; GCN: .set multi_stage_recurse1.num_agpr, 0
500+ ; GCN: .set multi_stage_recurse1.numbered_sgpr, 34
501+ ; GCN: .set multi_stage_recurse1.private_seg_size, 16
502+ ; GCN: .set multi_stage_recurse1.uses_vcc, 1
503+ ; GCN: .set multi_stage_recurse1.uses_flat_scratch, 0
504+ ; GCN: .set multi_stage_recurse1.has_dyn_sized_stack, 0
505+ ; GCN: .set multi_stage_recurse1.has_recursion, 1
506+ ; GCN: .set multi_stage_recurse1.has_indirect_call, 0
507+ ; GCN: TotalNumSgprs: 38
508+ ; GCN: NumVgprs: 41
509+ ; GCN: ScratchSize: 16
; First half of a two-function call cycle: forwards %val to
; @multi_stage_recurse2, which in turn calls back into this function, so the
; recursion only closes after two call stages rather than through a direct
; self-call. Uses attribute group #2 (declared outside this excerpt).
510+ define void @multi_stage_recurse1 (i32 %val ) #2 {
511+ call void @multi_stage_recurse2 (i32 %val )
512+ ret void
513+ }
; Second half of the two-function call cycle: forwards %val back to
; @multi_stage_recurse1, which is the function that calls this one.
; Uses attribute group #2 (declared outside this excerpt).
514+ define void @multi_stage_recurse2 (i32 %val ) #2 {
515+ call void @multi_stage_recurse1 (i32 %val )
516+ ret void
517+ }
518+
519+ ; GCN-LABEL: {{^}}usage_multi_stage_recurse:
520+ ; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr)
521+ ; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr)
522+ ; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr)
523+ ; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size))
524+ ; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc)
525+ ; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch)
526+ ; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack)
527+ ; GCN: .set usage_multi_stage_recurse.has_recursion, or(1, multi_stage_recurse1.has_recursion)
528+ ; GCN: .set usage_multi_stage_recurse.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call)
529+ ; GCN: TotalNumSgprs: 40
530+ ; GCN: NumVgprs: 41
531+ ; GCN: ScratchSize: 16
; Kernel entry point whose only callee is @multi_stage_recurse1, i.e. it calls
; into the two-function recursion cycle from outside the cycle.
; Uses attribute group #0 (declared outside this excerpt).
532+ define amdgpu_kernel void @usage_multi_stage_recurse (i32 %n ) #0 {
533+ call void @multi_stage_recurse1 (i32 %n )
534+ ret void
535+ }
536+
537+ ; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
538+ ; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr)
539+ ; GCN: .set multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
540+ ; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(34, multi_stage_recurse_noattr1.numbered_sgpr)
541+ ; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size))
542+ ; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
543+ ; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch)
544+ ; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
545+ ; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
546+ ; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
547+ ; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1))
548+ ; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr)
549+ ; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size))
550+ ; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
551+ ; GCN: .set multi_stage_recurse_noattr1.num_vgpr, 41
552+ ; GCN: .set multi_stage_recurse_noattr1.num_agpr, 0
553+ ; GCN: .set multi_stage_recurse_noattr1.numbered_sgpr, 34
554+ ; GCN: .set multi_stage_recurse_noattr1.private_seg_size, 16
555+ ; GCN: .set multi_stage_recurse_noattr1.uses_vcc, 1
556+ ; GCN: .set multi_stage_recurse_noattr1.uses_flat_scratch, 0
557+ ; GCN: .set multi_stage_recurse_noattr1.has_dyn_sized_stack, 0
558+ ; GCN: .set multi_stage_recurse_noattr1.has_recursion, 0
559+ ; GCN: .set multi_stage_recurse_noattr1.has_indirect_call, 0
560+ ; GCN: TotalNumSgprs: 38
561+ ; GCN: NumVgprs: 41
562+ ; GCN: ScratchSize: 16
; First half of a two-function call cycle with the same shape as
; @multi_stage_recurse1 / @multi_stage_recurse2, except that it is tagged with
; attribute group #0 instead of #2 (both groups are declared outside this
; excerpt). Forwards %val to @multi_stage_recurse_noattr2, which calls back here.
563+ define void @multi_stage_recurse_noattr1 (i32 %val ) #0 {
564+ call void @multi_stage_recurse_noattr2 (i32 %val )
565+ ret void
566+ }
; Second half of the attribute-group-#0 call cycle: forwards %val back to
; @multi_stage_recurse_noattr1, which is the function that calls this one.
567+ define void @multi_stage_recurse_noattr2 (i32 %val ) #0 {
568+ call void @multi_stage_recurse_noattr1 (i32 %val )
569+ ret void
570+ }
571+
572+ ; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
573+ ; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr)
574+ ; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr)
575+ ; GCN: .set usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr)
576+ ; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size))
577+ ; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc)
578+ ; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch)
579+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack)
580+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion)
581+ ; GCN: .set usage_multi_stage_recurse_noattrs.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call)
582+ ; GCN: TotalNumSgprs: 40
583+ ; GCN: NumVgprs: 41
584+ ; GCN: ScratchSize: 16
; Kernel entry point whose only callee is @multi_stage_recurse_noattr1, i.e. it
; calls into the attribute-group-#0 recursion cycle from outside the cycle.
; Uses attribute group #0 (declared outside this excerpt).
585+ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs (i32 %n ) #0 {
586+ call void @multi_stage_recurse_noattr1 (i32 %n )
587+ ret void
588+ }
589+
590+ ; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
591+ ; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr)
592+ ; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr)
593+ ; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(43, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr)
594+ ; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size))
595+ ; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc)
596+ ; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch)
597+ ; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, multi_stage_recurse1.has_dyn_sized_stack)
598+ ; GCN: .set multi_call_with_multi_stage_recurse.has_recursion, or(1, use_stack0.has_recursion, use_stack1.has_recursion, multi_stage_recurse1.has_recursion)
599+ ; GCN: .set multi_call_with_multi_stage_recurse.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call, multi_stage_recurse1.has_indirect_call)
600+ ; GCN: TotalNumSgprs: 49
601+ ; GCN: NumVgprs: 41
602+ ; GCN: ScratchSize: 2052
; Kernel entry point that makes three calls in sequence: @use_stack0 and
; @use_stack1 (both defined outside this excerpt), followed by
; @multi_stage_recurse1, which is part of a two-function recursion cycle.
; This mixes callees outside the cycle with one inside it in a single caller.
; Uses attribute group #0 (declared outside this excerpt).
603+ define amdgpu_kernel void @multi_call_with_multi_stage_recurse (i32 %n ) #0 {
604+ call void @use_stack0 ()
605+ call void @use_stack1 ()
606+ call void @multi_stage_recurse1 (i32 %n )
607+ ret void
608+ }
609+
484610; Make sure there's no assertion failure when an sgpr96 is used.
485611; GCN-LABEL: {{^}}count_use_sgpr96_external_call
486612; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
0 commit comments