@@ -491,3 +491,132 @@ body: |
491491 %1:_(p5) = G_DYN_STACKALLOC %0, 32
492492 S_ENDPGM 0, implicit %1
493493 ...
494+
495+ ---
# test_dyn_stackalloc_vgpr_align4: G_DYN_STACKALLOC whose size operand is copied
# from $vgpr0, with a requested alignment of 4.
# The checks below expect the size to be passed through
# llvm.amdgcn.wave.reduce.umax into an sgpr value, shifted left by 6 (WAVE64)
# or 5 (WAVE32), and added to a copy of $sp_reg; the sum is written back to
# $sp_reg and the copied pre-increment value is the result.
# No G_PTRMASK realignment appears in the expected output for this alignment.
# NOTE(review): the shift amounts look like log2 of the wave size - confirm.
496+ name : test_dyn_stackalloc_vgpr_align4
497+ legalized : true
498+ frameInfo :
499+ maxAlignment : 4
500+ stack :
501+ - { id: 0, type: variable-sized, alignment: 4 }
502+ body : |
503+ bb.0:
504+ liveins: $vgpr0
505+
506+ ; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align4
507+ ; WAVE64: liveins: $vgpr0
508+ ; WAVE64-NEXT: {{ $}}
509+ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
510+ ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
511+ ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
512+ ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
513+ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
514+ ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
515+ ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
516+ ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
517+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
518+ ;
519+ ; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align4
520+ ; WAVE32: liveins: $vgpr0
521+ ; WAVE32-NEXT: {{ $}}
522+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
523+ ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
524+ ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
525+ ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
526+ ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
527+ ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
528+ ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
529+ ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
530+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
531+ %0:_(s32) = COPY $vgpr0
532+ %1:_(p5) = G_DYN_STACKALLOC %0, 4
533+ S_ENDPGM 0, implicit %1
534+ ...
535+
536+ ---
# test_dyn_stackalloc_vgpr_align16: G_DYN_STACKALLOC whose size operand is
# copied from $vgpr0, with a requested alignment of 16.
# The expected output has the same shape as the checks written for the
# alignment-4 variant of this test: wave.reduce.umax of the size, a left shift
# by 6 (WAVE64) or 5 (WAVE32), then a G_PTR_ADD onto a copy of $sp_reg, with
# no G_PTRMASK realignment step.
# NOTE(review): presumably 16 does not exceed the stack alignment the target
# already guarantees, which would explain the missing realignment - confirm.
537+ name : test_dyn_stackalloc_vgpr_align16
538+ legalized : true
539+ frameInfo :
540+ maxAlignment : 16
541+ stack :
542+ - { id: 0, type: variable-sized, alignment: 16 }
543+ body : |
544+ bb.0:
545+ liveins: $vgpr0
546+
547+ ; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align16
548+ ; WAVE64: liveins: $vgpr0
549+ ; WAVE64-NEXT: {{ $}}
550+ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
551+ ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
552+ ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
553+ ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
554+ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
555+ ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
556+ ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
557+ ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
558+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
559+ ;
560+ ; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align16
561+ ; WAVE32: liveins: $vgpr0
562+ ; WAVE32-NEXT: {{ $}}
563+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
564+ ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
565+ ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
566+ ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
567+ ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
568+ ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sgpr(p5) = COPY [[COPY1]](p5)
569+ ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY2]], [[SHL]](s32)
570+ ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD]](p5)
571+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[COPY2]](p5)
572+ %0:_(s32) = COPY $vgpr0
573+ %1:_(p5) = G_DYN_STACKALLOC %0, 16
574+ S_ENDPGM 0, implicit %1
575+ ...
576+
577+ ---
# test_dyn_stackalloc_vgpr_align64: G_DYN_STACKALLOC whose size operand is
# copied from $vgpr0, with a requested alignment of 64.
# As well as the wave.reduce.umax + shift of the size, the checks expect the
# copied $sp_reg to be rounded up before use: G_PTR_ADD of 4095 followed by
# G_PTRMASK with -4096 on WAVE64, and 2047 / -2048 on WAVE32.
# The masked pointer is the result, and masked pointer + shifted size is
# written back to $sp_reg.
# NOTE(review): 4096 == 64 << 6 and 2048 == 64 << 5, so the alignment appears
# to be scaled by the same shift as the size - confirm against the lowering.
578+ name : test_dyn_stackalloc_vgpr_align64
579+ legalized : true
580+ frameInfo :
581+ maxAlignment : 64
582+ stack :
583+ - { id: 0, type: variable-sized, alignment: 64 }
584+ body : |
585+ bb.0:
586+ liveins: $vgpr0
587+
588+ ; WAVE64-LABEL: name: test_dyn_stackalloc_vgpr_align64
589+ ; WAVE64: liveins: $vgpr0
590+ ; WAVE64-NEXT: {{ $}}
591+ ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
592+ ; WAVE64-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
593+ ; WAVE64-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6
594+ ; WAVE64-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
595+ ; WAVE64-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
596+ ; WAVE64-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095
597+ ; WAVE64-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
598+ ; WAVE64-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -4096
599+ ; WAVE64-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
600+ ; WAVE64-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
601+ ; WAVE64-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
602+ ; WAVE64-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
603+ ;
604+ ; WAVE32-LABEL: name: test_dyn_stackalloc_vgpr_align64
605+ ; WAVE32: liveins: $vgpr0
606+ ; WAVE32-NEXT: {{ $}}
607+ ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
608+ ; WAVE32-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.reduce.umax), [[COPY]](s32), 0
609+ ; WAVE32-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5
610+ ; WAVE32-NEXT: [[SHL:%[0-9]+]]:sgpr(s32) = G_SHL [[INTRINSIC_CONVERGENT]], [[C]](s32)
611+ ; WAVE32-NEXT: [[COPY1:%[0-9]+]]:sgpr(p5) = COPY $sp_reg
612+ ; WAVE32-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2047
613+ ; WAVE32-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[COPY1]], [[C1]](s32)
614+ ; WAVE32-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -2048
615+ ; WAVE32-NEXT: [[PTRMASK:%[0-9]+]]:sgpr(p5) = G_PTRMASK [[PTR_ADD]], [[C2]](s32)
616+ ; WAVE32-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p5) = G_PTR_ADD [[PTRMASK]], [[SHL]](s32)
617+ ; WAVE32-NEXT: $sp_reg = COPY [[PTR_ADD1]](p5)
618+ ; WAVE32-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p5)
619+ %0:_(s32) = COPY $vgpr0
620+ %1:_(p5) = G_DYN_STACKALLOC %0, 64
621+ S_ENDPGM 0, implicit %1
622+ ...
0 commit comments