From f381aa98d3f17a3f152e88df17cd8433660ab052 Mon Sep 17 00:00:00 2001 From: easyonaadit Date: Wed, 11 Dec 2024 17:00:08 +0530 Subject: [PATCH] pre-commit test cases for dynamic_alloca refactoring --- .../CodeGen/AMDGPU/non-entry-alloca-mir.ll | 573 ++++++++++++++++++ 1 file changed, 573 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/non-entry-alloca-mir.ll diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca-mir.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca-mir.ll new file mode 100644 index 0000000000000..7adae502ff144 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca-mir.ll @@ -0,0 +1,573 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=MUBUF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=MUBUF-V5 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel < %s -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=MUBUF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel < %s -mattr=+enable-flat-scratch | FileCheck -check-prefixes=FLATSCR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel < %s -mattr=+enable-flat-scratch -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=FLATSCR %s +; Verify the MIR emitted right after instruction selection for allocas located outside the entry block, with and without flat scratch enabled. + +define amdgpu_kernel void @non_entry_block_alloca(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { + ; MUBUF-LABEL: name: non_entry_block_alloca + ; MUBUF: bb.0.entry: + ; MUBUF-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000) + ; MUBUF-NEXT: liveins: $sgpr8_sgpr9 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 
+ ; MUBUF-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; MUBUF-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; MUBUF-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc + ; MUBUF-NEXT: S_BRANCH %bb.1 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.1.Flow: + ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %17, %bb.3 + ; MUBUF-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI]], implicit $exec + ; MUBUF-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; MUBUF-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_CNDMASK_B32_e64_]] + ; MUBUF-NEXT: S_CMP_LG_U32 killed [[COPY1]], killed [[S_MOV_B32_1]], implicit-def $scc + ; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.4, implicit $scc + ; MUBUF-NEXT: S_BRANCH %bb.2 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.2.bb.0: + ; MUBUF-NEXT: successors: %bb.4(0x80000000) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; MUBUF-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-NEXT: $sp_reg = COPY [[S_ADD_I32_]] + ; MUBUF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed [[V_MOV_B32_e32_]], $private_rsrc_reg, [[COPY2]], 1024, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-NEXT: S_BRANCH %bb.4 + ; MUBUF-NEXT: {{ $}} + ; 
MUBUF-NEXT: bb.3.bb.1: + ; MUBUF-NEXT: successors: %bb.1(0x80000000) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; MUBUF-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY3]], killed [[S_MOV_B32_3]], implicit-def dead $scc + ; MUBUF-NEXT: $sp_reg = COPY [[S_ADD_I32_1]] + ; MUBUF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed [[V_MOV_B32_e32_1]], $private_rsrc_reg, [[COPY3]], 1024, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca2, addrspace 5) + ; MUBUF-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed [[V_MOV_B32_e32_2]], $private_rsrc_reg, [[COPY3]], 1028, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; MUBUF-NEXT: S_BRANCH %bb.1 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.4.bb.2: + ; MUBUF-NEXT: S_ENDPGM 0 + ; + ; MUBUF-V5-LABEL: name: non_entry_block_alloca + ; MUBUF-V5: bb.0.entry: + ; MUBUF-V5-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000) + ; MUBUF-V5-NEXT: liveins: $sgpr8_sgpr9 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 + ; MUBUF-V5-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; MUBUF-V5-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; MUBUF-V5-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-V5-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; MUBUF-V5-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc + ; MUBUF-V5-NEXT: 
S_BRANCH %bb.1 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.1.Flow: + ; MUBUF-V5-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %17, %bb.3 + ; MUBUF-V5-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI]], implicit $exec + ; MUBUF-V5-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; MUBUF-V5-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_CNDMASK_B32_e64_]] + ; MUBUF-V5-NEXT: S_CMP_LG_U32 killed [[COPY1]], killed [[S_MOV_B32_1]], implicit-def $scc + ; MUBUF-V5-NEXT: S_CBRANCH_SCC1 %bb.4, implicit $scc + ; MUBUF-V5-NEXT: S_BRANCH %bb.2 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.2.bb.0: + ; MUBUF-V5-NEXT: successors: %bb.4(0x80000000) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-V5-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; MUBUF-V5-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-V5-NEXT: $sp_reg = COPY [[S_ADD_I32_]] + ; MUBUF-V5-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFSET killed [[V_MOV_B32_e32_]], $private_rsrc_reg, [[COPY2]], 1024, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-V5-NEXT: S_BRANCH %bb.4 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.3.bb.1: + ; MUBUF-V5-NEXT: successors: %bb.1(0x80000000) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-V5-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; MUBUF-V5-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY3]], killed [[S_MOV_B32_3]], implicit-def dead $scc + ; 
MUBUF-V5-NEXT: $sp_reg = COPY [[S_ADD_I32_1]] + ; MUBUF-V5-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFSET killed [[V_MOV_B32_e32_1]], $private_rsrc_reg, [[COPY3]], 1024, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca2, addrspace 5) + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFSET killed [[V_MOV_B32_e32_2]], $private_rsrc_reg, [[COPY3]], 1028, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-V5-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; MUBUF-V5-NEXT: S_BRANCH %bb.1 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.4.bb.2: + ; MUBUF-V5-NEXT: S_ENDPGM 0 + ; + ; FLATSCR-LABEL: name: non_entry_block_alloca + ; FLATSCR: bb.0.entry: + ; FLATSCR-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000) + ; FLATSCR-NEXT: liveins: $sgpr4_sgpr5 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 + ; FLATSCR-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; FLATSCR-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; FLATSCR-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; FLATSCR-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc + ; FLATSCR-NEXT: S_BRANCH %bb.1 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.1.Flow: + ; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_]], %bb.0, %16, %bb.3 + ; FLATSCR-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[PHI]], implicit $exec + ; FLATSCR-NEXT: 
[[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; FLATSCR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[V_CNDMASK_B32_e64_]] + ; FLATSCR-NEXT: S_CMP_LG_U32 killed [[COPY1]], killed [[S_MOV_B32_1]], implicit-def $scc + ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.4, implicit $scc + ; FLATSCR-NEXT: S_BRANCH %bb.2 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.2.bb.0: + ; FLATSCR-NEXT: successors: %bb.4(0x80000000) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; FLATSCR-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; FLATSCR-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; FLATSCR-NEXT: $sp_reg = COPY [[S_ADD_I32_]] + ; FLATSCR-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], [[S_ADD_I32_]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; FLATSCR-NEXT: S_BRANCH %bb.4 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.3.bb.1: + ; FLATSCR-NEXT: successors: %bb.1(0x80000000) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; FLATSCR-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; FLATSCR-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 [[COPY3]], killed [[S_MOV_B32_3]], implicit-def dead $scc + ; FLATSCR-NEXT: $sp_reg = COPY [[S_ADD_I32_1]] + ; FLATSCR-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 1028 + ; FLATSCR-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 [[COPY3]], killed [[S_MOV_B32_4]], implicit-def dead $scc + ; FLATSCR-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + 
; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_1]], [[S_ADD_I32_1]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.alloca2, addrspace 5) + ; FLATSCR-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_2]], killed [[S_ADD_I32_2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; FLATSCR-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; FLATSCR-NEXT: S_BRANCH %bb.1 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.4.bb.2: + ; FLATSCR-NEXT: S_ENDPGM 0 + entry: + %cond = icmp eq i32 %arg.cond, 0 + br i1 %cond, label %bb.0, label %bb.1 + + bb.0: + %alloca = alloca i32, addrspace(5) + store volatile i32 0, ptr addrspace(5) %alloca + br label %bb.2 + + bb.1: + %alloca2 = alloca i32, i32 2, addrspace(5) + %gep1 = getelementptr i32, ptr addrspace(5) %alloca2, i32 1 + store volatile i32 0, ptr addrspace(5) %alloca2 + store volatile i32 1, ptr addrspace(5) %gep1 + br label %bb.2 + + bb.2: + ret void +} + +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_align512(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { + ; MUBUF-LABEL: name: kernel_non_entry_block_static_alloca_align512 + ; MUBUF: bb.0.entry: + ; MUBUF-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; MUBUF-NEXT: liveins: $sgpr8_sgpr9 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 + ; MUBUF-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; MUBUF-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; MUBUF-NEXT: S_BRANCH %bb.1 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.1.bb.0: + ; 
MUBUF-NEXT: successors: %bb.2(0x80000000) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; MUBUF-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; MUBUF-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768 + ; MUBUF-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-NEXT: $sp_reg = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_]], [[COPY2]], $private_rsrc_reg, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; MUBUF-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_1]], [[COPY3]], $private_rsrc_reg, 0, 4, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.2.bb.1: + ; MUBUF-NEXT: S_ENDPGM 0 + ; + ; MUBUF-V5-LABEL: name: kernel_non_entry_block_static_alloca_align512 + ; MUBUF-V5: bb.0.entry: + ; MUBUF-V5-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; MUBUF-V5-NEXT: liveins: $sgpr8_sgpr9 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 + ; MUBUF-V5-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; MUBUF-V5-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-V5-NEXT: 
S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; MUBUF-V5-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; MUBUF-V5-NEXT: S_BRANCH %bb.1 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.1.bb.0: + ; MUBUF-V5-NEXT: successors: %bb.2(0x80000000) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-V5-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; MUBUF-V5-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; MUBUF-V5-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768 + ; MUBUF-V5-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-V5-NEXT: $sp_reg = COPY [[S_AND_B32_]] + ; MUBUF-V5-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-V5-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_]], [[COPY2]], $private_rsrc_reg, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; MUBUF-V5-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_1]], [[COPY3]], $private_rsrc_reg, 0, 4, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.2.bb.1: + ; MUBUF-V5-NEXT: S_ENDPGM 0 + ; + ; FLATSCR-LABEL: name: kernel_non_entry_block_static_alloca_align512 + ; FLATSCR: bb.0.entry: + ; FLATSCR-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; FLATSCR-NEXT: liveins: $sgpr4_sgpr5 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 + ; 
FLATSCR-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; FLATSCR-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; FLATSCR-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; FLATSCR-NEXT: S_BRANCH %bb.1 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.1.bb.0: + ; FLATSCR-NEXT: successors: %bb.2(0x80000000) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; FLATSCR-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; FLATSCR-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; FLATSCR-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768 + ; FLATSCR-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_AND_B32 killed [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; FLATSCR-NEXT: $sp_reg = COPY [[S_AND_B32_]] + ; FLATSCR-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], [[S_AND_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; FLATSCR-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_1]], [[S_AND_B32_]], 4, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.2.bb.1: + ; FLATSCR-NEXT: S_ENDPGM 0 + entry: + %cond = icmp eq i32 %arg.cond, 0 + br i1 %cond, label %bb.0, label %bb.1 + + bb.0: + %alloca = alloca [16 x i32], align 512, addrspace(5) + %gep1 = getelementptr 
[16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store volatile i32 0, ptr addrspace(5) %alloca + store volatile i32 1, ptr addrspace(5) %gep1 + br label %bb.1 + + bb.1: + ret void +} + +define amdgpu_kernel void @kernel_non_entry_block_multiple_static_alloca(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { + ; MUBUF-LABEL: name: kernel_non_entry_block_multiple_static_alloca + ; MUBUF: bb.0.entry: + ; MUBUF-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; MUBUF-NEXT: liveins: $sgpr8_sgpr9 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 + ; MUBUF-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; MUBUF-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; MUBUF-NEXT: S_BRANCH %bb.1 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.1.bb.0: + ; MUBUF-NEXT: successors: %bb.2(0x80000000) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; MUBUF-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; MUBUF-NEXT: $sp_reg = COPY [[S_ADD_I32_]] + ; MUBUF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 6144 + ; MUBUF-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -65536 + ; MUBUF-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed 
[[S_ADD_I32_1]], killed [[S_MOV_B32_3]], implicit-def dead $scc + ; MUBUF-NEXT: $sp_reg = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_]], [[COPY3]], $private_rsrc_reg, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; MUBUF-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_1]], [[COPY4]], $private_rsrc_reg, 0, 4, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; MUBUF-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_2]], [[COPY5]], $private_rsrc_reg, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca2, addrspace 5) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.2.bb.1: + ; MUBUF-NEXT: S_ENDPGM 0 + ; + ; MUBUF-V5-LABEL: name: kernel_non_entry_block_multiple_static_alloca + ; MUBUF-V5: bb.0.entry: + ; MUBUF-V5-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; MUBUF-V5-NEXT: liveins: $sgpr8_sgpr9 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr8_sgpr9 + ; MUBUF-V5-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; MUBUF-V5-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-V5-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; MUBUF-V5-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; MUBUF-V5-NEXT: S_BRANCH %bb.1 
+ ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.1.bb.0: + ; MUBUF-V5-NEXT: successors: %bb.2(0x80000000) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-V5-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; MUBUF-V5-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY1]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; MUBUF-V5-NEXT: $sp_reg = COPY [[S_ADD_I32_]] + ; MUBUF-V5-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; MUBUF-V5-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 6144 + ; MUBUF-V5-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-V5-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -65536 + ; MUBUF-V5-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_ADD_I32_1]], killed [[S_MOV_B32_3]], implicit-def dead $scc + ; MUBUF-V5-NEXT: $sp_reg = COPY [[S_AND_B32_]] + ; MUBUF-V5-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-V5-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_]], [[COPY3]], $private_rsrc_reg, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; MUBUF-V5-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_1]], [[COPY4]], $private_rsrc_reg, 0, 4, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; MUBUF-V5-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = 
COPY [[S_AND_B32_]] + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_2]], [[COPY5]], $private_rsrc_reg, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca2, addrspace 5) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.2.bb.1: + ; MUBUF-V5-NEXT: S_ENDPGM 0 + ; + ; FLATSCR-LABEL: name: kernel_non_entry_block_multiple_static_alloca + ; FLATSCR: bb.0.entry: + ; FLATSCR-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; FLATSCR-NEXT: liveins: $sgpr4_sgpr5 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 + ; FLATSCR-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg.cond.kernarg.offset, align 8, addrspace 4) + ; FLATSCR-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; FLATSCR-NEXT: S_CMP_LG_U32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc + ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc + ; FLATSCR-NEXT: S_BRANCH %bb.1 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.1.bb.0: + ; FLATSCR-NEXT: successors: %bb.2(0x80000000) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; FLATSCR-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; FLATSCR-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 [[COPY1]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; FLATSCR-NEXT: $sp_reg = COPY [[S_ADD_I32_]] + ; FLATSCR-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sp_reg + ; FLATSCR-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 6144 + ; FLATSCR-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; FLATSCR-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -65536 + ; FLATSCR-NEXT: 
[[S_AND_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_AND_B32 killed [[S_ADD_I32_1]], killed [[S_MOV_B32_3]], implicit-def dead $scc + ; FLATSCR-NEXT: $sp_reg = COPY [[S_AND_B32_]] + ; FLATSCR-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; FLATSCR-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32_xexec_hi = S_ADD_I32 [[S_ADD_I32_]], killed [[S_MOV_B32_4]], implicit-def dead $scc + ; FLATSCR-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], [[S_ADD_I32_]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; FLATSCR-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_1]], killed [[S_ADD_I32_2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; FLATSCR-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_2]], [[S_AND_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.alloca2, addrspace 5) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.2.bb.1: + ; FLATSCR-NEXT: S_ENDPGM 0 + entry: + %cond = icmp eq i32 %arg.cond, 0 + br i1 %cond, label %bb.0, label %bb.1 + + bb.0: + %alloca = alloca [16 x i32], addrspace(5) + %alloca2 = alloca i64, i32 12, align 1024, addrspace(5) + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store volatile i32 0, ptr addrspace(5) %alloca + store volatile i32 1, ptr addrspace(5) %gep1 + store volatile i32 2, ptr addrspace(5) %alloca2 + br label %bb.1 + + bb.1: + ret void +} + +define void @device_non_entry_block_static_alloca(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { + ; MUBUF-LABEL: name: device_non_entry_block_static_alloca + ; MUBUF: bb.0.entry: + ; MUBUF-NEXT: 
successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; MUBUF-NEXT: liveins: $vgpr2, $vgpr3 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; MUBUF-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; MUBUF-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec + ; MUBUF-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-NEXT: S_BRANCH %bb.1 + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.1.bb.0: + ; MUBUF-NEXT: successors: %bb.2(0x80000000) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr32 + ; MUBUF-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 3072 + ; MUBUF-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; MUBUF-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768 + ; MUBUF-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-NEXT: $sgpr32 = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2 + ; MUBUF-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_3]], [[COPY3]], implicit $exec + ; MUBUF-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_]], [[COPY4]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; 
MUBUF-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_1]], killed [[V_LSHL_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-NEXT: {{ $}} + ; MUBUF-NEXT: bb.2.bb.1: + ; MUBUF-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-NEXT: SI_RETURN + ; + ; MUBUF-V5-LABEL: name: device_non_entry_block_static_alloca + ; MUBUF-V5: bb.0.entry: + ; MUBUF-V5-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; MUBUF-V5-NEXT: liveins: $vgpr2, $vgpr3 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; MUBUF-V5-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; MUBUF-V5-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; MUBUF-V5-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec + ; MUBUF-V5-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-V5-NEXT: S_BRANCH %bb.1 + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.1.bb.0: + ; MUBUF-V5-NEXT: successors: %bb.2(0x80000000) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr32 + ; MUBUF-V5-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 3072 + ; MUBUF-V5-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; MUBUF-V5-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768 + ; MUBUF-V5-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; MUBUF-V5-NEXT: $sgpr32 = COPY [[S_AND_B32_]] + ; MUBUF-V5-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; MUBUF-V5-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2 + ; MUBUF-V5-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; 
MUBUF-V5-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_3]], [[COPY3]], implicit $exec + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; MUBUF-V5-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_]], [[COPY4]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; MUBUF-V5-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; MUBUF-V5-NEXT: BUFFER_STORE_DWORD_OFFEN killed [[V_MOV_B32_e32_1]], killed [[V_LSHL_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; MUBUF-V5-NEXT: {{ $}} + ; MUBUF-V5-NEXT: bb.2.bb.1: + ; MUBUF-V5-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; MUBUF-V5-NEXT: SI_RETURN + ; + ; FLATSCR-LABEL: name: device_non_entry_block_static_alloca + ; FLATSCR: bb.0.entry: + ; FLATSCR-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FLATSCR-NEXT: liveins: $vgpr2, $vgpr3 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; FLATSCR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; FLATSCR-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; FLATSCR-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], implicit $exec + ; FLATSCR-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF killed [[V_CMP_EQ_U32_e64_]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; FLATSCR-NEXT: S_BRANCH %bb.1 + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.1.bb.0: + ; FLATSCR-NEXT: successors: %bb.2(0x80000000) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr32 + ; FLATSCR-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 3072 + 
; FLATSCR-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY2]], killed [[S_MOV_B32_1]], implicit-def dead $scc + ; FLATSCR-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -32768 + ; FLATSCR-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_AND_B32 killed [[S_ADD_I32_]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; FLATSCR-NEXT: $sgpr32 = COPY [[S_AND_B32_]] + ; FLATSCR-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc + ; FLATSCR-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 2 + ; FLATSCR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]] + ; FLATSCR-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_3]], [[COPY3]], implicit $exec + ; FLATSCR-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_]], [[S_AND_B32_]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.alloca, addrspace 5) + ; FLATSCR-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD killed [[V_MOV_B32_e32_1]], killed [[V_LSHL_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.gep1, addrspace 5) + ; FLATSCR-NEXT: {{ $}} + ; FLATSCR-NEXT: bb.2.bb.1: + ; FLATSCR-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; FLATSCR-NEXT: SI_RETURN + entry: + %cond = icmp eq i32 %arg.cond, 0 + br i1 %cond, label %bb.0, label %bb.1 + + bb.0: + %alloca = alloca i32, i32 10, align 512, addrspace(5) + %gep1 = getelementptr i32, ptr addrspace(5) %alloca, i32 %in + store volatile i32 0, ptr addrspace(5) %alloca + store volatile i32 1, ptr addrspace(5) %gep1 + br label %bb.1 + + bb.1: + ret void +}