diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d57074272487e..db49b7893821a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1580,6 +1580,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) { static unsigned getVGPRSpillSaveOpcode(unsigned Size) { switch (Size) { + case 2: + return AMDGPU::SI_SPILL_V16_SAVE; case 4: return AMDGPU::SI_SPILL_V32_SAVE; case 8: @@ -1807,6 +1809,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { switch (Size) { + case 2: + return AMDGPU::SI_SPILL_V16_RESTORE; case 4: return AMDGPU::SI_SPILL_V32_RESTORE; case 8: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 6f80dbcfe5e71..dc5158352e77a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1003,6 +1003,7 @@ multiclass SI_SPILL_VGPR { } // End UseNamedOperandTable = 1, Spill = 1, VALU = 1, SchedRW = [WriteVMEM] } +defm SI_SPILL_V16 : SI_SPILL_VGPR ; defm SI_SPILL_V32 : SI_SPILL_VGPR ; defm SI_SPILL_V64 : SI_SPILL_VGPR ; defm SI_SPILL_V96 : SI_SPILL_VGPR ; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index ae59fa8234d03..6f99c7b4c4962 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1280,6 +1280,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { case AMDGPU::SI_SPILL_WWM_V32_RESTORE: case AMDGPU::SI_SPILL_WWM_AV32_SAVE: case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: + case AMDGPU::SI_SPILL_V16_SAVE: + case AMDGPU::SI_SPILL_V16_RESTORE: return 1; default: llvm_unreachable("Invalid spill opcode"); } @@ -2350,6 +2352,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_V96_SAVE: case AMDGPU::SI_SPILL_V64_SAVE: case AMDGPU::SI_SPILL_V32_SAVE: + case 
AMDGPU::SI_SPILL_V16_SAVE: case AMDGPU::SI_SPILL_A1024_SAVE: case AMDGPU::SI_SPILL_A512_SAVE: case AMDGPU::SI_SPILL_A384_SAVE: @@ -2390,8 +2393,15 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == MFI->getStackPtrOffsetReg()); - unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR - : AMDGPU::BUFFER_STORE_DWORD_OFFSET; + unsigned Opc; + if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) { + assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!"); + Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16; + } else { + Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR + : AMDGPU::BUFFER_STORE_DWORD_OFFSET; + } + auto *MBB = MI->getParent(); bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); if (IsWWMRegSpill) { @@ -2409,6 +2419,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MI->eraseFromParent(); return true; } + case AMDGPU::SI_SPILL_V16_RESTORE: case AMDGPU::SI_SPILL_V32_RESTORE: case AMDGPU::SI_SPILL_V64_RESTORE: case AMDGPU::SI_SPILL_V96_RESTORE: @@ -2458,8 +2469,14 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == MFI->getStackPtrOffsetReg()); - unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR - : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; + unsigned Opc; + if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) { + assert(ST.enableFlatScratch() && "Flat Scratch is not enabled!"); + Opc = AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16; + } else { + Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_LOAD_DWORD_SADDR + : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; + } auto *MBB = MI->getParent(); bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode()); if (IsWWMRegSpill) { diff --git a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir new file mode 100644 index 0000000000000..0c694d9f49e18 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir @@ -0,0 +1,140 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 -mattr=+real-true16 -run-pass=prologepilog -o - %s | FileCheck -check-prefix=EXPANDED %s + +--- +name: spill_restore_vgpr16 +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 4, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 + hasSpilledVGPRs: true +body: | + ; EXPANDED-LABEL: name: spill_restore_vgpr16 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, 
implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16 + bb.0: + successors: %bb.1(0x80000000) + S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + SI_SPILL_V16_SAVE killed $vgpr0_hi16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.1, addrspace 5) + S_NOP 0, implicit renamable $vgpr0_lo16 + SI_SPILL_V16_SAVE killed $vgpr0_lo16, %stack.0, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.0, addrspace 5) + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + bb.1: + successors: %bb.2(0x80000000) + S_NOP 1 + bb.2: + $vgpr0_lo16 = SI_SPILL_V16_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.0, addrspace 5) + $vgpr0_hi16 = SI_SPILL_V16_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.1, addrspace 5) + S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16 +... 
+ +--- +name: spill_restore_vgpr16_middle_of_block +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 4, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 + hasSpilledVGPRs: true +body: | + ; EXPANDED-LABEL: name: spill_restore_vgpr16_middle_of_block + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16 + bb.0: + successors: %bb.1(0x80000000) + S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + SI_SPILL_V16_SAVE killed $vgpr0_hi16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.1, addrspace 5) 
+ S_NOP 0, implicit renamable $vgpr0_lo16 + SI_SPILL_V16_SAVE killed $vgpr0_lo16, %stack.0, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.0, addrspace 5) + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + bb.1: + successors: %bb.2(0x80000000) + S_NOP 1 + bb.2: + S_NOP 1 + $vgpr0_lo16 = SI_SPILL_V16_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.0, addrspace 5) + $vgpr0_hi16 = SI_SPILL_V16_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.1, addrspace 5) + S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16 +... + +--- +name: spill_restore_vgpr16_end_of_block +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 4, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 + hasSpilledVGPRs: true +body: | + ; EXPANDED-LABEL: name: spill_restore_vgpr16_end_of_block + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit renamable $vgpr0_lo16 + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr 
:: (load (s16) from %stack.0, align 4, addrspace 5) + ; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5) + bb.0: + successors: %bb.1(0x80000000) + S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + SI_SPILL_V16_SAVE killed $vgpr0_hi16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.1, addrspace 5) + S_NOP 0, implicit renamable $vgpr0_lo16 + SI_SPILL_V16_SAVE killed $vgpr0_lo16, %stack.0, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.0, addrspace 5) + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + bb.1: + successors: %bb.2(0x80000000) + S_NOP 1 + bb.2: + $vgpr0_lo16 = SI_SPILL_V16_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.0, addrspace 5) + $vgpr0_hi16 = SI_SPILL_V16_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.1, addrspace 5) +... diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.ll b/llvm/test/CodeGen/AMDGPU/spillv16.ll new file mode 100644 index 0000000000000..0e45df223465d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spillv16.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-TRUE16 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck %s -check-prefixes=GCN,GCN-FAKE16 + +define void @spill_i16_alu() { +; GCN-TRUE16-LABEL: spill_i16_alu: +; GCN-TRUE16: ; %bb.0: ; %entry +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l +; GCN-TRUE16-NEXT: 
scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: ;;#ASMSTART +; GCN-TRUE16-NEXT: ;;#ASMEND +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GCN-FAKE16-LABEL: spill_i16_alu: +; GCN-FAKE16: ; %bb.0: ; %entry +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0 +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: ;;#ASMSTART +; GCN-FAKE16-NEXT: ;;#ASMEND +; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca i16, i32 1, align 4, addrspace(5) + + %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + %a = load volatile i16, ptr addrspace(5) %aptr + %add = add i16 %a, 123 + + ; Force %a to spill. 
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () + + %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + store volatile i16 %add, ptr addrspace(5) %outptr + + ret void +} + +define void @spill_i16_alu_two_vals() { +; GCN-TRUE16-LABEL: spill_i16_alu_two_vals: +; GCN-TRUE16: ; %bb.0: ; %entry +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: ;;#ASMSTART +; GCN-TRUE16-NEXT: ;;#ASMEND +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:4 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_hi_b16 v0, off, s32 offset:6 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: v_add_nc_u16 v0.l, 0x7b, v0.l +; GCN-TRUE16-NEXT: scratch_store_d16_hi_b16 off, v0, s32 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GCN-FAKE16-LABEL: spill_i16_alu_two_vals: +; GCN-FAKE16: ; %bb.0: ; %entry +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0 +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: ;;#ASMSTART +; GCN-FAKE16-NEXT: ;;#ASMEND +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:4 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_b32 v1, off, s32 offset:8 ; 4-byte Folded Reload +; GCN-FAKE16-NEXT: v_add_nc_u16 v0, 0x7b, v0 +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v1, s32 dlc +; 
GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:4 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca i16, i32 1, align 4, addrspace(5) + %alloca2 = alloca i16, i32 1, align 4, addrspace(5) + + %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + %a = load volatile i16, ptr addrspace(5) %aptr + %add = add i16 %a, 123 + + ; Force %a to spill. + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () + + %bptr = getelementptr i16, ptr addrspace(5) %alloca2, i32 0 + %b = load volatile i16, ptr addrspace(5) %bptr + %badd = add i16 %b, 123 + %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + store volatile i16 %add, ptr addrspace(5) %outptr + %outptr2 = getelementptr i16, ptr addrspace(5) %alloca2, i32 0 + store volatile i16 %badd, ptr addrspace(5) %outptr2 + + ret void +} + +; Tests after this do not actually test 16 bit spills because there is no use of VGPR_16. 
They could demonstrate 16 bit spills if we update the instructions to use VGPR_16 instead of VGPR_32 + +define void @spill_i16() { +; GCN-TRUE16-LABEL: spill_i16: +; GCN-TRUE16: ; %bb.0: ; %entry +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: ;;#ASMSTART +; GCN-TRUE16-NEXT: ;;#ASMEND +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GCN-FAKE16-LABEL: spill_i16: +; GCN-FAKE16: ; %bb.0: ; %entry +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: ;;#ASMSTART +; GCN-FAKE16-NEXT: ;;#ASMEND +; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca i16, i32 1, align 4, addrspace(5) + + %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + %a = load volatile i16, ptr addrspace(5) %aptr + + ; Force %a to spill. 
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () + + %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + store volatile i16 %a, ptr addrspace(5) %outptr + + ret void +} + +define void @spill_half() { +; GCN-TRUE16-LABEL: spill_half: +; GCN-TRUE16: ; %bb.0: ; %entry +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: ;;#ASMSTART +; GCN-TRUE16-NEXT: ;;#ASMEND +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GCN-FAKE16-LABEL: spill_half: +; GCN-FAKE16: ; %bb.0: ; %entry +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: ;;#ASMSTART +; GCN-FAKE16-NEXT: ;;#ASMEND +; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:4 ; 4-byte Folded Reload +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca half, i32 1, align 4, addrspace(5) + + %aptr = getelementptr half, ptr addrspace(5) %alloca, i32 0 + %a = load volatile half, ptr addrspace(5) %aptr + + ; Force %a to spill. 
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () + + %outptr = getelementptr half, ptr addrspace(5) %alloca, i32 0 + store volatile half %a, ptr addrspace(5) %outptr + + ret void +} + +define void @spill_i16_from_v2i16() { +; GCN-TRUE16-LABEL: spill_i16_from_v2i16: +; GCN-TRUE16: ; %bb.0: ; %entry +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: ;;#ASMSTART +; GCN-TRUE16-NEXT: ;;#ASMEND +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GCN-FAKE16-LABEL: spill_i16_from_v2i16: +; GCN-FAKE16: ; %bb.0: ; %entry +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: ;;#ASMSTART +; GCN-FAKE16-NEXT: ;;#ASMEND +; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) + + %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 + %a = load volatile i16, ptr addrspace(5) %aptr + + ; Force %a to spill. 
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () + + %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 + store volatile i16 %a, ptr addrspace(5) %outptr + + ret void +} + +define void @spill_2xi16_from_v2i16() { +; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16: +; GCN-TRUE16: ; %bb.0: ; %entry +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: ;;#ASMSTART +; GCN-TRUE16-NEXT: ;;#ASMEND +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16: +; GCN-FAKE16: ; %bb.0: ; %entry +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 offset:2 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:12 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: ;;#ASMSTART +; GCN-FAKE16-NEXT: ;;#ASMEND +; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload +; 
GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:12 ; 4-byte Folded Reload +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) + + %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 + %a = load volatile i16, ptr addrspace(5) %aptr + %bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + %b = load volatile i16, ptr addrspace(5) %bptr + + ; Force %a to spill. + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () + + %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 + store volatile i16 %a, ptr addrspace(5) %outptr + %boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + store volatile i16 %b, ptr addrspace(5) %boutptr + + ret void +} + +define void @spill_2xi16_from_v2i16_one_free_reg() { +; GCN-TRUE16-LABEL: spill_2xi16_from_v2i16_one_free_reg: +; GCN-TRUE16: ; %bb.0: ; %entry +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:2 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:8 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:10 ; 2-byte Folded Spill +; GCN-TRUE16-NEXT: ;;#ASMSTART +; GCN-TRUE16-NEXT: ;;#ASMEND +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:8 ; 2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 offset:2 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 offset:10 ; 
2-byte Folded Reload +; GCN-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GCN-TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GCN-FAKE16-LABEL: spill_2xi16_from_v2i16_one_free_reg: +; GCN-FAKE16: ; %bb.0: ; %entry +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v7, off, s32 offset:2 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill +; GCN-FAKE16-NEXT: ;;#ASMSTART +; GCN-FAKE16-NEXT: ;;#ASMEND +; GCN-FAKE16-NEXT: scratch_store_b16 off, v7, s32 offset:2 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload +; GCN-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GCN-FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc +; GCN-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-FAKE16-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) + + %aptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 + %a = load volatile i16, ptr addrspace(5) %aptr + %bptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + %b = load volatile i16, ptr addrspace(5) %bptr + + ; Force %a to spill. 
+ ; Only v0-v6 are clobbered here, so one VGPR (v7) stays free. With +real-true16 both %a and %b are currently spilled; presumably neither would need to spill if the short scratch instructions used VGPR_16 operands, since both i16 values could then share the free register. + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6}" () + + %outptr = getelementptr i16, ptr addrspace(5) %alloca, i32 1 + store volatile i16 %a, ptr addrspace(5) %outptr + %boutptr = getelementptr i16, ptr addrspace(5) %alloca, i32 0 + store volatile i16 %b, ptr addrspace(5) %boutptr + + ret void +} + +define void @spill_v2i16() { +; GCN-LABEL: spill_v2i16: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: scratch_load_b32 v0, off, s32 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: s_setpc_b64 s[30:31] +entry: + %alloca = alloca <2 x i16>, i32 2, align 1, addrspace(5) + + %aptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1 + %a = load volatile <2 x i16>, ptr addrspace(5) %aptr + + ; Force %a to spill. 
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () + + %outptr = getelementptr <2 x i16>, ptr addrspace(5) %alloca, i32 1 + store volatile <2 x i16> %a, ptr addrspace(5) %outptr + + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.mir b/llvm/test/CodeGen/AMDGPU/spillv16.mir new file mode 100644 index 0000000000000..05569bf394c43 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spillv16.mir @@ -0,0 +1,58 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 -mattr=+real-true16 -run-pass=regallocfast -o - %s | FileCheck -check-prefix=SPILLED %s +# RUN: llc -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 -mattr=+real-true16 -run-pass=regallocfast,prologepilog -o - %s | FileCheck -check-prefix=EXPANDED %s + +--- +name: spill_restore_vgpr16 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + ; SPILLED-LABEL: name: spill_restore_vgpr16 + ; SPILLED: bb.0: + ; SPILLED-NEXT: successors: %bb.1(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + ; SPILLED-NEXT: SI_SPILL_V16_SAVE killed $vgpr0_hi16, %stack.1, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.1, addrspace 5) + ; SPILLED-NEXT: SI_SPILL_V16_SAVE killed $vgpr0_lo16, %stack.0, $sgpr32, 0, implicit $exec :: (store (s16) into %stack.0, addrspace 5) + ; SPILLED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.1: + ; SPILLED-NEXT: successors: %bb.2(0x80000000) + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: S_NOP 1 + ; SPILLED-NEXT: {{ $}} + ; SPILLED-NEXT: bb.2: + ; SPILLED-NEXT: $vgpr0_lo16 = SI_SPILL_V16_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s16) from %stack.0, addrspace 5) + ; SPILLED-NEXT: $vgpr0_hi16 = SI_SPILL_V16_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: 
(load (s16) from %stack.1, addrspace 5) + ; SPILLED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16 + ; + ; EXPANDED-LABEL: name: spill_restore_vgpr16 + ; EXPANDED: bb.0: + ; EXPANDED-NEXT: successors: %bb.1(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16 + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, addrspace 5) + ; EXPANDED-NEXT: SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5) + ; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.1: + ; EXPANDED-NEXT: successors: %bb.2(0x80000000) + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: S_NOP 1 + ; EXPANDED-NEXT: {{ $}} + ; EXPANDED-NEXT: bb.2: + ; EXPANDED-NEXT: $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, addrspace 5) + ; EXPANDED-NEXT: $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, addrspace 5) + ; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16 + bb.0: + S_NOP 0, implicit-def %0:vgpr_16, implicit-def %1:vgpr_16 + S_CBRANCH_SCC1 implicit undef $scc, %bb.1 + + bb.1: + S_NOP 1 + + bb.2: + S_NOP 0, implicit %0, implicit %1 +...