diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 7523b619748cc..4c571a36e4896 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2432,7 +2432,94 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MI->eraseFromParent(); return true; } + case AMDGPU::S_ADD_I32: { + // TODO: Handle s_or_b32, s_and_b32. + unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1; + MachineOperand &OtherOp = MI->getOperand(OtherOpIdx); + assert(FrameReg || MFI->isBottomOfStack()); + + MachineOperand &DstOp = MI->getOperand(0); + const DebugLoc &DL = MI->getDebugLoc(); + Register MaterializedReg = FrameReg; + + // Defend against live scc, which should never happen in practice. + bool DeadSCC = MI->getOperand(3).isDead(); + + Register TmpReg; + + if (FrameReg && !ST.enableFlatScratch()) { + // FIXME: In the common case where the add does not also read its result + // (i.e. this isn't a reg += fi), it's not finding the dest reg as + // available. + TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI, + false, 0); + BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32)) + .addDef(TmpReg, RegState::Renamable) + .addReg(FrameReg) + .addImm(ST.getWavefrontSizeLog2()) + .setOperandDead(3); // Set SCC dead + MaterializedReg = TmpReg; + } + + int64_t Offset = FrameInfo.getObjectOffset(Index); + + // For the non-immediate case, we could fall through to the default + // handling, but we do an in-place update of the result register here to + // avoid scavenging another register. + if (OtherOp.isImm()) { + OtherOp.setImm(OtherOp.getImm() + Offset); + Offset = 0; + + if (MaterializedReg) + FIOp.ChangeToRegister(MaterializedReg, false); + else + FIOp.ChangeToImmediate(0); + } else if (MaterializedReg) { + // If we can't fold the other operand, do another increment. 
+ Register DstReg = DstOp.getReg(); + + if (!TmpReg && MaterializedReg == FrameReg) { + TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, + MI, false, 0); + DstReg = TmpReg; + } + + auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32)) + .addDef(DstReg, RegState::Renamable) + .addReg(MaterializedReg, RegState::Kill) + .add(OtherOp); + if (DeadSCC) + AddI32.setOperandDead(3); + + MaterializedReg = DstReg; + + OtherOp.ChangeToRegister(MaterializedReg, false); + OtherOp.setIsKill(true); + OtherOp.setIsRenamable(true); + FIOp.ChangeToImmediate(Offset); + } else { + // If we don't have any other offset to apply, we can just directly + // interpret the frame index as the offset. + FIOp.ChangeToImmediate(Offset); + } + + if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) { + assert(Offset == 0); + MI->removeOperand(3); + MI->removeOperand(OtherOpIdx); + MI->setDesc(TII->get(FIOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32)); + } else if (DeadSCC && FIOp.isImm() && FIOp.getImm() == 0) { + assert(Offset == 0); + MI->removeOperand(3); + MI->removeOperand(FIOperandNum); + MI->setDesc( + TII->get(OtherOp.isReg() ? 
AMDGPU::COPY : AMDGPU::S_MOV_B32)); + } + + assert(!FIOp.isFI()); + return true; + } default: { // Other access to frame index const DebugLoc &DL = MI->getDebugLoc(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index f4fd803c8dda8..04833eaaa3283 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -15,11 +15,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 -; GFX9-NEXT: s_add_i32 s1, s1, 0 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s0, 0 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -36,8 +34,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_i32 s0, s0, 0 -; GFX10-NEXT: s_add_i32 s1, s1, 0 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -51,11 +47,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_lshl_b32 s1, s0, 2 ; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_add_i32 s1, s1, 0 ; GFX940-NEXT: s_lshl_b32 s0, s0, 2 ; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 s0, s0, 0 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm @@ -68,8 +62,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 -; 
GFX11-NEXT: s_add_i32 s0, s0, 0 -; GFX11-NEXT: s_add_i32 s1, s1, 0 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc @@ -84,8 +76,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX12-NEXT: s_and_b32 s1, s0, 15 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-NEXT: s_lshl_b32 s1, s1, 2 -; GFX12-NEXT: s_add_co_i32 s0, s0, 0 -; GFX12-NEXT: s_add_co_i32 s1, s1, 0 ; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS @@ -1042,13 +1032,13 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-LABEL: store_load_large_imm_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_movk_i32 s0, 0x3e80 -; GFX9-NEXT: s_add_i32 s1, s32, 4 +; GFX9-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-NEXT: s_add_i32 s1, s32, s0 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_i32 s0, s0, s1 +; GFX9-NEXT: s_add_i32 s0, s1, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ -1059,10 +1049,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 13 -; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 -; GFX10-NEXT: s_add_i32 s1, s32, 4 -; GFX10-NEXT: s_add_i32 s0, s0, s1 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: s_add_i32 s1, s32, s0 +; GFX10-NEXT: s_add_i32 s0, s1, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 @@ -1074,13 +1064,13 @@ define void @store_load_large_imm_offset_foo() { ; GFX940-LABEL: 
store_load_large_imm_offset_foo: ; GFX940: ; %bb.0: ; %bb ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v0, 13 ; GFX940-NEXT: s_movk_i32 s0, 0x3e80 -; GFX940-NEXT: s_add_i32 s1, s32, 4 +; GFX940-NEXT: v_mov_b32_e32 v0, 13 +; GFX940-NEXT: s_add_i32 s1, s32, s0 ; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_add_i32 s0, s0, s1 +; GFX940-NEXT: s_add_i32 s0, s1, 4 ; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 @@ -1092,9 +1082,9 @@ define void @store_load_large_imm_offset_foo() { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 ; GFX11-NEXT: s_movk_i32 s0, 0x3e80 -; GFX11-NEXT: s_add_i32 s1, s32, 4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s1, s32, s0 +; GFX11-NEXT: s_add_i32 s0, s1, 4 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir index a09b39069e5c9..585bfb4c58eae 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir @@ -21,13 +21,13 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def dead $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable 
$sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__inline_imm__fi_offset0 - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def dead $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, $sgpr4, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm__fi_offset0 @@ -54,13 +54,13 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 12, implicit-def dead $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__inline_imm - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 12, implicit-def dead $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 12, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__inline_imm @@ -88,25 +88,21 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 16, implicit-def $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: 
renamable $sgpr7 = S_ADD_I32 28, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 16, implicit-def $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 28, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__inline_imm___fi_offset_inline_imm - ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 16, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 12, killed $sgpr4, implicit-def $scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 28, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 12, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7 @@ -125,13 +121,13 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset0 - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def dead $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset0 - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: 
renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def dead $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, $sgpr4, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset0 @@ -158,13 +154,13 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__literal - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__literal - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 68, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__literal @@ -192,25 +188,21 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__literal__fi_offset96 - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__literal__fi_offset96 - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 
96, implicit-def $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__literal__fi_offset96 - ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__literal__fi_offset96 - ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 68, killed $sgpr4, implicit-def $scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -230,25 +222,21 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32____fi_offset96__literal - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32____fi_offset96__literal - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc + ; MUBUFW32: renamable $sgpr4 = 
S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 164, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32____fi_offset96__literal - ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32____fi_offset96__literal - ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 68, implicit-def $scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 164, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -270,27 +258,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 
$sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0 ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -312,27 +304,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 ; MUBUFW32-NEXT: 
SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 %stack.0, $sgpr8, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -355,31 +351,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc 
+ ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 80, implicit-def dead $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 80, implicit-def dead $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 80, implicit-def dead $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc SI_RETURN implicit $sgpr7 @@ -402,71 +398,157 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed 
renamable $sgpr7, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 80, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr7, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr4, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_literal_offset__sgpr ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 80, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 80, killed renamable $sgpr4, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, $sgpr8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc ... 
-# FIXME: Fail verifier -# --- -# name: s_add_i32__kernel__literal__fi_offset96__offset_literal -# tracksRegLiveness: true -# stack: -# - { id: 0, size: 96, alignment: 16 } -# - { id: 1, size: 128, alignment: 4 } -# machineFunctionInfo: -# scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' -# frameOffsetReg: '$sgpr33' -# stackPtrOffsetReg: '$sgpr32' -# isEntryFunction: true -# body: | -# bb.0: -# renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc -# SI_RETURN implicit $sgpr7 -# ... - -# --- -# name: s_add_i32__kernel__fi_offset96__offset_literal__literal -# tracksRegLiveness: true -# stack: -# - { id: 0, size: 96, alignment: 16 } -# - { id: 1, size: 128, alignment: 4 } -# machineFunctionInfo: -# scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' -# frameOffsetReg: '$sgpr33' -# stackPtrOffsetReg: '$sgpr32' -# isEntryFunction: true -# body: | -# bb.0: -# renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def $scc -# SI_RETURN implicit $sgpr7, implicit $scc - -# ... + +--- +name: s_add_i32__kernel__literal__fi_offset96__offset_literal +tracksRegLiveness: true +stack: + - { id: 0, size: 96, alignment: 16 } + - { id: 1, size: 128, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal + ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal + ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: 
$sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal + ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal + ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def dead $scc + SI_RETURN implicit $sgpr7 +... + +--- +name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc +tracksRegLiveness: true +stack: + - { id: 0, size: 96, alignment: 16 } + - { id: 1, size: 128, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc + ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + ; + ; MUBUFW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc + ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 
$sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + ; + ; FLATSCRW64-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + ; + ; FLATSCRW32-LABEL: name: s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 164, 0, implicit-def $scc + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + renamable $sgpr7 = S_ADD_I32 68, %stack.1, implicit-def $scc + SI_RETURN implicit $sgpr7, implicit $scc +... + +--- +name: s_add_i32__kernel__fi_offset96__offset_literal__literal +tracksRegLiveness: true +stack: + - { id: 0, size: 96, alignment: 16 } + - { id: 1, size: 128, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal + ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal + ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, 
implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal + ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__kernel__fi_offset96__offset_literal__literal + ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_I32 %stack.1, 68, implicit-def dead $scc + SI_RETURN implicit $sgpr7 + +... --- name: s_add_i32__kernel__sgpr__fi_literal_offset @@ -620,27 +702,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW64: liveins: $sgpr8 ; 
FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_offset0__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, $sgpr32, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -709,31 +795,31 @@ body: | ; MUBUFW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW64: liveins: $sgpr8 ; MUBUFW64-NEXT: {{ $}} - ; MUBUFW64-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; MUBUFW32: liveins: $sgpr8 ; MUBUFW32-NEXT: {{ $}} - ; MUBUFW32-NEXT: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 96, implicit-def $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc + ; 
MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr7, 96, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW64: liveins: $sgpr8 ; FLATSCRW64-NEXT: {{ $}} - ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 96, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__sgpr__fi_literal_offset__live_scc ; FLATSCRW32: liveins: $sgpr8 ; FLATSCRW32-NEXT: {{ $}} - ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 96, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr8, killed $sgpr4, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed renamable $sgpr4, 96, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -754,25 +840,21 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 
40, $sgpr4, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, $sgpr4, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, killed $sgpr4, implicit-def $scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, $sgpr32, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -792,25 +874,21 @@ machineFunctionInfo: body: | bb.0: ; MUBUFW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; MUBUFW64: $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc - ; MUBUFW64-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc ; MUBUFW64-NEXT: 
SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; MUBUFW32: $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def $scc - ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 killed $sgpr4, 32, implicit-def $scc - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 $sgpr4, 40, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW64: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc - ; FLATSCRW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW32: $sgpr4 = S_ADD_I32 $sgpr32, 32, implicit-def $scc - ; FLATSCRW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, 8, implicit-def $scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 $sgpr32, 40, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -835,7 +913,7 @@ body: | ; MUBUFW64-NEXT: {{ $}} ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm @@ 
-843,15 +921,15 @@ body: | ; MUBUFW32-NEXT: {{ $}} ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 8, 32, implicit-def $scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 40, 0, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 8, %stack.1, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc @@ -876,7 +954,7 @@ body: | ; MUBUFW64-NEXT: {{ $}} ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; MUBUFW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm @@ -884,17 +962,503 @@ body: | ; MUBUFW32-NEXT: {{ $}} ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, 
implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW64-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc ; ; FLATSCRW32-LABEL: name: s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm - ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 32, 8, implicit-def $scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_I32 0, 40, implicit-def $scc ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc renamable $sgpr7 = S_ADD_I32 %stack.1, 8, implicit-def $scc SI_RETURN implicit $sgpr7, implicit $scc ... + +--- +name: s_add_i32__0__fi_offset0 +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_i32__0__fi_offset0 + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr4 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_i32__0__fi_offset0 + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr4 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__0__fi_offset0 + ; FLATSCRW64: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__0__fi_offset0 + ; FLATSCRW32: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_I32 0, 
%stack.0, implicit-def dead $scc + SI_RETURN implicit $sgpr7 + +... + +--- +name: s_add_i32__fi_offset0__0 +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__0 + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = COPY $sgpr4 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__0 + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = COPY $sgpr4 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__0 + ; FLATSCRW64: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__0 + ; FLATSCRW32: renamable $sgpr7 = COPY $sgpr32 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_I32 %stack.0, 0, implicit-def dead $scc + SI_RETURN implicit $sgpr7 + +... 
+ +--- +name: s_add_i32__same_sgpr__fi_offset0 +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr7 + ; MUBUFW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0 + ; MUBUFW64: liveins: $sgpr7 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr7, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0 + ; MUBUFW32: liveins: $sgpr7 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr7, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__same_sgpr__fi_offset0 + ; FLATSCRW64: liveins: $sgpr7 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr7, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__same_sgpr__fi_offset0 + ; FLATSCRW32: liveins: $sgpr7 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr7, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_I32 $sgpr7, %stack.0, implicit-def dead $scc + SI_RETURN implicit $sgpr7 + +... 
+ +--- +name: s_add_i32__different_sgpr__fi_offset0 +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr8 + ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0 + ; MUBUFW64: liveins: $sgpr8 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 + ; MUBUFW32: liveins: $sgpr8 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0 + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0 + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc + SI_RETURN implicit $sgpr7 + +... 
+ +--- +name: s_add_i32__different_sgpr__fi_offset0_live_after +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr8 + ; MUBUFW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after + ; MUBUFW64: liveins: $sgpr8 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 + ; + ; MUBUFW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after + ; MUBUFW32: liveins: $sgpr8 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr7 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__different_sgpr__fi_offset0_live_after + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr7 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $sgpr8 + renamable $sgpr7 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc + 
SI_RETURN implicit $sgpr7, implicit $sgpr8 + +... + +--- +name: s_add_i32__identity_sgpr__fi_offset0__kernel +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + liveins: $sgpr8 + + ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel + ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel + ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0__kernel + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 + renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc + SI_RETURN implicit $sgpr8 + +... 
+ +--- +name: s_add_i32__fi_offset0__identity_sgpr__kernel +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + liveins: $sgpr8 + + ; MUBUFW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel + ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; MUBUFW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel + ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset0__identity_sgpr__kernel + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY $sgpr8 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 + renamable $sgpr8 = S_ADD_I32 %stack.0, $sgpr8, implicit-def dead $scc + SI_RETURN implicit $sgpr8 + +... 
+ +--- +name: s_add_i32__identity_sgpr__fi_offset32__kernel +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } + - { id: 1, size: 64, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + liveins: $sgpr8 + + ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel + ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel + ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset32__kernel + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 + renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, 
implicit-def dead $scc + SI_RETURN implicit $sgpr8 + +... + +--- +name: s_add_i32__fi_offset32__identity_sgpr__kernel +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } + - { id: 1, size: 64, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + liveins: $sgpr8 + + ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel + ; MUBUFW64: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel + ; MUBUFW32: liveins: $sgpr8, $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr__kernel + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 $sgpr8, 32, implicit-def dead $scc + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 
+ renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc + SI_RETURN implicit $sgpr8 + +... + + +--- +name: s_add_i32__identity_sgpr__fi_offset0 +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr8 + + ; MUBUFW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 + ; MUBUFW64: liveins: $sgpr8 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr8 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; MUBUFW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 + ; MUBUFW32: liveins: $sgpr8 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr8 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr4 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__identity_sgpr__fi_offset0 + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr4 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 + renamable $sgpr8 = S_ADD_I32 $sgpr8, %stack.0, implicit-def dead $scc + SI_RETURN implicit 
$sgpr8 + +... + +--- +name: s_add_i32__fi_offset32__identity_sgpr +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } + - { id: 1, size: 64, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr8 + + ; MUBUFW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr + ; MUBUFW64: liveins: $sgpr8 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr8, 32, implicit-def dead $scc + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; MUBUFW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr + ; MUBUFW32: liveins: $sgpr8 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed $sgpr4, $sgpr8, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr8, 32, implicit-def dead $scc + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW64-LABEL: name: s_add_i32__fi_offset32__identity_sgpr + ; FLATSCRW64: liveins: $sgpr8 + ; FLATSCRW64-NEXT: {{ $}} + ; FLATSCRW64-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW64-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr4, 32, implicit-def dead $scc + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr8 + ; + ; FLATSCRW32-LABEL: name: s_add_i32__fi_offset32__identity_sgpr + ; FLATSCRW32: liveins: $sgpr8 + ; FLATSCRW32-NEXT: {{ $}} + ; FLATSCRW32-NEXT: renamable $sgpr4 = S_ADD_I32 killed $sgpr32, $sgpr8, implicit-def dead $scc + ; FLATSCRW32-NEXT: renamable $sgpr8 = S_ADD_I32 killed renamable $sgpr4, 32, implicit-def dead $scc + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr8 + renamable $sgpr8 
= S_ADD_I32 $sgpr8, %stack.1, implicit-def dead $scc + SI_RETURN implicit $sgpr8 + +... diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll index 89da9b8e75bc9..9d9d5b239a12c 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -101,7 +101,6 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) { ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -237,7 +236,6 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) { ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -375,7 +373,6 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) { ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -514,8 +511,6 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; 
GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -652,11 +647,10 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -795,11 +789,10 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 1 -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -939,8 +932,6 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-SDAG-NEXT: s_add_co_i32 s0, 
s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -1077,11 +1068,10 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS @@ -1219,11 +1209,10 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX12-SDAG-NEXT: s_load_b32 s0, s[2:3], 0x24 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, 0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v1, s0 offset:1 scope:SCOPE_SYS ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0 ; GFX12-SDAG-NEXT: scratch_store_b8 v0, v2, s0 offset:2 scope:SCOPE_SYS diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 14d8b71c5167a..9a9fd289e2d0c 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -381,11 +381,9 @@ define amdgpu_kernel void 
@store_load_sindex_kernel(i32 %idx) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 -; GFX9-NEXT: s_add_i32 s1, s1, 0 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s0, 0 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -402,8 +400,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX10-NEXT: s_and_b32 s1, s0, 15 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-NEXT: s_add_i32 s0, s0, 0 -; GFX10-NEXT: s_add_i32 s1, s1, 0 ; GFX10-NEXT: scratch_store_dword off, v0, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -418,8 +414,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 -; GFX11-NEXT: s_add_i32 s0, s0, 0 -; GFX11-NEXT: s_add_i32 s1, s1, 0 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc @@ -434,8 +428,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX12-NEXT: s_and_b32 s1, s0, 15 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-NEXT: s_lshl_b32 s1, s1, 2 -; GFX12-NEXT: s_add_co_i32 s0, s0, 0 -; GFX12-NEXT: s_add_co_i32 s1, s1, 0 ; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS @@ -455,11 +447,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 -; GFX9-PAL-NEXT: s_add_i32 s1, s1, 0 ; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt 
vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -471,11 +461,9 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_lshl_b32 s1, s0, 2 ; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_add_i32 s1, s1, 0 ; GFX940-NEXT: s_lshl_b32 s0, s0, 2 ; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 s0, s0, 0 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm @@ -497,8 +485,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0 -; GFX10-PAL-NEXT: s_add_i32 s1, s1, 0 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -513,8 +499,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0 -; GFX11-PAL-NEXT: s_add_i32 s1, s1, 0 ; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc @@ -529,8 +513,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX12-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, 0 -; GFX12-PAL-NEXT: s_add_co_i32 s1, s1, 0 ; GFX12-PAL-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_storecnt 0x0 ; GFX12-PAL-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS @@ -552,13 +534,11 @@ 
define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; GFX9-NEXT: s_lshl_b32 s0, s2, 2 -; GFX9-NEXT: s_add_i32 s0, s0, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_and_b32 s0, s2, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_i32 s0, s0, 0 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -573,8 +553,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX10-NEXT: s_and_b32 s0, s2, 15 ; GFX10-NEXT: s_lshl_b32 s1, s2, 2 ; GFX10-NEXT: s_lshl_b32 s0, s0, 2 -; GFX10-NEXT: s_add_i32 s1, s1, 0 -; GFX10-NEXT: s_add_i32 s0, s0, 0 ; GFX10-NEXT: scratch_store_dword off, v0, s1 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, off, s0 glc dlc @@ -587,8 +565,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX11-NEXT: s_and_b32 s1, s0, 15 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshl_b32 s1, s1, 2 -; GFX11-NEXT: s_add_i32 s0, s0, 0 -; GFX11-NEXT: s_add_i32 s1, s1, 0 ; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc @@ -601,8 +577,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX12-NEXT: s_and_b32 s1, s0, 15 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-NEXT: s_lshl_b32 s1, s1, 2 -; GFX12-NEXT: s_add_co_i32 s0, s0, 0 -; GFX12-NEXT: s_add_co_i32 s1, s1, 0 ; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS @@ -621,11 +595,9 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-PAL-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-PAL-NEXT: s_and_b32 s0, s0, 15 -; GFX9-PAL-NEXT: s_add_i32 s1, s1, 0 
-; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s1 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0 +; GFX9-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX9-PAL-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_endpgm @@ -633,13 +605,11 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX940-LABEL: store_load_sindex_foo: ; GFX940: ; %bb.0: ; %bb ; GFX940-NEXT: s_lshl_b32 s1, s0, 2 -; GFX940-NEXT: s_and_b32 s0, s0, 15 -; GFX940-NEXT: s_add_i32 s1, s1, 0 ; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_lshl_b32 s0, s0, 2 +; GFX940-NEXT: s_and_b32 s0, s0, 15 ; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: s_add_i32 s0, s0, 0 +; GFX940-NEXT: s_lshl_b32 s0, s0, 2 ; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm @@ -659,8 +629,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX10-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX10-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX10-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0 -; GFX10-PAL-NEXT: s_add_i32 s1, s1, 0 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s0 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, off, s1 glc dlc @@ -673,8 +641,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX11-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-PAL-NEXT: s_lshl_b32 s1, s1, 2 -; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0 -; GFX11-PAL-NEXT: s_add_i32 s1, s1, 0 ; GFX11-PAL-NEXT: scratch_store_b32 off, v0, s0 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc @@ -687,8 +653,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) { ; GFX12-PAL-NEXT: s_and_b32 s1, s0, 15 ; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-PAL-NEXT: 
s_lshl_b32 s1, s1, 2 -; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, 0 -; GFX12-PAL-NEXT: s_add_co_i32 s1, s1, 0 ; GFX12-PAL-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_storecnt 0x0 ; GFX12-PAL-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS @@ -3693,12 +3657,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-LABEL: store_load_large_imm_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-NEXT: s_add_i32 s1, s32, 4 +; GFX9-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-NEXT: s_add_i32 s1, s32, s0 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s0, s1 +; GFX9-NEXT: s_add_i32 s0, s1, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3710,10 +3674,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 13 -; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-NEXT: s_add_i32 s1, s32, 4 -; GFX10-NEXT: s_add_i32 s0, s0, s1 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: s_add_i32 s1, s32, s0 +; GFX10-NEXT: s_add_i32 s0, s1, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3755,12 +3719,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-PAL-LABEL: store_load_large_imm_offset_foo: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 -; GFX9-PAL-NEXT: s_add_i32 s1, s32, 4 +; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-PAL-NEXT: s_add_i32 s1, s32, s0 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-PAL-NEXT: 
s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s0, s1 +; GFX9-PAL-NEXT: s_add_i32 s0, s1, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -3786,10 +3750,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 -; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 4 -; GFX10-PAL-NEXT: s_add_i32 s0, s0, s1 +; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-PAL-NEXT: s_add_i32 s1, s32, s0 +; GFX10-PAL-NEXT: s_add_i32 s0, s1, 4 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index 34c7614ae36f9..f388aeb047029 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -55,8 +55,8 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 killed $sgpr0, 4, implicit-def dead $scc + ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_I32 $sgpr0, 4, implicit-def dead $scc ; GCN-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr4, implicit $exec ; GCN-NEXT: $m0 = S_MOV_B32 -1 ; GCN-NEXT: DS_WRITE_B32 undef renamable $vgpr0, killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index 49531e3b4f8f3..f6a77a763c2cd 100644 --- 
a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -161,8 +161,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_mov_b32 s0, 0 ; FLATSCR-NEXT: .LBB1_1: ; %loadstoreloop ; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1 -; FLATSCR-NEXT: s_add_i32 s3, s33, 0x3000 -; FLATSCR-NEXT: s_add_i32 s1, s0, s3 +; FLATSCR-NEXT: s_add_i32 s3, s33, s0 +; FLATSCR-NEXT: s_add_i32 s1, s3, 0x3000 ; FLATSCR-NEXT: s_add_i32 s0, s0, 1 ; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120 ; FLATSCR-NEXT: scratch_store_byte off, v2, s1 @@ -170,8 +170,8 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_1 ; FLATSCR-NEXT: ; %bb.2: ; %split ; FLATSCR-NEXT: s_movk_i32 s0, 0x2000 -; FLATSCR-NEXT: s_add_i32 s1, s33, 0x3000 -; FLATSCR-NEXT: s_add_i32 s0, s0, s1 +; FLATSCR-NEXT: s_add_i32 s1, s33, s0 +; FLATSCR-NEXT: s_add_i32 s0, s1, 0x3000 ; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_add_i32 s0, s33, 0x3000