diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 7d6990c097774..128cd8244a477 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2713,7 +2713,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, return true; } - case AMDGPU::S_ADD_I32: { + case AMDGPU::S_ADD_I32: + case AMDGPU::S_ADD_U32: { // TODO: Handle s_or_b32, s_and_b32. unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1; MachineOperand &OtherOp = MI->getOperand(OtherOpIdx); @@ -2773,7 +2774,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, DstReg = TmpReg; } - auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32)) + auto AddI32 = BuildMI(*MBB, *MI, DL, MI->getDesc()) .addDef(DstReg, RegState::Renamable) .addReg(MaterializedReg, RegState::Kill) .add(OtherOp); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll index c3b48b5d2ddff..378c6312c52be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll @@ -142,13 +142,12 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr ; GCN-NEXT: v_mov_b32_e32 v0, s48 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:240 ; GCN-NEXT: v_mov_b32_e32 v0, s49 -; GCN-NEXT: s_and_b32 s4, s25, 63 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:244 ; GCN-NEXT: v_mov_b32_e32 v0, s50 -; GCN-NEXT: s_lshl_b32 s4, s4, 2 +; GCN-NEXT: s_and_b32 s4, s25, 63 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:248 ; GCN-NEXT: v_mov_b32_e32 v0, s51 -; GCN-NEXT: s_add_u32 s4, 0, s4 +; GCN-NEXT: s_lshl_b32 s4, s4, 2 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:252 ; GCN-NEXT: v_mov_b32_e32 v0, s24 ; GCN-NEXT: v_mov_b32_e32 v1, s4 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir new file mode 100644 index 0000000000000..af61bd70f16b6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir @@ -0,0 +1,123 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=MUBUFW32 %s + +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW64 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s + +--- +name: s_add_u32__inline_imm__fi_offset0 +tracksRegLiveness: true +stack: + - { id: 0, size: 32, alignment: 16 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_u32__inline_imm__fi_offset0 + ; MUBUFW64: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 12, $sgpr4, implicit-def dead $scc + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_u32__inline_imm__fi_offset0 + ; MUBUFW32: renamable $sgpr4 = S_LSHR_B32 $sgpr32, 5, implicit-def dead $scc + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 12, $sgpr4, implicit-def dead $scc + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_u32__inline_imm__fi_offset0 + ; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 12, $sgpr32, implicit-def dead $scc + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_u32__inline_imm__fi_offset0 + ; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 12, $sgpr32, implicit-def dead $scc + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_U32 12, %stack.0, implicit-def dead $scc + SI_RETURN implicit $sgpr7 + +... + +--- +name: s_add_u32__kernel__literal__fi_offset96__offset_literal +tracksRegLiveness: true +stack: + - { id: 0, size: 96, alignment: 16 } + - { id: 1, size: 128, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal + ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr7 = S_MOV_B32 164 + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal + ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr7 = S_MOV_B32 164 + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal + ; FLATSCRW64: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7 + ; + ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal + ; FLATSCRW32: renamable $sgpr7 = S_MOV_B32 164 + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7 + renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def dead $scc + SI_RETURN implicit $sgpr7 +... + +--- +name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc +tracksRegLiveness: true +stack: + - { id: 0, size: 96, alignment: 16 } + - { id: 1, size: 128, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + isEntryFunction: true +body: | + bb.0: + ; MUBUFW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc + ; MUBUFW64: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: {{ $}} + ; MUBUFW64-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW64-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; MUBUFW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + ; + ; MUBUFW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc + ; MUBUFW32: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: {{ $}} + ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; MUBUFW32-NEXT: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; MUBUFW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + ; + ; FLATSCRW64-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc + ; FLATSCRW64: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; FLATSCRW64-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + ; + ; FLATSCRW32-LABEL: name: s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc + ; FLATSCRW32: renamable $sgpr7 = S_ADD_U32 164, 0, implicit-def $scc + ; FLATSCRW32-NEXT: SI_RETURN implicit $sgpr7, implicit $scc + renamable $sgpr7 = S_ADD_U32 68, %stack.1, implicit-def $scc + SI_RETURN implicit $sgpr7, implicit $scc +... diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll index 346b69c362c04..96d0e383761d1 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll @@ -38,7 +38,6 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) { ; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 @@ -76,12 +75,9 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_add_nc_u32 v0, s0, v0 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v0 ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, 4, v0 @@ -113,8 +109,7 @@ define amdgpu_kernel void @soff1_voff1(i32 %soff) { ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 @@ -168,7 +163,6 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) { ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 @@ -207,11 +201,9 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 @@ -246,11 +238,9 @@ define amdgpu_kernel void @soff1_voff2(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 @@ -304,7 +294,6 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) { ; GFX942-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 @@ -343,11 +332,9 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 @@ -382,11 +369,9 @@ define amdgpu_kernel void @soff1_voff4(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 @@ -440,7 +425,6 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 @@ -483,8 +467,7 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 @@ -520,8 +503,7 @@ define amdgpu_kernel void @soff2_voff1(i32 %soff) { ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 @@ -576,7 +558,6 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 @@ -616,11 +597,10 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -657,11 +637,10 @@ define amdgpu_kernel void @soff2_voff2(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS @@ -717,7 +696,6 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 @@ -757,11 +735,10 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -798,11 +775,10 @@ define amdgpu_kernel void @soff2_voff4(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 1 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS @@ -857,7 +833,6 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v3, 2, v0 @@ -900,8 +875,7 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX11-GISEL-NEXT: v_mov_b32_e32 v3, 4 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_add_nc_u32 v5, 2, v0 @@ -937,8 +911,7 @@ define amdgpu_kernel void @soff4_voff1(i32 %soff) { ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 @@ -993,7 +966,6 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 @@ -1033,11 +1005,10 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 1, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1074,11 +1045,10 @@ define amdgpu_kernel void @soff4_voff2(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS @@ -1133,7 +1103,6 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s0, v0 ; GFX942-GISEL-NEXT: v_add_u32_e32 v2, 1, v0 ; GFX942-GISEL-NEXT: scratch_store_byte v2, v1, off sc0 sc1 @@ -1173,11 +1142,10 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX11-GISEL: ; %bb.0: ; %bb ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -1214,11 +1182,10 @@ define amdgpu_kernel void @soff4_voff4(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_lshl_b32 s0, s0, 2 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:1 scope:SCOPE_SYS @@ -1263,7 +1230,6 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) { ; GFX942-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-GISEL-NEXT: v_mov_b32_e32 v1, 1 ; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-GISEL-NEXT: s_add_u32 s0, 0, s0 ; GFX942-GISEL-NEXT: v_add3_u32 v0, s0, v0, -1 ; GFX942-GISEL-NEXT: scratch_store_byte v0, v1, off sc0 sc1 ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) @@ -1285,8 +1251,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) { ; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-NEXT: s_add_u32 s0, 0, s0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX11-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 dlc ; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1306,8 +1271,7 @@ define amdgpu_kernel void @soff1_voff1_negative(i32 %soff) { ; GFX12-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_and_b32 v0, 0x3ff, v0 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: s_add_co_u32 s0, 0, s0 -; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-GISEL-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX12-GISEL-NEXT: scratch_store_b8 v0, v1, off offset:-1 scope:SCOPE_SYS ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll index b1ea275a97a39..004403f46a4d4 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -346,4 +346,32 @@ entry: ret void } +; Check for "SOP2/SOPC instruction requires too many immediate +; constants" verifier error. Frame index would fold into low half of +; the lowered flat pointer add, and use s_add_u32 instead of +; s_add_i32. + +; GCN-LABEL: {{^}}fi_sop2_s_add_u32_literal_error: +; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010 +; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0 +define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 { +entry: + %.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5) + %Total3.i.i = alloca [1024 x i32], align 16, addrspace(5) + %Total3.ascast.i.i = addrspacecast ptr addrspace(5) %Total3.i.i to ptr + %gep = getelementptr i8, ptr %Total3.ascast.i.i, i64 4096 + %p2i = ptrtoint ptr %gep to i64 + br label %.shuffle.then.i.i.i.i + +.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry + store i64 0, ptr addrspace(5) null, align 4 + %icmp = icmp ugt i64 %p2i, 1 + br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i + +vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i + %wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4 + store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4 + ret void +} + attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index 5815d2bfe7bf8..81bd8baaa0e5d 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -91,8 +91,8 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc - ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $sgpr0, 4, implicit-def $scc + ; GCN-NEXT: renamable $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc + ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr0, 4, implicit-def $scc ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc ; GCN-NEXT: $sgpr0 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; GCN-NEXT: $sgpr0 = S_ADD_I32 killed $sgpr0, 8, implicit-def $scc