llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (17 additions, 0 deletions)
@@ -194,6 +194,23 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
     return false;
 
   const unsigned Opc = UseMI.getOpcode();
+  switch (Opc) {
+  case AMDGPU::S_ADD_I32:
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_CO_U32_e32:
+    // TODO: Possibly relax hasOneUse. It matters more for mubuf, since we have
+    // to insert the wave size shift at every point we use the index.
+    // TODO: Fix depending on visit order to fold immediates into the operand
+    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
+           MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e64:
+    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&
+           MRI->hasOneNonDBGUse(UseMI.getOperand(OpNo).getReg());
+  default:
+    break;
+  }
+
   if (TII->isMUBUF(UseMI))
     return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
   if (!TII->isFLATScratch(UseMI))
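What the new switch enables, in test terms: when one source of an add is an immediate and the other comes from a single-use V_MOV of a frame index, the frame index can replace the register operand directly. A minimal before/after sketch in MIR, adapted from the tests updated below:

    ; Before the fold: the frame index is first materialized into a VGPR.
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_ADD_U32_e32 128, %0, implicit $exec

    ; After the fold: the V_MOV is dead and %stack.0 feeds the add directly.
    %1:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec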
llvm/test/CodeGen/AMDGPU/flat-scratch.ll (2 additions, 4 deletions)
@@ -4705,8 +4705,7 @@ define amdgpu_ps void @large_offset() {
 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_movk_i32 s0, 0x810
-; GFX10-NEXT: s_addk_i32 s0, 0x3c0
+; GFX10-NEXT: s_movk_i32 s0, 0xbd0
 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
 ; GFX10-NEXT: v_mov_b32_e32 v3, v0
@@ -4823,8 +4822,7 @@ define amdgpu_ps void @large_offset() {
 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
 ; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
 ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-PAL-NEXT: s_movk_i32 s0, 0x810
-; GFX10-PAL-NEXT: s_addk_i32 s0, 0x3c0
+; GFX10-PAL-NEXT: s_movk_i32 s0, 0xbd0
 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, v0
 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, v0
 ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, v0
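In both hunks above, folding the frame index lets a two-instruction constant materialization collapse into one: the deleted pair built 0x810 and then added 0x3c0, while the new single s_movk_i32 loads the precomputed sum, 0x810 + 0x3c0 = 0xbd0.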
llvm/test/CodeGen/AMDGPU/fold-fi-operand-shrink.mir (1 addition, 2 deletions)
@@ -183,8 +183,7 @@ body: |
   bb.0:
 
     ; GCN-LABEL: name: shrink_vgpr_imm_vgpr_fi_v_add_i32_e64_no_carry_out_use
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GCN: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 16, %stack.0, 0, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
     %0:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir (5 additions, 10 deletions)
@@ -13,8 +13,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_U32_e32_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -34,8 +33,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 %stack.0, 128, 0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -57,8 +55,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, %stack.0, 0, implicit $exec
     ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -78,8 +75,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_const
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 128, 0, implicit $exec
     ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -99,8 +95,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64___fi_const_v
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, %stack.0, 0, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir (22 additions, 72 deletions)
@@ -14,8 +14,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_const
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MOV_B32_]], 128, implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 %stack.0
@@ -35,8 +34,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__const_fi
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, %stack.0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 %stack.0
@@ -56,8 +54,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__materializedconst_fi
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 256
@@ -101,8 +98,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_1
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
-    ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc
+    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, %stack.0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:sreg_32 = S_MOV_B32 256
@@ -173,8 +169,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
     ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e32_]]
     ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -215,21 +210,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
-    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
-    ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $sgpr4
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_ADD_U32_e64 64, %0, 0, implicit $exec
     $sgpr4 = COPY %1
@@ -246,21 +230,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
-    ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
-    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $sgpr4
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
-    ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $sgpr4
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm
+    ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $sgpr4
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32 = V_ADD_U32_e64 %0, 64, 0, implicit $exec
     $sgpr4 = COPY %1
@@ -278,8 +251,7 @@ stack:
 body: |
   bb.0:
     ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e32__const_v_fi
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec
+    ; CHECK: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def $vcc, implicit $exec
     ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e32_]]
     ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -298,21 +270,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
-    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
-    ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
-    ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec
     $vgpr0 = COPY %1
@@ -329,21 +290,10 @@ stack:
   - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
 body: |
   bb.0:
-    ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
-    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
-    ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec
-    ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX9-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
-    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX10-NEXT: SI_RETURN implicit $vgpr0
-    ;
-    ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
-    ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
-    ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
-    ; GFX12-NEXT: SI_RETURN implicit $vgpr0
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi
+    ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
     %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
     %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 64, %0, 0, implicit $exec
     $vgpr0 = COPY %1
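As an aside on reproducing these results: MIR tests like the ones above are generally run through the fold pass in isolation. A sketch of the kind of llc invocation involved, with the triple and CPU assumed here since the actual RUN lines fall outside the quoted hunks:

    llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - fold-operands-frame-index.mir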
llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll (6 additions, 6 deletions)
@@ -64,8 +64,8 @@ define void @func_mov_fi_i32_offset() #0 {
 ; GFX9-MUBUF: v_lshrrev_b32_e64 [[SCALED:v[0-9]+]], 6, s32
 ; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 4, [[SCALED]]
 
-; GFX9-FLATSCR: v_mov_b32_e32 [[ADD:v[0-9]+]], s32
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 4, [[ADD]]
+; FIXME: Should commute and shrink
+; GFX9-FLATSCR: v_add_u32_e64 v0, 4, s32
 
 ; GCN-NOT: v_mov
 ; GCN: ds_write_b32 v0, v0
@@ -164,12 +164,12 @@ define void @void_func_byval_struct_i8_i32_ptr_value(ptr addrspace(5) byval({ i8
 ; GFX9-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, s32 offset:4 glc{{$}}
 
 ; CI: v_lshr_b32_e64 [[SHIFT:v[0-9]+]], s32, 6
-; CI: v_add_i32_e32 [[GEP:v[0-9]+]], vcc, 4, [[SHIFT]]
+; CI: v_add_i32_e64 [[GEP:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 4, [[SHIFT]]
 
-; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
-; GFX9-FLATSCR: v_mov_b32_e32 [[SP:v[0-9]+]], s32
+; GFX9-MUBUF: v_lshrrev_b32_e64 [[SP:v[0-9]+]], 6, s32
+; GFX9-MUBUF: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
 
-; GFX9: v_add_u32_e32 [[GEP:v[0-9]+]], 4, [[SP]]
+; GFX9-FLATSCR: v_add_u32_e64 [[GEP:v[0-9]+]], 4, s32
 
 ; GCN: ds_write_b32 v{{[0-9]+}}, [[GEP]]
 define void @void_func_byval_struct_i8_i32_ptr_nonentry_block(ptr addrspace(5) byval({ i8, i32 }) %arg0, i32 %arg2) #0 {
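Both targets now select a VOP3 (_e64) add here: unlike the _e32 encodings, whose second source must be a VGPR, the VOP3 form can take the SGPR stack pointer directly as a source, and on CI the carry-out consequently moves from the implicit vcc to an explicit SGPR pair. The FIXME added in the first hunk records that the GFX9-FLATSCR form should eventually be commuted and shrunk back to _e32.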