From 1a76402c6040fa4c83c76e751aa97dc3b29590e9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 1 Oct 2024 16:04:32 +0400
Subject: [PATCH 1/2] AMDGPU: Add baseline test for frame index folding

We currently can increase the instruction count when a frame index
requires materialization.
---
 .../fold-operands-frame-index.gfx10.mir       |  87 +++++
 .../AMDGPU/fold-operands-frame-index.mir      | 354 ++++++++++++++++++
 2 files changed, 441 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir
new file mode 100644
index 0000000000000..76183ece264ff
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
+
+---
+name: fold_frame_index__v_add_u32_e32__const_v_fi
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_U32_e32_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32 = V_ADD_U32_e32 128, %0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+
+---
+name: fold_frame_index__v_add_co_u32_e64__v_fi_const
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 4
+  localFrameSize: 16384
+stack:
+  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
+    ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 %0, 128, 0, implicit $exec
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...
+ +--- +name: fold_frame_index__v_add_co_u32_e64___fi_const_v +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64___fi_const_v + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 128, %0, 0, implicit $exec + $vgpr0 = COPY %1 + SI_RETURN implicit $vgpr0 +... + +--- +name: fold_frame_index__v_add_co_u32_e64__v_fi_imm +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm + ; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec + $vgpr0 = COPY %1 + SI_RETURN implicit $vgpr0 +... + diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir new file mode 100644 index 0000000000000..da094745ec3d4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir @@ -0,0 +1,354 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX9 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX12 %s + +--- +name: fold_frame_index__s_add_i32__fi_const +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_const + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MOV_B32_]], 128, implicit-def $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:sreg_32 = S_MOV_B32 %stack.0 + %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def $scc + $sgpr4 = COPY %1 + SI_RETURN implicit $sgpr4 +... 
+ +--- +name: fold_frame_index__s_add_i32__const_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__s_add_i32__const_fi + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 128, [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:sreg_32 = S_MOV_B32 %stack.0 + %1:sreg_32 = S_ADD_I32 128, %0, implicit-def $scc + $sgpr4 = COPY %1 + SI_RETURN implicit $sgpr4 +... + +--- +name: fold_frame_index__s_add_i32__materializedconst_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__s_add_i32__materializedconst_fi + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:sreg_32 = S_MOV_B32 256 + %1:sreg_32 = S_MOV_B32 %stack.0 + %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc + $sgpr4 = COPY %2 + SI_RETURN implicit $sgpr4 +... + +--- +name: fold_frame_index__s_add_i32__fi_materializedconst_0 +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_0 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 256 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:sreg_32 = S_MOV_B32 %stack.0 + %1:sreg_32 = S_MOV_B32 256 + %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc + $sgpr4 = COPY %2 + SI_RETURN implicit $sgpr4 +... + + +--- +name: fold_frame_index__s_add_i32__fi_materializedconst_1 +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_1 + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 256, [[S_MOV_B32_]], implicit-def $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:sreg_32 = S_MOV_B32 256 + %1:sreg_32 = S_MOV_B32 %stack.0 + %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc + $sgpr4 = COPY %2 + SI_RETURN implicit $sgpr4 +... + +--- +name: fold_frame_index__s_add_i32__reg_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + liveins: $sgpr4 + ; CHECK-LABEL: name: fold_frame_index__s_add_i32__reg_fi + ; CHECK: liveins: $sgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], %stack.0, implicit-def $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:sreg_32 = COPY $sgpr4 + %1:sreg_32 = S_MOV_B32 %stack.0 + %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc + $sgpr4 = COPY %2 + SI_RETURN implicit $sgpr4 +... 
+ +--- +name: fold_frame_index__s_add_i32__fi_reg +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + liveins: $sgpr4 + ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_reg + ; CHECK: liveins: $sgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[COPY]], implicit-def $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:sreg_32 = COPY $sgpr4 + %1:sreg_32 = S_MOV_B32 %stack.0 + %2:sreg_32 = S_ADD_I32 %1, %0, implicit-def $scc + $sgpr4 = COPY %2 + SI_RETURN implicit $sgpr4 +... + +--- +name: fold_frame_index__v_add_u32_e32__const_v_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec + ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_ADD_U32_e32 128, %0, implicit $exec + $sgpr4 = COPY %1 + SI_RETURN implicit $sgpr4 +... + +--- +name: fold_frame_index__v_add_u32_e32__materialized_v_const_v_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__materialized_v_const_v_fi + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 128, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 %stack.0, [[V_MOV_B32_e32_]], implicit $exec + ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 128, implicit $exec + %2:vgpr_32 = V_ADD_U32_e32 %1, %0, implicit $exec + $sgpr4 = COPY %2 + SI_RETURN implicit $sgpr4 +... + + +--- +name: fold_frame_index__v_add_co_u32_e32__const_v_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e32__const_v_fi + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; CHECK-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, [[V_MOV_B32_e32_]], implicit-def $vcc, implicit $exec + ; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e32_]] + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_ADD_CO_U32_e32 128, %0, implicit-def $vcc, implicit $exec + $vgpr0 = COPY %1 + SI_RETURN implicit $vgpr0 +... 
+ +--- +name: fold_frame_index__v_add_co_u32_e64__v_fi_const +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; GFX9-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; GFX10-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const + ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec + ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; GFX12-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec + $vgpr0 = COPY %1 + SI_RETURN implicit $vgpr0 +... + +--- +name: multi_use_scalar_fi__add_imm_add_inline_imm +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: multi_use_scalar_fi__add_imm_add_inline_imm + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MOV_B32_]], 16380, implicit-def dead $scc + ; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 56, implicit-def dead $scc + ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] + ; CHECK-NEXT: $sgpr5 = COPY [[S_ADD_I32_1]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4, implicit $sgpr5 + %0:sreg_32 = COPY $sgpr0 + %1:sreg_32 = COPY $sgpr1 + %2:sreg_32 = S_MOV_B32 16380 + %3:sreg_32 = S_MOV_B32 56 + %4:sreg_32 = S_MOV_B32 %stack.0 + %5:sreg_32 = S_ADD_I32 %4, killed %2, implicit-def dead $scc + %6:sreg_32 = S_ADD_I32 %4, killed %3, implicit-def dead $scc + $sgpr4 = COPY %5 + $sgpr5 = COPY %6 + SI_RETURN implicit $sgpr4, implicit $sgpr5 +... 
+ +--- +name: multi_add_use_vector_fi__add_imm_add_inline_imm +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX9-LABEL: name: multi_add_use_vector_fi__add_imm_add_inline_imm + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], killed [[COPY1]], 0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 56, 0, implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_U32_e64_]] + ; GFX9-NEXT: $vgpr1 = COPY [[V_ADD_U32_e64_1]] + ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: multi_add_use_vector_fi__add_imm_add_inline_imm + ; GFX10: liveins: $vgpr0, $vgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, killed [[COPY1]], 0, implicit $exec + ; GFX10-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 56, 0, implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_U32_e64_]] + ; GFX10-NEXT: $vgpr1 = COPY [[V_ADD_U32_e64_1]] + ; GFX10-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; + ; GFX12-LABEL: name: multi_add_use_vector_fi__add_imm_add_inline_imm + ; GFX12: liveins: $vgpr0, $vgpr1 + ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX12-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, killed [[COPY1]], 0, implicit $exec + ; GFX12-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 56, 0, implicit $exec + ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_U32_e64_]] + ; GFX12-NEXT: $vgpr1 = COPY [[V_ADD_U32_e64_1]] + ; GFX12-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %3:vgpr_32 = V_ADD_U32_e64 %2, killed %1, 0, implicit $exec + %4:vgpr_32 = V_MOV_B32_e32 999, implicit $exec + %5:vgpr_32 = COPY %3 + %6:sreg_32 = S_MOV_B32 56 + %7:vgpr_32 = V_ADD_U32_e64 %2, killed %6, 0, implicit $exec + %8:vgpr_32 = COPY %7 + $vgpr0 = COPY %3 + $vgpr1 = COPY %7 + SI_RETURN implicit $vgpr0, implicit $vgpr1 + +... From 489d5161eae75d77f94de8247c1443f517a29cc5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 2 Oct 2024 09:07:35 +0400 Subject: [PATCH 2/2] Test more cases --- .../fold-operands-frame-index.gfx10.mir | 44 ++++++++ .../AMDGPU/fold-operands-frame-index.mir | 100 +++++++++++++++++- 2 files changed, 140 insertions(+), 4 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir index 76183ece264ff..0d6511cbfceb2 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.gfx10.mir @@ -44,6 +44,50 @@ body: | SI_RETURN implicit $vgpr0 ... 
+ + +--- +name: fold_frame_index__v_add_u32_e64__const_v_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__const_v_fi + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec + ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_ADD_U32_e64 128, %0, 0, implicit $exec + $sgpr4 = COPY %1 + SI_RETURN implicit $sgpr4 +... + +--- +name: fold_frame_index__v_add_u32_e64___v_fi_const +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_const + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec + ; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; CHECK-NEXT: SI_RETURN implicit $sgpr4 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_ADD_U32_e64 %0, 128, 0, implicit $exec + $sgpr4 = COPY %1 + SI_RETURN implicit $sgpr4 +... + --- name: fold_frame_index__v_add_co_u32_e64___fi_const_v tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir index da094745ec3d4..aa91a4f9f988f 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir @@ -205,6 +205,67 @@ body: | SI_RETURN implicit $sgpr4 ... +--- +name: fold_frame_index__v_add_u32_e64__imm_v_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; GFX9-NEXT: SI_RETURN implicit $sgpr4 + ; + ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi + ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec + ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; GFX10-NEXT: SI_RETURN implicit $sgpr4 + ; + ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64__imm_v_fi + ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 64, %stack.0, 0, implicit $exec + ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; GFX12-NEXT: SI_RETURN implicit $sgpr4 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_ADD_U32_e64 64, %0, 0, implicit $exec + $sgpr4 = COPY %1 + SI_RETURN implicit $sgpr4 +... 
+ +--- +name: fold_frame_index__v_add_u32_e64___v_fi_imm +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; GFX9-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec + ; GFX9-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; GFX9-NEXT: SI_RETURN implicit $sgpr4 + ; + ; GFX10-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm + ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec + ; GFX10-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; GFX10-NEXT: SI_RETURN implicit $sgpr4 + ; + ; GFX12-LABEL: name: fold_frame_index__v_add_u32_e64___v_fi_imm + ; GFX12: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 %stack.0, 64, 0, implicit $exec + ; GFX12-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]] + ; GFX12-NEXT: SI_RETURN implicit $sgpr4 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32 = V_ADD_U32_e64 %0, 64, 0, implicit $exec + $sgpr4 = COPY %1 + SI_RETURN implicit $sgpr4 +... --- name: fold_frame_index__v_add_co_u32_e32__const_v_fi @@ -228,7 +289,7 @@ body: | ... --- -name: fold_frame_index__v_add_co_u32_e64__v_fi_const +name: fold_frame_index__v_add_co_u32_e64__v_fi_imm tracksRegLiveness: true frameInfo: maxAlignment: 4 @@ -237,18 +298,18 @@ stack: - { id: 0, size: 16384, alignment: 4, local-offset: 0 } body: | bb.0: - ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const + ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 64, 0, implicit $exec ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] ; GFX9-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const + ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; - ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const + ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] ; GFX12-NEXT: SI_RETURN implicit $vgpr0 @@ -258,6 +319,37 @@ body: | SI_RETURN implicit $vgpr0 ... 
+--- +name: fold_frame_index__v_add_co_u32_e64__imm_v_fi +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 + localFrameSize: 16384 +stack: + - { id: 0, size: 16384, alignment: 4, local-offset: 0 } +body: | + bb.0: + ; GFX9-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; GFX9-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX10-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec + ; GFX10-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; GFX10-NEXT: SI_RETURN implicit $vgpr0 + ; + ; GFX12-LABEL: name: fold_frame_index__v_add_co_u32_e64__imm_v_fi + ; GFX12: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec + ; GFX12-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]] + ; GFX12-NEXT: SI_RETURN implicit $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 64, %0, 0, implicit $exec + $vgpr0 = COPY %1 + SI_RETURN implicit $vgpr0 +... + --- name: multi_use_scalar_fi__add_imm_add_inline_imm tracksRegLiveness: true