Skip to content

Commit 06bbeda

Browse files
arsenm authored and Sterling-Augustine committed
AMDGPU: Add baseline test for frame index folding (llvm#110737)
We currently can increase the instruction count when a frame index requires materialization.
1 parent 8675fcf commit 06bbeda

File tree

2 files changed

+577
-0
lines changed

2 files changed

+577
-0
lines changed
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
4+
5+
---
# Baseline for si-fold-operands: V_ADD_U32_e32 with the constant 128 as the
# first source and a frame index (materialized by V_MOV_B32_e32) as the second.
# The CHECK lines record that both instructions are currently left unchanged,
# i.e. the frame index is not folded into the add.
6+
name: fold_frame_index__v_add_u32_e32__const_v_fi
7+
tracksRegLiveness: true
8+
frameInfo:
9+
maxAlignment: 4
10+
localFrameSize: 16384
11+
stack:
12+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
13+
body: |
14+
bb.0:
15+
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e32__const_v_fi
16+
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
17+
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[V_MOV_B32_e32_]], implicit $exec
18+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_U32_e32_]]
19+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
20+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
21+
%1:vgpr_32 = V_ADD_U32_e32 128, %0, implicit $exec
22+
$vgpr0 = COPY %1
23+
SI_RETURN implicit $vgpr0
24+
...
25+
26+
---
# Baseline for si-fold-operands: V_ADD_CO_U32_e64 with a frame index
# (materialized by V_MOV_B32_e32) as the first source and the constant 128 as
# the second. The CHECK lines record that the V_MOV_B32_e32 is currently kept
# and the add still reads the moved value rather than %stack.0 directly.
27+
name: fold_frame_index__v_add_co_u32_e64__v_fi_const
28+
tracksRegLiveness: true
29+
frameInfo:
30+
maxAlignment: 4
31+
localFrameSize: 16384
32+
stack:
33+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
34+
body: |
35+
bb.0:
36+
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_const
37+
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
38+
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
39+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
40+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
41+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
42+
%1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 %0, 128, 0, implicit $exec
43+
$vgpr0 = COPY %1
44+
SI_RETURN implicit $vgpr0
45+
...
46+
47+
48+
49+
---
# Baseline for si-fold-operands: V_ADD_U32_e64 with the constant 128 as the
# first source and a frame index (materialized by V_MOV_B32_e32) as the second.
# The CHECK lines record that both instructions are currently left unchanged.
# NOTE(review): the vgpr_32 result is copied into $sgpr4 here, while the
# V_ADD_CO and e32 tests in this file return through $vgpr0 - confirm the SGPR
# destination is intentional.
50+
name: fold_frame_index__v_add_u32_e64__const_v_fi
51+
tracksRegLiveness: true
52+
frameInfo:
53+
maxAlignment: 4
54+
localFrameSize: 16384
55+
stack:
56+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
57+
body: |
58+
bb.0:
59+
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__const_v_fi
60+
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
61+
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
62+
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
63+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
64+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
65+
%1:vgpr_32 = V_ADD_U32_e64 128, %0, 0, implicit $exec
66+
$sgpr4 = COPY %1
67+
SI_RETURN implicit $sgpr4
68+
...
69+
70+
---
# Baseline for si-fold-operands: V_ADD_U32_e64 with a frame index (materialized
# by V_MOV_B32_e32) as the first source and the constant 128 as the second.
# The CHECK lines record that both instructions are currently left unchanged.
# The test name uses the same "<opcode>__<operand order>" separator as the
# other tests in this file (the original had a stray third underscore).
# NOTE(review): the vgpr_32 result is copied into $sgpr4 here, while the
# V_ADD_CO and e32 tests in this file return through $vgpr0 - confirm the SGPR
# destination is intentional.
71+
name: fold_frame_index__v_add_u32_e64__v_fi_const
72+
tracksRegLiveness: true
73+
frameInfo:
74+
maxAlignment: 4
75+
localFrameSize: 16384
76+
stack:
77+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
78+
body: |
79+
bb.0:
80+
; CHECK-LABEL: name: fold_frame_index__v_add_u32_e64__v_fi_const
81+
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
82+
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], 128, 0, implicit $exec
83+
; CHECK-NEXT: $sgpr4 = COPY [[V_ADD_U32_e64_]]
84+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
85+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
86+
%1:vgpr_32 = V_ADD_U32_e64 %0, 128, 0, implicit $exec
87+
$sgpr4 = COPY %1
88+
SI_RETURN implicit $sgpr4
89+
...
90+
91+
---
# Baseline for si-fold-operands: V_ADD_CO_U32_e64 with the constant 128 as the
# first source and a frame index (materialized by V_MOV_B32_e32) as the second.
# The CHECK lines record that both instructions are currently left unchanged.
# The name follows the "__const_v_fi" operand-order suffix used by the other
# constant-first tests in this file (the original read "___fi_const_v").
92+
name: fold_frame_index__v_add_co_u32_e64__const_v_fi
93+
tracksRegLiveness: true
94+
frameInfo:
95+
maxAlignment: 4
96+
localFrameSize: 16384
97+
stack:
98+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
99+
body: |
100+
bb.0:
101+
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__const_v_fi
102+
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
103+
; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 128, [[V_MOV_B32_e32_]], 0, implicit $exec
104+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
105+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
106+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
107+
%1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 128, %0, 0, implicit $exec
108+
$vgpr0 = COPY %1
109+
SI_RETURN implicit $vgpr0
110+
...
111+
112+
---
# V_ADD_CO_U32_e64 with a frame index (materialized by V_MOV_B32_e32) as the
# first source and the constant 64 as the second. Unlike the tests that add
# 128, the CHECK lines here show the V_MOV_B32_e32 removed and %stack.0 used
# directly as the add's first operand.
# NOTE(review): presumably 64 folds because it is encodable as an inline
# immediate while 128 needs a literal - confirm against the target docs.
113+
name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
114+
tracksRegLiveness: true
115+
frameInfo:
116+
maxAlignment: 4
117+
localFrameSize: 16384
118+
stack:
119+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
120+
body: |
121+
bb.0:
122+
; CHECK-LABEL: name: fold_frame_index__v_add_co_u32_e64__v_fi_imm
123+
; CHECK: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 %stack.0, 64, 0, implicit $exec
124+
; CHECK-NEXT: $vgpr0 = COPY [[V_ADD_CO_U32_e64_]]
125+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
126+
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
127+
%1:vgpr_32, %2:sreg_32 = V_ADD_CO_U32_e64 %0, 64, 0, implicit $exec
128+
$vgpr0 = COPY %1
129+
SI_RETURN implicit $vgpr0
130+
...
131+

0 commit comments

Comments
 (0)