Skip to content

Commit 597f93d

Browse files
authored
AMDGPU: Check if immediate is legal for av_mov_b32_imm_pseudo (#160819)
This is primarily to avoid folding a frame index materialized into an SGPR into the pseudo; this would end up looking like:

  %sreg = s_mov_b32 %stack.0
  %av_32 = av_mov_b32_imm_pseudo %sreg

which is not useful. Match the check used for the b64 case. This is limited to the pseudo to avoid regressions due to gfx908's special case: it expects to pass here with v_accvgpr_write_b32 for illegal cases, and to stay in the intermediate state with an SGPR input. This avoids regressions in a future patch.
1 parent f8d547f commit 597f93d

File tree

4 files changed

+146
-6
lines changed

4 files changed

+146
-6
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,15 @@ void SIFoldOperandsImpl::foldOperand(
13131313
if (MovSrcRC) {
13141314
if (UseSubReg)
13151315
MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
1316+
1317+
// FIXME: We should be able to directly check immediate operand legality
1318+
// for all cases, but gfx908 hacks break.
1319+
if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
1320+
(!OpToFold.isImm() ||
1321+
!TII->isImmOperandLegal(MovDesc, SrcIdx,
1322+
*OpToFold.getEffectiveImmVal())))
1323+
break;
1324+
13161325
if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
13171326
break;
13181327

llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ body: |
209209
bb.0:
210210
; GCN-LABEL: name: s_mov_b32_imm_65_copy_to_av_32
211211
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65, implicit $exec
212-
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
213-
; GCN-NEXT: S_ENDPGM 0, implicit [[AV_MOV_]]
212+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
213+
; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
214214
%0:sreg_32 = S_MOV_B32 65, implicit $exec
215215
%1:av_32 = COPY %0
216216
S_ENDPGM 0, implicit %1

llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,8 @@ body: |
240240
bb.0:
241241
; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
242242
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
243-
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
244-
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
243+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
244+
; GCN-NEXT: $agpr0 = COPY [[COPY]]
245245
; GCN-NEXT: S_ENDPGM 0
246246
%0:sreg_32 = S_MOV_B32 999
247247
%1:av_32 = COPY %0
@@ -257,8 +257,8 @@ body: |
257257
bb.0:
258258
; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
259259
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
260-
; GCN-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[V_MOV_B32_e32_]], implicit $exec
261-
; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
260+
; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
261+
; GCN-NEXT: $agpr0 = COPY [[COPY]]
262262
; GCN-NEXT: S_ENDPGM 0
263263
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
264264
%1:av_32 = COPY %0
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck %s
3+
4+
---
5+
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
6+
tracksRegLiveness: true
7+
frameInfo:
8+
maxAlignment: 4
9+
localFrameSize: 16384
10+
stack:
11+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
12+
body: |
13+
bb.0:
14+
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_av
15+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
16+
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
17+
; CHECK-NEXT: SI_RETURN implicit [[AV_MOV_]]
18+
%0:sreg_32 = S_MOV_B32 %stack.0
19+
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
20+
SI_RETURN implicit %1
21+
22+
...
23+
24+
---
25+
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
26+
tracksRegLiveness: true
27+
frameInfo:
28+
maxAlignment: 4
29+
localFrameSize: 16384
30+
stack:
31+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
32+
body: |
33+
bb.0:
34+
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_fi_to_v
35+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
36+
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
37+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
38+
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
39+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
40+
%0:sreg_32 = S_MOV_B32 %stack.0
41+
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
42+
%2:vgpr_32 = COPY %1, implicit $exec
43+
$vgpr0 = COPY %2
44+
SI_RETURN implicit $vgpr0
45+
46+
...
47+
48+
---
49+
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
50+
tracksRegLiveness: true
51+
body: |
52+
bb.0:
53+
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_lit_to_v
54+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
55+
; CHECK-NEXT: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO [[S_MOV_B32_]], implicit $exec
56+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[AV_MOV_]], implicit $exec
57+
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
58+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
59+
%0:sreg_32 = S_MOV_B32 1234
60+
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
61+
%2:vgpr_32 = COPY %1, implicit $exec
62+
$vgpr0 = COPY %2
63+
SI_RETURN implicit $vgpr0
64+
65+
...
66+
67+
---
68+
name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
69+
tracksRegLiveness: true
70+
body: |
71+
bb.0:
72+
; CHECK-LABEL: name: fold_frame_index_av_mov_b32_imm_pseudo_from_s_mov_b32_imm_to_v
73+
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8, implicit $exec
74+
; CHECK-NEXT: $vgpr0 = COPY [[V_MOV_B32_e32_]]
75+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
76+
%0:sreg_32 = S_MOV_B32 8
77+
%1:av_32 = AV_MOV_B32_IMM_PSEUDO %0, implicit $exec
78+
%2:vgpr_32 = COPY %1, implicit $exec
79+
$vgpr0 = COPY %2
80+
SI_RETURN implicit $vgpr0
81+
82+
...
83+
84+
---
85+
name: fold_frame_index_av_regression_0
86+
tracksRegLiveness: true
87+
frameInfo:
88+
maxAlignment: 4
89+
localFrameSize: 16384
90+
stack:
91+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
92+
body: |
93+
bb.0:
94+
; CHECK-LABEL: name: fold_frame_index_av_regression_0
95+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
96+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
97+
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
98+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
99+
%0:sreg_32 = S_MOV_B32 %stack.0
100+
%1:av_32 = COPY %0
101+
%2:vgpr_32 = COPY %1, implicit $exec
102+
$vgpr0 = COPY %2
103+
SI_RETURN implicit $vgpr0
104+
105+
...
106+
107+
---
108+
name: fold_frame_index_av_regression_1
109+
tracksRegLiveness: true
110+
frameInfo:
111+
maxAlignment: 4
112+
localFrameSize: 16384
113+
stack:
114+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
115+
body: |
116+
bb.0:
117+
; CHECK-LABEL: name: fold_frame_index_av_regression_1
118+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
119+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
120+
; CHECK-NEXT: $vgpr0 = COPY [[COPY]]
121+
; CHECK-NEXT: SI_RETURN implicit $vgpr0
122+
%0:sreg_32 = S_MOV_B32 %stack.0
123+
%1:sreg_32 = S_MOV_B32 killed %0
124+
%2:sreg_64 = S_MOV_B64 0
125+
%3:av_32 = COPY %1
126+
%4:vgpr_32 = COPY %3, implicit $exec
127+
$vgpr0 = COPY %4
128+
SI_RETURN implicit $vgpr0
129+
130+
...
131+

0 commit comments

Comments
 (0)