Skip to content

Commit 48db3fd

Browse files
authored
AMDGPU: Stop handling AGPR case in getCrossCopyRegClass (llvm#161800)
Handling AGPRs is not the purpose of getCrossCopyRegClass. In the sense this hook is concerned with, AGPRs can be copied to other AGPRs. Removing the special case only changes some DAG scheduling decisions; later passes are responsible for dealing with the bad AGPR-to-AGPR copy handling.
1 parent f3a9523 commit 48db3fd

File tree

5 files changed

+66
-68
lines changed

5 files changed

+66
-68
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,11 +1118,8 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const {
11181118

11191119
const TargetRegisterClass *
11201120
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
1121-
if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
1122-
return getEquivalentVGPRClass(RC);
11231121
if (RC == &AMDGPU::SCC_CLASSRegClass)
11241122
return getWaveMaskRegClass();
1125-
11261123
return RC;
11271124
}
11281125

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
146146
; GFX908-NEXT: ;;#ASMSTART
147147
; GFX908-NEXT: ; copy
148148
; GFX908-NEXT: ;;#ASMEND
149-
; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
149+
; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
150150
; GFX908-NEXT: s_nop 1
151-
; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
151+
; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
152152
; GFX908-NEXT: ;;#ASMSTART
153153
; GFX908-NEXT: ; use a3 v[0:31]
154154
; GFX908-NEXT: ;;#ASMEND
@@ -437,9 +437,9 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 {
437437
; GFX908-NEXT: ; copy
438438
; GFX908-NEXT: ;;#ASMEND
439439
; GFX908-NEXT: s_nop 7
440-
; GFX908-NEXT: v_accvgpr_read_b32 v33, a2
440+
; GFX908-NEXT: v_accvgpr_read_b32 v35, a2
441441
; GFX908-NEXT: s_nop 1
442-
; GFX908-NEXT: v_accvgpr_write_b32 a3, v33
442+
; GFX908-NEXT: v_accvgpr_write_b32 a3, v35
443443
; GFX908-NEXT: ;;#ASMSTART
444444
; GFX908-NEXT: ; use a3 v[0:31]
445445
; GFX908-NEXT: ;;#ASMEND
@@ -1045,9 +1045,9 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
10451045
; GFX908-NEXT: ;;#ASMSTART
10461046
; GFX908-NEXT: ; copy
10471047
; GFX908-NEXT: ;;#ASMEND
1048-
; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
1048+
; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
10491049
; GFX908-NEXT: s_nop 1
1050-
; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
1050+
; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
10511051
; GFX908-NEXT: ;;#ASMSTART
10521052
; GFX908-NEXT: ; use a3 v[0:31]
10531053
; GFX908-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ body: |
4040
; GFX908: liveins: $agpr0
4141
; GFX908-NEXT: {{ $}}
4242
; GFX908-NEXT: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
43-
; GFX908-NEXT: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
44-
; GFX908-NEXT: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
43+
; GFX908-NEXT: renamable $agpr1 = COPY $agpr0, implicit $exec
44+
; GFX908-NEXT: renamable $agpr2 = COPY $agpr0, implicit $exec
4545
; GFX908-NEXT: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
4646
;
4747
; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr

llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 {
9595
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31]
9696
; GREEDY908-NEXT: s_nop 15
9797
; GREEDY908-NEXT: s_nop 1
98-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32
99-
; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61
100-
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
101-
; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1
102-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33
103-
; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59
104-
; GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58
105-
; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1
98+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32
99+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33
106100
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34
107-
; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57
108-
; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56
101+
; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2
102+
; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6
109103
; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1
110-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35
111-
; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55
112-
; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54
113-
; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1
114-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36
115-
; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53
116-
; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52
117-
; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1
104+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35
105+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36
118106
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37
119-
; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51
120-
; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50
107+
; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2
108+
; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6
121109
; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1
122-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38
123-
; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49
124-
; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48
125-
; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1
126-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39
127-
; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47
128-
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46
129-
; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1
110+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38
111+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39
130112
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40
131-
; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2
132-
; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19
113+
; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2
114+
; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6
133115
; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1
134-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41
135-
; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18
136-
; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17
137-
; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1
138-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42
139-
; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16
140-
; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15
141-
; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1
116+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41
117+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42
142118
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43
143-
; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14
144-
; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13
119+
; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2
120+
; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6
145121
; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1
146-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44
147-
; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12
148-
; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11
149-
; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1
150-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45
151-
; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10
152-
; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9
153-
; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1
154-
; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8
155-
; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7
122+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44
123+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45
124+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46
125+
; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2
126+
; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6
127+
; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1
128+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47
129+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48
130+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a49
131+
; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2
132+
; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6
133+
; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1
134+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50
135+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51
136+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52
137+
; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2
138+
; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6
139+
; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1
140+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53
141+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54
142+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55
143+
; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2
144+
; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6
145+
; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1
146+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56
147+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57
148+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58
149+
; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2
150+
; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6
151+
; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1
152+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59
153+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
154+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61
155+
; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2
156156
; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6
157-
; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5
157+
; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1
158158
; GREEDY908-NEXT: s_nop 0
159159
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31]
160160
; GREEDY908-NEXT: s_nop 15
@@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 {
667667
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33]
668668
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33]
669669
; GREEDY908-NEXT: s_nop 8
670+
; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18
670671
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19
671-
; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18
672672
; GREEDY908-NEXT: s_nop 0
673+
; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5
673674
; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2
674-
; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3
675675
; GREEDY908-NEXT: s_nop 0
676676
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15]
677677
; GREEDY908-NEXT: s_nop 9

llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) {
5454
; GFX908-NEXT: s_branch .LBB0_2
5555
; GFX908-NEXT: .LBB0_1: ; %bb2
5656
; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1
57+
; GFX908-NEXT: s_nop 6
58+
; GFX908-NEXT: v_accvgpr_read_b32 v3, a2
5759
; GFX908-NEXT: s_or_b32 s4, s3, 1
5860
; GFX908-NEXT: s_ashr_i32 s5, s3, 31
5961
; GFX908-NEXT: s_mov_b32 s3, s2
6062
; GFX908-NEXT: v_mov_b32_e32 v1, s2
61-
; GFX908-NEXT: s_nop 2
62-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a2
6363
; GFX908-NEXT: v_mov_b32_e32 v2, s3
64+
; GFX908-NEXT: v_accvgpr_write_b32 a0, v3
6465
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
6566
; GFX908-NEXT: v_accvgpr_read_b32 v3, a1
66-
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
67+
; GFX908-NEXT: s_and_b32 s3, s5, s4
6768
; GFX908-NEXT: v_accvgpr_write_b32 a2, v4
6869
; GFX908-NEXT: v_accvgpr_write_b32 a3, v3
69-
; GFX908-NEXT: s_and_b32 s3, s5, s4
70+
; GFX908-NEXT: s_nop 0
7071
; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[2:5], v[1:2], v[1:2], a[0:3]
7172
; GFX908-NEXT: s_cbranch_execz .LBB0_4
7273
; GFX908-NEXT: .LBB0_2: ; %bb

0 commit comments

Comments
 (0)