Skip to content

Commit ec903cc

Browse files
committed
AMDGPU: Stop handling AGPR case in getCrossCopyRegClass
This isn't what this hook is for. In the sense this hook is concerned with, you can copy between AGPRs. This change only affects some DAG scheduling decisions; later passes are responsible for dealing with the bad AGPR-to-AGPR handling.
1 parent a7016c4 commit ec903cc

File tree

5 files changed

+66
-68
lines changed

5 files changed

+66
-68
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,11 +1118,8 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const {
11181118

11191119
const TargetRegisterClass *
11201120
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
1121-
if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
1122-
return getEquivalentVGPRClass(RC);
11231121
if (RC == &AMDGPU::SCC_CLASSRegClass)
11241122
return getWaveMaskRegClass();
1125-
11261123
return RC;
11271124
}
11281125

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
146146
; GFX908-NEXT: ;;#ASMSTART
147147
; GFX908-NEXT: ; copy
148148
; GFX908-NEXT: ;;#ASMEND
149-
; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
149+
; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
150150
; GFX908-NEXT: s_nop 1
151-
; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
151+
; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
152152
; GFX908-NEXT: ;;#ASMSTART
153153
; GFX908-NEXT: ; use a3 v[0:31]
154154
; GFX908-NEXT: ;;#ASMEND
@@ -437,9 +437,9 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 {
437437
; GFX908-NEXT: ; copy
438438
; GFX908-NEXT: ;;#ASMEND
439439
; GFX908-NEXT: s_nop 7
440-
; GFX908-NEXT: v_accvgpr_read_b32 v33, a2
440+
; GFX908-NEXT: v_accvgpr_read_b32 v35, a2
441441
; GFX908-NEXT: s_nop 1
442-
; GFX908-NEXT: v_accvgpr_write_b32 a3, v33
442+
; GFX908-NEXT: v_accvgpr_write_b32 a3, v35
443443
; GFX908-NEXT: ;;#ASMSTART
444444
; GFX908-NEXT: ; use a3 v[0:31]
445445
; GFX908-NEXT: ;;#ASMEND
@@ -1045,9 +1045,9 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
10451045
; GFX908-NEXT: ;;#ASMSTART
10461046
; GFX908-NEXT: ; copy
10471047
; GFX908-NEXT: ;;#ASMEND
1048-
; GFX908-NEXT: v_accvgpr_read_b32 v32, a2
1048+
; GFX908-NEXT: v_accvgpr_read_b32 v39, a2
10491049
; GFX908-NEXT: s_nop 1
1050-
; GFX908-NEXT: v_accvgpr_write_b32 a3, v32
1050+
; GFX908-NEXT: v_accvgpr_write_b32 a3, v39
10511051
; GFX908-NEXT: ;;#ASMSTART
10521052
; GFX908-NEXT: ; use a3 v[0:31]
10531053
; GFX908-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/agpr-copy-propagation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ body: |
4040
; GFX908: liveins: $agpr0
4141
; GFX908-NEXT: {{ $}}
4242
; GFX908-NEXT: renamable $vgpr0 = COPY renamable $agpr0, implicit $exec
43-
; GFX908-NEXT: renamable $agpr1 = COPY renamable $vgpr0, implicit $exec
44-
; GFX908-NEXT: renamable $agpr2 = COPY renamable $vgpr0, implicit $exec
43+
; GFX908-NEXT: renamable $agpr1 = COPY $agpr0, implicit $exec
44+
; GFX908-NEXT: renamable $agpr2 = COPY $agpr0, implicit $exec
4545
; GFX908-NEXT: S_ENDPGM 0, implicit $vgpr0, implicit $agpr1, implicit $agpr2
4646
;
4747
; GFX90A-LABEL: name: do_not_propagate_agpr_to_agpr

llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 {
9595
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31]
9696
; GREEDY908-NEXT: s_nop 15
9797
; GREEDY908-NEXT: s_nop 1
98-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32
99-
; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61
100-
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
101-
; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1
102-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33
103-
; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59
104-
; GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58
105-
; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1
98+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32
99+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33
106100
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34
107-
; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57
108-
; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56
101+
; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2
102+
; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6
109103
; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1
110-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35
111-
; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55
112-
; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54
113-
; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1
114-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36
115-
; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53
116-
; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52
117-
; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1
104+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35
105+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36
118106
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37
119-
; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51
120-
; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50
107+
; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2
108+
; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6
121109
; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1
122-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38
123-
; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49
124-
; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48
125-
; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1
126-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39
127-
; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47
128-
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46
129-
; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1
110+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38
111+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39
130112
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40
131-
; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2
132-
; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19
113+
; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2
114+
; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6
133115
; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1
134-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41
135-
; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18
136-
; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17
137-
; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1
138-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42
139-
; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16
140-
; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15
141-
; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1
116+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41
117+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42
142118
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43
143-
; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14
144-
; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13
119+
; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2
120+
; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6
145121
; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1
146-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44
147-
; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12
148-
; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11
149-
; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1
150-
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45
151-
; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10
152-
; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9
153-
; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1
154-
; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8
155-
; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7
122+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44
123+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45
124+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46
125+
; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2
126+
; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6
127+
; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1
128+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47
129+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48
130+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a49
131+
; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2
132+
; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6
133+
; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1
134+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50
135+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51
136+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52
137+
; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2
138+
; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6
139+
; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1
140+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53
141+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54
142+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55
143+
; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2
144+
; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6
145+
; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1
146+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56
147+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57
148+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58
149+
; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2
150+
; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6
151+
; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1
152+
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59
153+
; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
154+
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61
155+
; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2
156156
; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6
157-
; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5
157+
; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1
158158
; GREEDY908-NEXT: s_nop 0
159159
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31]
160160
; GREEDY908-NEXT: s_nop 15
@@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 {
667667
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33]
668668
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33]
669669
; GREEDY908-NEXT: s_nop 8
670+
; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18
670671
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19
671-
; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18
672672
; GREEDY908-NEXT: s_nop 0
673+
; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5
673674
; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2
674-
; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3
675675
; GREEDY908-NEXT: s_nop 0
676676
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15]
677677
; GREEDY908-NEXT: s_nop 9

llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,20 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) {
5454
; GFX908-NEXT: s_branch .LBB0_2
5555
; GFX908-NEXT: .LBB0_1: ; %bb2
5656
; GFX908-NEXT: ; in Loop: Header=BB0_2 Depth=1
57+
; GFX908-NEXT: s_nop 6
58+
; GFX908-NEXT: v_accvgpr_read_b32 v3, a2
5759
; GFX908-NEXT: s_or_b32 s4, s3, 1
5860
; GFX908-NEXT: s_ashr_i32 s5, s3, 31
5961
; GFX908-NEXT: s_mov_b32 s3, s2
6062
; GFX908-NEXT: v_mov_b32_e32 v1, s2
61-
; GFX908-NEXT: s_nop 2
62-
; GFX908-NEXT: v_accvgpr_read_b32 v0, a2
6363
; GFX908-NEXT: v_mov_b32_e32 v2, s3
64+
; GFX908-NEXT: v_accvgpr_write_b32 a0, v3
6465
; GFX908-NEXT: v_accvgpr_read_b32 v4, a1
6566
; GFX908-NEXT: v_accvgpr_read_b32 v3, a1
66-
; GFX908-NEXT: v_accvgpr_write_b32 a0, v0
67+
; GFX908-NEXT: s_and_b32 s3, s5, s4
6768
; GFX908-NEXT: v_accvgpr_write_b32 a2, v4
6869
; GFX908-NEXT: v_accvgpr_write_b32 a3, v3
69-
; GFX908-NEXT: s_and_b32 s3, s5, s4
70+
; GFX908-NEXT: s_nop 0
7071
; GFX908-NEXT: v_mfma_f32_16x16x16f16 a[2:5], v[1:2], v[1:2], a[0:3]
7172
; GFX908-NEXT: s_cbranch_execz .LBB0_4
7273
; GFX908-NEXT: .LBB0_2: ; %bb

0 commit comments

Comments
 (0)