
Commit 9e1d656

AMDGPU: Remove MIMG special case in adjustAllocatableRegClass (#158184)
I have no idea why this was here. MIMG atomics use tied operands for the input and output, so AV classes should have always worked. We have poor test coverage for AGPRs with atomics, so add a partial set. Everything seems to work OK, although it seems image cmpswap always uses VGPRs unnecessarily.
Parent: baec6c5
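The tied-operand claim can be checked directly against the instruction descriptions. Below is a minimal C++ sketch (an illustration only, not part of this patch; isTiedToDef is a hypothetical helper name) using the existing MCInstrDesc API:

// Illustration only: isTiedToDef is a hypothetical helper, not LLVM code.
#include "llvm/MC/MCInstrDesc.h"

// True if the use operand at UseOpIdx must be allocated to the same register
// as one of the instruction's defs. getOperandConstraint returns the index of
// the tied operand, or -1 when the operand is untied.
static bool isTiedToDef(const llvm::MCInstrDesc &Desc, unsigned UseOpIdx) {
  return Desc.getOperandConstraint(UseOpIdx, llvm::MCOI::TIED_TO) != -1;
}

Because a MIMG atomic's vdata input is tied to its def, any register class legal for the output (including the AV superclasses) is legal for the input, which is why the special case removed below was unnecessary.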

3 files changed: +279 −2 lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 2 deletions
@@ -5977,8 +5977,7 @@ static const TargetRegisterClass *
 adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
                           const MCInstrDesc &TID, unsigned RCID) {
   if (!ST.hasGFX90AInsts() && (((TID.mayLoad() || TID.mayStore()) &&
-                                !(TID.TSFlags & SIInstrFlags::Spill)) ||
-                               (TID.TSFlags & SIInstrFlags::MIMG))) {
+                                !(TID.TSFlags & SIInstrFlags::Spill)))) {
     switch (RCID) {
     case AMDGPU::AV_32RegClassID:
       RCID = AMDGPU::VGPR_32RegClassID;
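For reference, here is the post-patch shape of the check as a standalone C++ sketch (assumes LLVM's AMDGPU target headers; cases other than AV_32 are elided and assumed to follow the same AV_* to VGPR_* pattern):

// Sketch of the narrowing that remains after this patch; not a drop-in
// replacement for the real function.
static unsigned narrowRCIDSketch(const GCNSubtarget &ST,
                                 const MCInstrDesc &TID, unsigned RCID) {
  // Only pre-gfx90a targets must force VMEM data operands into VGPRs; the
  // unconditional MIMG narrowing is gone, so gfx90a+ keeps AV classes.
  if (!ST.hasGFX90AInsts() && (TID.mayLoad() || TID.mayStore()) &&
      !(TID.TSFlags & SIInstrFlags::Spill)) {
    switch (RCID) {
    case AMDGPU::AV_32RegClassID:
      return AMDGPU::VGPR_32RegClassID;
    // ... analogous AV_* -> VGPR_* cases elided ...
    default:
      break;
    }
  }
  return RCID;
}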
Lines changed: 170 additions & 0 deletions
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
+
+define amdgpu_ps void @atomic_swap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+; FIXME: This should directly use the AGPRs
+define amdgpu_ps void @atomic_cmpswap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a1
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i32 asm "; def $0", "=a"()
+  %swap = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i64 asm "; def $0", "=a"()
+  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i64 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i64 asm "; def $0", "=a"()
+  %swap = call i64 asm "; def $0", "=a"()
+  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i64 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v1, a0
+; GFX90A-NEXT:    image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a1
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i32 asm "; def $0", "=a"()
+  %swap = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i64 asm "; def $0", "=a"()
+  %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i64 asm "; def $0", "=a"()
+  %swap = call i64 asm "; def $0", "=a"()
+  %unused = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll

Lines changed: 108 additions & 0 deletions
@@ -418,6 +418,114 @@ main_body:
   ret <4 x float> %v
 }
 
+define amdgpu_ps void @load_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: load_1d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @load_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: load_2d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @load_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: load_3d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @load_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: load_cube_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @store_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: store_1d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: store_2d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: store_3d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: store_cube_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
 declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
 declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
