- 
                Notifications
    You must be signed in to change notification settings 
- Fork 14.9k
AMDGPU: Remove MIMG special case in adjustAllocatableRegClass #158184
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Remove MIMG special case in adjustAllocatableRegClass #158184
Conversation
| 
 This stack of pull requests is managed by Graphite. Learn more about stacking. | 
| @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesI have no idea why this was here. MIMG atomics use tied operands Full diff: https://github.com/llvm/llvm-project/pull/158184.diff 3 Files Affected: 
 diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 23a124fecddad..7b7d8c0dfbe16 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5975,10 +5975,10 @@ SIInstrInfo::getWholeWaveFunctionSetup(MachineFunction &MF) const {
 
 static const TargetRegisterClass *
 adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
-                          const MCInstrDesc &TID, unsigned RCID) {
+                          const MCInstrDesc &TID, unsigned RCID,
+                          bool IsAllocatable) {
   if (!ST.hasGFX90AInsts() && (((TID.mayLoad() || TID.mayStore()) &&
-                                !(TID.TSFlags & SIInstrFlags::Spill)) ||
-                               (TID.TSFlags & SIInstrFlags::MIMG))) {
+                                !(TID.TSFlags & SIInstrFlags::Spill)))) {
     switch (RCID) {
     case AMDGPU::AV_32RegClassID:
       RCID = AMDGPU::VGPR_32RegClassID;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
new file mode 100644
index 0000000000000..49607e320bd0a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
+
+define amdgpu_ps void @atomic_swap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+; FIXME: This should directly use the AGPRs
+define amdgpu_ps void @atomic_cmpswap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a1
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i32 asm "; def $0", "=a"()
+  %swap = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i64 asm "; def $0", "=a"()
+  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i64 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i64 asm "; def $0", "=a"()
+  %swap = call i64 asm "; def $0", "=a"()
+  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i64 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v1, a0
+; GFX90A-NEXT:    image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a1
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i32 asm "; def $0", "=a"()
+  %swap = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i64 asm "; def $0", "=a"()
+  %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i64 asm "; def $0", "=a"()
+  %swap = call i64 asm "; def $0", "=a"()
+  %unused = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
index dcac419f8591d..bb4a607fc62d0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
@@ -418,6 +418,114 @@ main_body:
   ret <4 x float> %v
 }
 
+define amdgpu_ps void @load_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: load_1d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret  void
+}
+
+define amdgpu_ps void @load_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: load_2d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret  void
+}
+
+define amdgpu_ps void @load_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: load_3d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret  void
+}
+
+define amdgpu_ps void @load_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: load_cube_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret  void
+}
+
+define amdgpu_ps void @store_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: store_1d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: store_2d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: store_3d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: store_cube_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
 declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
 declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
 | 
I have no idea why this was here. MIMG atomics use tied operands for the input and output, so AV classes should have always worked. We have poor test coverage for AGPRs with atomics, so add a partial set. Everything seems to work OK, although it seems image cmpswap always uses VGPRs unnecessarily.
b2feae8    to
    94b665d      
    Compare
  
    | @arsenm | 
| 
 Fix: #158391 | 

I have no idea why this was here. MIMG atomics use tied operands
for the input and output, so AV classes should have always worked.
We have poor test coverage for AGPRs with atomics, so add a partial
set. Everything seems to work OK, although it seems image cmpswap
always uses VGPRs unnecessarily.