Skip to content

Commit 76a6947

Browse files
committed
AMDGPU: Add agpr variants of multi-data DS instructions
The instruction definitions for loads and stores do not accurately model the operand constraints of loads and stores with AGPRs. They use AV register classes, plus a hack in getRegClass/getOpRegClass to avoid using AGPRs or AV classes with the multiple operand cases, but it did not consider the 3 operand case. Model this correctly by using separate all-VGPR and all-AGPR variants for the cases with multiple data operands. This does regress the assembler errors on gfx908 for the multi-operand cases. It now reports a generic invalid-operand error for GPU instead of the specific message that AGPR loads and stores aren't supported. In the future AMDGPURewriteAGPRCopyMFMA should be taught to replace the VGPR forms with the AGPR ones. Most of the diff is fighting the DS pseudo structure. The mnemonic was being used as the key to SIMCInstr, which is a collision in the AGPR case. We also need to go out of our way to make sure we are using the gfx9+ variants of the pseudos without the m0 use. The DS multiclasses could use a lot of cleanup. Fixes #155777
1 parent 849b10b commit 76a6947

File tree

6 files changed

+697
-509
lines changed

6 files changed

+697
-509
lines changed

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 366 additions & 284 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2598,6 +2598,17 @@ class getLdStRegisterOperand<RegisterClass RC> {
25982598
!eq(RC.Size, 1024) : AVLdSt_1024);
25992599
}
26002600

2601+
class getEquivalentAGPRClass<RegisterClass RC> {
2602+
RegisterClass ret =
2603+
!cond(!eq(RC.Size, 32) : AGPR_32,
2604+
!eq(RC.Size, 64) : AReg_64,
2605+
!eq(RC.Size, 96) : AReg_96,
2606+
!eq(RC.Size, 128) : AReg_128,
2607+
!eq(RC.Size, 160) : AReg_160,
2608+
!eq(RC.Size, 1024) : AReg_1024);
2609+
}
2610+
2611+
26012612
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
26022613
ValueType Src1VT = i32, ValueType Src2VT = i32> {
26032614
bit ret = !if(!eq(DstVT.Size, 64),

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,3 +1395,35 @@ def AISrc_512_f32 : SrcRegOrImmA9 <AReg_512, "OPERAND_REG_INLINE_AC_FP32">;
13951395
def AISrc_512_b32 : SrcRegOrImmA9 <AReg_512, "OPERAND_REG_INLINE_AC_INT32">;
13961396
def AISrc_1024_f32 : SrcRegOrImmA9 <AReg_1024, "OPERAND_REG_INLINE_AC_FP32">;
13971397
def AISrc_1024_b32 : SrcRegOrImmA9 <AReg_1024, "OPERAND_REG_INLINE_AC_INT32">;
1398+
1399+
//===----------------------------------------------------------------------===//
1400+
// Tablegen programming utilities
1401+
//===----------------------------------------------------------------------===//
1402+
1403+
/// Helper function to extract the register class from an
1404+
/// instruction's operand list, which may be a RegisterOperand or a
1405+
/// direct RegisterClass reference.
1406+
class getRegClassFromOp<DAGOperand Op> {
1407+
SIRegisterClass ret = !if(
1408+
!isa<RegisterOperand>(Op),
1409+
!cast<SIRegisterClass>(!cast<RegisterOperand>(Op).RegClass),
1410+
!cast<SIRegisterClass>(Op));
1411+
}
1412+
1413+
/// Check if the operand will use an AV_* class.
1414+
class OperandIsAV<DAGOperand Op> {
1415+
defvar reg_class = getRegClassFromOp<Op>.ret;
1416+
bit ret = !and(reg_class.HasAGPR, reg_class.HasVGPR);
1417+
}
1418+
1419+
/// Check if the operand will use an AGPR class.
1420+
class OperandIsAGPR<DAGOperand Op> {
1421+
defvar reg_class = getRegClassFromOp<Op>.ret;
1422+
bit ret = !and(reg_class.HasAGPR, !not(reg_class.HasVGPR));
1423+
}
1424+
1425+
/// Check if the operand will use a VGPR class.
1426+
class OperandIsVGPR<DAGOperand Op> {
1427+
defvar reg_class = getRegClassFromOp<Op>.ret;
1428+
bit ret = !and(reg_class.HasVGPR, !not(reg_class.HasAGPR));
1429+
}

llvm/test/CodeGen/AMDGPU/a-v-ds-atomic-cmpxchg.ll

Lines changed: 103 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -77,49 +77,112 @@ define void @ds_atomic_cmpxchg_i32_ret_av_av__a(ptr addrspace(3) %ptr) #0 {
7777
ret void
7878
}
7979

80-
; FIXME: Broken
81-
; define void @ds_atomic_cmpxchg_i32_ret_a_a__a(ptr addrspace(3) %ptr) #0 {
82-
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
83-
; %data0 = call i32 asm "; def $0", "=a"()
84-
; %data1 = call i32 asm "; def $0", "=a"()
85-
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
86-
; %result = extractvalue { i32, i1 } %pair, 0
87-
; call void asm "; use $0", "a"(i32 %result)
88-
; ret void
89-
; }
80+
define void @ds_atomic_cmpxchg_i32_ret_a_a__a(ptr addrspace(3) %ptr) #0 {
81+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_a__a:
82+
; CHECK: ; %bb.0:
83+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84+
; CHECK-NEXT: ;;#ASMSTART
85+
; CHECK-NEXT: ; def a0
86+
; CHECK-NEXT: ;;#ASMEND
87+
; CHECK-NEXT: ;;#ASMSTART
88+
; CHECK-NEXT: ; def a1
89+
; CHECK-NEXT: ;;#ASMEND
90+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
91+
; CHECK-NEXT: v_accvgpr_read_b32 v2, a1
92+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
93+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
94+
; CHECK-NEXT: v_accvgpr_write_b32 a0, v0
95+
; CHECK-NEXT: ;;#ASMSTART
96+
; CHECK-NEXT: ; use a0
97+
; CHECK-NEXT: ;;#ASMEND
98+
; CHECK-NEXT: s_setpc_b64 s[30:31]
99+
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
100+
%data0 = call i32 asm "; def $0", "=a"()
101+
%data1 = call i32 asm "; def $0", "=a"()
102+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
103+
%result = extractvalue { i32, i1 } %pair, 0
104+
call void asm "; use $0", "a"(i32 %result)
105+
ret void
106+
}
90107

91-
; FIXME: Broken
92-
; define void @ds_atomic_cmpxchg_i32_ret_a_a__v(ptr addrspace(3) %ptr) #0 {
93-
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
94-
; %data0 = call i32 asm "; def $0", "=a"()
95-
; %data1 = call i32 asm "; def $0", "=a"()
96-
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
97-
; %result = extractvalue { i32, i1 } %pair, 0
98-
; call void asm "; use $0", "v"(i32 %result)
99-
; ret void
100-
; }
108+
define void @ds_atomic_cmpxchg_i32_ret_a_a__v(ptr addrspace(3) %ptr) #0 {
109+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_a__v:
110+
; CHECK: ; %bb.0:
111+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; CHECK-NEXT: ;;#ASMSTART
113+
; CHECK-NEXT: ; def a0
114+
; CHECK-NEXT: ;;#ASMEND
115+
; CHECK-NEXT: ;;#ASMSTART
116+
; CHECK-NEXT: ; def a1
117+
; CHECK-NEXT: ;;#ASMEND
118+
; CHECK-NEXT: v_accvgpr_read_b32 v1, a0
119+
; CHECK-NEXT: v_accvgpr_read_b32 v2, a1
120+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
121+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
122+
; CHECK-NEXT: ;;#ASMSTART
123+
; CHECK-NEXT: ; use v0
124+
; CHECK-NEXT: ;;#ASMEND
125+
; CHECK-NEXT: s_setpc_b64 s[30:31]
126+
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
127+
%data0 = call i32 asm "; def $0", "=a"()
128+
%data1 = call i32 asm "; def $0", "=a"()
129+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
130+
%result = extractvalue { i32, i1 } %pair, 0
131+
call void asm "; use $0", "v"(i32 %result)
132+
ret void
133+
}
101134

102-
; FIXME: Broken
103-
; define void @ds_atomic_cmpxchg_i32_ret_v_a__v(ptr addrspace(3) %ptr) #0 {
104-
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
105-
; %data0 = call i32 asm "; def $0", "=v"()
106-
; %data1 = call i32 asm "; def $0", "=a"()
107-
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
108-
; %result = extractvalue { i32, i1 } %pair, 0
109-
; call void asm "; use $0", "v"(i32 %result)
110-
; ret void
111-
; }
135+
define void @ds_atomic_cmpxchg_i32_ret_v_a__v(ptr addrspace(3) %ptr) #0 {
136+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_a__v:
137+
; CHECK: ; %bb.0:
138+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139+
; CHECK-NEXT: ;;#ASMSTART
140+
; CHECK-NEXT: ; def a0
141+
; CHECK-NEXT: ;;#ASMEND
142+
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
143+
; CHECK-NEXT: ;;#ASMSTART
144+
; CHECK-NEXT: ; def v1
145+
; CHECK-NEXT: ;;#ASMEND
146+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v1, v2 offset:40
147+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
148+
; CHECK-NEXT: ;;#ASMSTART
149+
; CHECK-NEXT: ; use v0
150+
; CHECK-NEXT: ;;#ASMEND
151+
; CHECK-NEXT: s_setpc_b64 s[30:31]
152+
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
153+
%data0 = call i32 asm "; def $0", "=v"()
154+
%data1 = call i32 asm "; def $0", "=a"()
155+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
156+
%result = extractvalue { i32, i1 } %pair, 0
157+
call void asm "; use $0", "v"(i32 %result)
158+
ret void
159+
}
112160

113-
; FIXME: Broken
114-
; define void @ds_atomic_cmpxchg_i32_ret_a_v__v(ptr addrspace(3) %ptr) #0 {
115-
; %gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
116-
; %data0 = call i32 asm "; def $0", "=a"()
117-
; %data1 = call i32 asm "; def $0", "=v"()
118-
; %pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
119-
; %result = extractvalue { i32, i1 } %pair, 0
120-
; call void asm "; use $0", "v"(i32 %result)
121-
; ret void
122-
; }
161+
define void @ds_atomic_cmpxchg_i32_ret_a_v__v(ptr addrspace(3) %ptr) #0 {
162+
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_a_v__v:
163+
; CHECK: ; %bb.0:
164+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165+
; CHECK-NEXT: ;;#ASMSTART
166+
; CHECK-NEXT: ; def a0
167+
; CHECK-NEXT: ;;#ASMEND
168+
; CHECK-NEXT: v_accvgpr_read_b32 v2, a0
169+
; CHECK-NEXT: ;;#ASMSTART
170+
; CHECK-NEXT: ; def v1
171+
; CHECK-NEXT: ;;#ASMEND
172+
; CHECK-NEXT: ds_cmpst_rtn_b32 v0, v0, v2, v1 offset:40
173+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
174+
; CHECK-NEXT: ;;#ASMSTART
175+
; CHECK-NEXT: ; use v0
176+
; CHECK-NEXT: ;;#ASMEND
177+
; CHECK-NEXT: s_setpc_b64 s[30:31]
178+
%gep.0 = getelementptr inbounds [512 x i32], ptr addrspace(3) %ptr, i32 0, i32 10
179+
%data0 = call i32 asm "; def $0", "=a"()
180+
%data1 = call i32 asm "; def $0", "=v"()
181+
%pair = cmpxchg ptr addrspace(3) %gep.0, i32 %data0, i32 %data1 seq_cst monotonic
182+
%result = extractvalue { i32, i1 } %pair, 0
183+
call void asm "; use $0", "v"(i32 %result)
184+
ret void
185+
}
123186

124187
define void @ds_atomic_cmpxchg_i32_ret_v_v__a(ptr addrspace(3) %ptr) #0 {
125188
; CHECK-LABEL: ds_atomic_cmpxchg_i32_ret_v_v__a:

llvm/test/MC/AMDGPU/gfx90a_err.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,13 +166,13 @@ buffer_store_dwordx4 v[0:3], off, s[12:15], s4 offset:4095 glc tfe
166166
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
167167

168168
ds_write2_b64 v1, a[4:5], v[2:3] offset1:255
169-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: data and dst should be all VGPR or AGPR
169+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
170170

171171
ds_write2_b64 v1, v[4:5], a[2:3] offset1:255
172-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: data and dst should be all VGPR or AGPR
172+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
173173

174174
ds_wrxchg2st64_rtn_b32 v[6:7], v1, a2, a3 offset0:127
175-
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: data and dst should be all VGPR or AGPR
175+
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
176176

177177
image_load v[0:4], v2, s[0:7] dmask:0xf unorm tfe
178178
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

0 commit comments

Comments
 (0)