Skip to content

Commit dbc8af1

Browse files
committed
AMDGPU: Figure out required AGPR count for inline asm
For now, just compute the minimum number of AGPRs required to allocate an inline asm call. The attributor changes that turn this count into an integer attribute value are left for a later change.
1 parent 3f62407 commit dbc8af1

File tree

2 files changed

+251
-7
lines changed

2 files changed

+251
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1211,16 +1211,61 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
12111211
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
12121212
}
12131213

1214-
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
1215-
for (const auto &CI : IA->ParseConstraints()) {
1214+
/// Compute the minimum number of AGPRs required to allocate the inline asm.
///
/// Walks the parsed constraints of \p IA and accumulates three quantities:
/// the number of 32-bit registers needed by "a" outputs, the number needed by
/// "a" inputs (plus early-clobber outputs), and the highest register index
/// touched by an explicit physical register reference of kind 'a'.
///
/// \param IA   The inline asm callee whose constraint string is inspected.
/// \param Call The call site; supplies the result and argument types (and the
///             DataLayout) used to size the "a" operands.
/// \returns max(use count, def count) + highest physical register bound,
///          clamped to 256. A result of 0 means no 'a' constraint was found.
1215+
static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1216+
const CallBase &Call) {
1217+
// Index of the next call argument consumed by an input constraint.
unsigned ArgNo = 0;
1218+
// Index of the next result element consumed by an output constraint.
unsigned ResNo = 0;
1219+
unsigned AGPRDefCount = 0;
1220+
unsigned AGPRUseCount = 0;
1221+
// One past the highest register index named by a physical 'a' reference.
unsigned MaxPhysReg = 0;
1222+
const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1223+
1224+
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1225+
// Type of the operand bound to this constraint; stays null for clobbers.
Type *Ty = nullptr;
1226+
switch (CI.Type) {
1227+
case InlineAsm::isOutput: {
1228+
// A struct return type carries one element per output; otherwise the
// call's own type is the single output's type.
// NOTE(review): indirect outputs (CI.isIndirect) are not distinguished
// here; presumably they would be passed as call arguments rather than
// results - confirm whether ArgNo should advance for them instead.
Ty = Call.getType();
1229+
if (auto *STy = dyn_cast<StructType>(Ty))
1230+
Ty = STy->getElementType(ResNo);
1231+
++ResNo;
1232+
break;
1233+
}
1234+
case InlineAsm::isInput: {
1235+
Ty = Call.getArgOperand(ArgNo++)->getType();
1236+
break;
1237+
}
1238+
case InlineAsm::isLabel:
1239+
// Labels are skipped entirely; their codes are never inspected.
continue;
1240+
case InlineAsm::isClobber:
1241+
// Parse the physical register reference.
// No type is associated with a clobber, so Ty is left null and only the
// physical register path below is expected to apply.
break;
1242+
}
1243+
1244+
12161245
for (StringRef Code : CI.Codes) {
1217-
Code.consume_front("{");
1218-
if (Code.starts_with("a"))
1219-
return true;
1246+
if (Code.starts_with("a")) {
1247+
// Virtual register, compute number of registers based on the type.
1248+
//
1249+
// We ought to be going through TargetLowering to get the number of
1250+
// registers, but we should avoid the dependence on CodeGen here.
// One register per 32 bits of the operand type, rounded up.
unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1251+
if (CI.Type == InlineAsm::isOutput) {
1252+
AGPRDefCount += RegCount;
1253+
// Early-clobber outputs are counted against the use total as well,
// presumably because they cannot share registers with the inputs.
if (CI.isEarlyClobber)
1254+
AGPRUseCount += RegCount;
1255+
} else
1256+
AGPRUseCount += RegCount;
1257+
} else {
1258+
// Physical register reference
// Only references whose parsed kind is 'a' contribute. The bound
// RegIdx + NumRegs is clamped to 256 (assumed to be the size of the
// AGPR register file - TODO confirm).
1259+
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1260+
if (Kind == 'a')
1261+
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1262+
}
12201264
}
12211265
}
12221266

1223-
return false;
1267+
// The larger of the def and use totals is taken for virtual registers, and
// the physical register bound is added on top before the final clamp.
unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1268+
return std::min(MaxVirtReg + MaxPhysReg, 256u);
12241269
}
12251270

12261271
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
@@ -1259,7 +1304,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
12591304
const Function *Callee = dyn_cast<Function>(CalleeOp);
12601305
if (!Callee) {
12611306
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1262-
return !inlineAsmUsesAGPRs(IA);
1307+
return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
12631308
return false;
12641309
}
12651310

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,205 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
251251
ret void
252252
}
253253

254+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
255+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
256+
; CHECK-SAME: ) #[[ATTR0]] {
257+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
258+
; CHECK-NEXT: ret void
259+
;
260+
%def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
261+
ret void
262+
}
263+
264+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
265+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
266+
; CHECK-SAME: ) #[[ATTR0]] {
267+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
268+
; CHECK-NEXT: ret void
269+
;
270+
%def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
271+
ret void
272+
}
273+
274+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
275+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
276+
; CHECK-SAME: ) #[[ATTR0]] {
277+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
278+
; CHECK-NEXT: ret void
279+
;
280+
%def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
281+
ret void
282+
}
283+
284+
define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
285+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
286+
; CHECK-SAME: ) #[[ATTR0]] {
287+
; CHECK-NEXT: call void asm sideeffect "
288+
; CHECK-NEXT: ret void
289+
;
290+
call void asm sideeffect "; use $0", "a"(ptr poison)
291+
ret void
292+
}
293+
294+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
295+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
296+
; CHECK-SAME: ) #[[ATTR0]] {
297+
; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
298+
; CHECK-NEXT: ret void
299+
;
300+
%def = call ptr asm sideeffect "; def $0", "=a"()
301+
ret void
302+
}
303+
304+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
305+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
306+
; CHECK-SAME: ) #[[ATTR0]] {
307+
; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
308+
; CHECK-NEXT: ret void
309+
;
310+
%def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
311+
ret void
312+
}
313+
314+
define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
315+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
316+
; CHECK-SAME: ) #[[ATTR0]] {
317+
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
318+
; CHECK-NEXT: ret void
319+
;
320+
%def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
321+
ret void
322+
}
323+
324+
define amdgpu_kernel void @kernel_uses_asm_clobber() {
325+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
326+
; CHECK-SAME: ) #[[ATTR0]] {
327+
; CHECK-NEXT: call void asm sideeffect "
328+
; CHECK-NEXT: ret void
329+
;
330+
call void asm sideeffect "; clobber $0", "~{a4}"()
331+
ret void
332+
}
333+
334+
define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
335+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
336+
; CHECK-SAME: ) #[[ATTR0]] {
337+
; CHECK-NEXT: call void asm sideeffect "
338+
; CHECK-NEXT: ret void
339+
;
340+
call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
341+
ret void
342+
}
343+
344+
define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
345+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
346+
; CHECK-SAME: ) #[[ATTR0]] {
347+
; CHECK-NEXT: call void asm sideeffect "
348+
; CHECK-NEXT: ret void
349+
;
350+
call void asm sideeffect "; clobber $0", "~{a256}"()
351+
ret void
352+
}
353+
354+
define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
355+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
356+
; CHECK-SAME: ) #[[ATTR0]] {
357+
; CHECK-NEXT: call void asm sideeffect "
358+
; CHECK-NEXT: ret void
359+
;
360+
call void asm sideeffect "; clobber $0", "~{a255}"()
361+
ret void
362+
}
363+
364+
define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
365+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
366+
; CHECK-SAME: ) #[[ATTR0]] {
367+
; CHECK-NEXT: call void asm sideeffect "
368+
; CHECK-NEXT: ret void
369+
;
370+
call void asm sideeffect "; use $0", "{a256}"(i32 poison)
371+
ret void
372+
}
373+
374+
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
375+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
376+
; CHECK-SAME: ) #[[ATTR0]] {
377+
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
378+
; CHECK-NEXT: ret void
379+
;
380+
%def = call <32 x i32> asm sideeffect "; def $0", "=a"()
381+
ret void
382+
}
383+
384+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
385+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
386+
; CHECK-SAME: ) #[[ATTR0]] {
387+
; CHECK-NEXT: call void asm sideeffect "
388+
; CHECK-NEXT: ret void
389+
;
390+
call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
391+
ret void
392+
}
393+
394+
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
395+
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
396+
; CHECK-SAME: ) #[[ATTR0]] {
397+
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
398+
; CHECK-NEXT: ret void
399+
;
400+
%def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
401+
ret void
402+
}
403+
404+
define amdgpu_kernel void @vreg_use_exceeds_register_file() {
405+
; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
406+
; CHECK-SAME: ) #[[ATTR0]] {
407+
; CHECK-NEXT: call void asm sideeffect "
408+
; CHECK-NEXT: ret void
409+
;
410+
call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
411+
ret void
412+
}
413+
414+
define amdgpu_kernel void @vreg_def_exceeds_register_file() {
415+
; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
416+
; CHECK-SAME: ) #[[ATTR0]] {
417+
; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
418+
; CHECK-NEXT: ret void
419+
;
420+
%def = call <257 x i32> asm sideeffect "; def $0", "=a"()
421+
ret void
422+
}
423+
424+
define amdgpu_kernel void @multiple() {
425+
; CHECK-LABEL: define amdgpu_kernel void @multiple(
426+
; CHECK-SAME: ) #[[ATTR0]] {
427+
; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
428+
; CHECK-NEXT: ret void
429+
;
430+
%def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
431+
ret void
432+
}
433+
434+
define amdgpu_kernel void @earlyclobber_0() {
435+
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
436+
; CHECK-SAME: ) #[[ATTR0]] {
437+
; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
438+
; CHECK-NEXT: ret void
439+
;
440+
%def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
441+
ret void
442+
}
443+
444+
define amdgpu_kernel void @earlyclobber_1() {
445+
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
446+
; CHECK-SAME: ) #[[ATTR0]] {
447+
; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
448+
; CHECK-NEXT: ret void
449+
;
450+
%def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
451+
ret void
452+
}
254453

255454
attributes #0 = { "amdgpu-agpr-alloc"="0" }
256455
;.

0 commit comments

Comments
 (0)