Skip to content

Commit 702d479

Browse files
arsenm authored and github-actions[bot] committed
Automerge: AMDGPU: Figure out required AGPR count for inline asm (#150910)
For now just try to compute the minimum number of AGPRs required to allocate the asm. Leave the attributor changes to turn this into an integer value for later.
2 parents e88516c + 3af95f0 commit 702d479

File tree

2 files changed

+552
-39
lines changed

2 files changed

+552
-39
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 72 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1211,16 +1211,81 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
12111211
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
12121212
}
12131213

1214-
static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
1215-
for (const auto &CI : IA->ParseConstraints()) {
1214+
/// Compute the minimum number of AGPRs required to allocate the inline asm.
1215+
static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1216+
const CallBase &Call) {
1217+
unsigned ArgNo = 0;
1218+
unsigned ResNo = 0;
1219+
unsigned AGPRDefCount = 0;
1220+
unsigned AGPRUseCount = 0;
1221+
unsigned MaxPhysReg = 0;
1222+
const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1223+
1224+
// TODO: Overestimates due to not accounting for tied operands
1225+
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1226+
Type *Ty = nullptr;
1227+
switch (CI.Type) {
1228+
case InlineAsm::isOutput: {
1229+
Ty = Call.getType();
1230+
if (auto *STy = dyn_cast<StructType>(Ty))
1231+
Ty = STy->getElementType(ResNo);
1232+
++ResNo;
1233+
break;
1234+
}
1235+
case InlineAsm::isInput: {
1236+
Ty = Call.getArgOperand(ArgNo++)->getType();
1237+
break;
1238+
}
1239+
case InlineAsm::isLabel:
1240+
continue;
1241+
case InlineAsm::isClobber:
1242+
// Parse the physical register reference.
1243+
break;
1244+
}
1245+
12161246
for (StringRef Code : CI.Codes) {
1217-
Code.consume_front("{");
1218-
if (Code.starts_with("a"))
1219-
return true;
1247+
unsigned RegCount = 0;
1248+
if (Code.starts_with("a")) {
1249+
// Virtual register, compute number of registers based on the type.
1250+
//
1251+
// We ought to be going through TargetLowering to get the number of
1252+
// registers, but we should avoid the dependence on CodeGen here.
1253+
RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1254+
} else {
1255+
// Physical register reference
1256+
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1257+
if (Kind == 'a') {
1258+
RegCount = NumRegs;
1259+
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1260+
}
1261+
1262+
continue;
1263+
}
1264+
1265+
if (CI.Type == InlineAsm::isOutput) {
1266+
// Apply tuple alignment requirement
1267+
//
1268+
// TODO: This is more conservative than necessary.
1269+
AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1270+
1271+
AGPRDefCount += RegCount;
1272+
if (CI.isEarlyClobber) {
1273+
AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1274+
AGPRUseCount += RegCount;
1275+
}
1276+
} else {
1277+
AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1278+
AGPRUseCount += RegCount;
1279+
}
12201280
}
12211281
}
12221282

1223-
return false;
1283+
unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1284+
1285+
// TODO: This is overly conservative. If there are any physical registers,
1286+
// allocate any virtual registers after them so we don't have to solve optimal
1287+
// packing.
1288+
return std::min(MaxVirtReg + MaxPhysReg, 256u);
12241289
}
12251290

12261291
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
@@ -1259,7 +1324,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
12591324
const Function *Callee = dyn_cast<Function>(CalleeOp);
12601325
if (!Callee) {
12611326
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1262-
return !inlineAsmUsesAGPRs(IA);
1327+
return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
12631328
return false;
12641329
}
12651330

0 commit comments

Comments
 (0)