Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 72 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1211,16 +1211,81 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}

static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
for (const auto &CI : IA->ParseConstraints()) {
/// Compute the minimum number of AGPRs required to allocate the inline asm.
static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
const CallBase &Call) {
unsigned ArgNo = 0;
unsigned ResNo = 0;
unsigned AGPRDefCount = 0;
unsigned AGPRUseCount = 0;
unsigned MaxPhysReg = 0;
const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();

// TODO: Overestimates due to not accounting for tied operands
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
Type *Ty = nullptr;
switch (CI.Type) {
case InlineAsm::isOutput: {
Ty = Call.getType();
if (auto *STy = dyn_cast<StructType>(Ty))
Ty = STy->getElementType(ResNo);
++ResNo;
break;
}
case InlineAsm::isInput: {
Ty = Call.getArgOperand(ArgNo++)->getType();
break;
}
case InlineAsm::isLabel:
continue;
case InlineAsm::isClobber:
// Parse the physical register reference.
break;
}

for (StringRef Code : CI.Codes) {
Code.consume_front("{");
if (Code.starts_with("a"))
return true;
unsigned RegCount = 0;
if (Code.starts_with("a")) {
// Virtual register, compute number of registers based on the type.
//
// We ought to be going through TargetLowering to get the number of
// registers, but we should avoid the dependence on CodeGen here.
RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
} else {
// Physical register reference
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
if (Kind == 'a') {
RegCount = NumRegs;
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
}

continue;
}

if (CI.Type == InlineAsm::isOutput) {
// Apply tuple alignment requirement
//
// TODO: This is more conservative than necessary.
AGPRDefCount = alignTo(AGPRDefCount, RegCount);

AGPRDefCount += RegCount;
if (CI.isEarlyClobber) {
AGPRUseCount = alignTo(AGPRUseCount, RegCount);
AGPRUseCount += RegCount;
}
} else {
AGPRUseCount = alignTo(AGPRUseCount, RegCount);
AGPRUseCount += RegCount;
}
}
}

return false;
unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);

// TODO: This is overly conservative. If there are any physical registers,
// allocate any virtual registers after them so we don't have to solve optimal
// packing.
return std::min(MaxVirtReg + MaxPhysReg, 256u);
}

// TODO: Migrate to range merge of amdgpu-agpr-alloc.
Expand Down Expand Up @@ -1259,7 +1324,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
const Function *Callee = dyn_cast<Function>(CalleeOp);
if (!Callee) {
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
return !inlineAsmUsesAGPRs(IA);
return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
return false;
}

Expand Down
Loading