Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 127 additions & 29 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,17 @@ static cl::opt<bool> UseCostHeur(
"Experimentally, results are mixed, so this should be set on a "
"case-by-case basis."));

// Hidden, off-by-default switch: when set, SchedGroup classification and size
// accounting use the opcodes an instruction may be downcast to (as reported by
// SIInstrInfo::getDowncastSequence) instead of the instruction's own opcode.
static cl::opt<bool> UseDowncastOps(
    "amdgpu-igrouplp-use-downcast-ops", cl::Hidden,
    cl::desc(
        "Whether to use the downcast alternative OpCodes instead of the "
        "current OpCode. Under certain conditions, some OpCodes may be "
        "downcast to an alternative sequence after scheduling "
        "(e.g. V_PK_MUL_F32 -> V_MUL_F32). This flag enables SchedGroup "
        "classification based on the alternative."),
    cl::init(false));

// Components of the mask that determines which instruction types may be
// classified into a SchedGroup.
enum class SchedGroupMask {
Expand Down Expand Up @@ -133,6 +144,8 @@ class SchedGroup {
// SGID is used to map instructions to candidate SchedGroups
unsigned SGID;

// Number of slots currently consumed in this SchedGroup; this is what isFull()
// compares against MaxSize. It is maintained by add(), pop() and clear() and
// may differ from Collection.size(): with UseDowncastOps, one SUnit is counted
// once per opcode of its downcast sequence that this group can classify.
unsigned CurrentSize = 0;

// The different rules each instruction in this SchedGroup must conform to
SmallVector<std::shared_ptr<InstructionRule>, 4> Rules;

Expand All @@ -143,9 +156,14 @@ class SchedGroup {
bool tryAddEdge(SUnit *A, SUnit *B);

// Use SGMask to determine whether we can classify MI as a member of this
// SchedGroup object.
// SchedGroup object. If UseDowncastOps is specified, and this is a candidate
// for downcasting, then use the DownCasted OpCodes.
bool canAddMI(const MachineInstr &MI) const;

// Use SGMask to determine whether we can classify an opcode as a member of
// this SchedGroup object.
bool canAddSingleMI(unsigned Opcode, bool MayLoad, bool MayStore) const;

public:
// Collection of SUnits that are classified as members of this group.
SmallVector<SUnit *, 32> Collection;
Expand Down Expand Up @@ -176,7 +194,7 @@ class SchedGroup {
void link(SchedGroup &OtherGroup);

// Returns true if no more instructions may be added to this group.
bool isFull() const { return MaxSize && Collection.size() >= *MaxSize; }
// A group without a MaxSize is never full. Otherwise fullness is decided by
// CurrentSize, not by the number of SUnits held in Collection.
bool isFull() const { return MaxSize && CurrentSize >= *MaxSize; }

// Append a constraint that SUs must meet in order to fit into this
// SchedGroup. Since many rules involve the relationship between a SchedGroup
Expand All @@ -202,10 +220,55 @@ class SchedGroup {
<< format_hex((int)SGMask, 10, true) << " adding "
<< *SU.getInstr());
Collection.push_back(&SU);
MachineInstr &MI = *SU.getInstr();
if (!UseDowncastOps || MI.isMetaInstruction()) {
++CurrentSize;
return;
}

SmallVector<unsigned, 4> UnpackSequence;
if (!TII->getDowncastSequence(MI, UnpackSequence,
DAG->MF.getSubtarget<GCNSubtarget>())) {
++CurrentSize;
return;
}

for (unsigned UnpackOp : UnpackSequence) {
if (canAddSingleMI(UnpackOp, MI.mayLoad(), MI.mayStore()))
++CurrentSize;
}
}

// Remove last element in the SchedGroup
void pop() { Collection.pop_back(); }
void pop() {
SUnit *SU = Collection.pop_back_val();
MachineInstr &MI = *SU->getInstr();
if (!UseDowncastOps || MI.isMetaInstruction()) {
assert(CurrentSize >= 1);
--CurrentSize;
return;
}

SmallVector<unsigned, 4> UnpackSequence;
if (!TII->getDowncastSequence(MI, UnpackSequence,
DAG->MF.getSubtarget<GCNSubtarget>())) {
assert(CurrentSize >= 1);
--CurrentSize;
return;
}

for (unsigned UnpackOp : UnpackSequence) {
if (canAddSingleMI(UnpackOp, MI.mayLoad(), MI.mayStore())) {
assert(CurrentSize >= 1);
--CurrentSize;
}
}
}

// Empty the group: forget every collected SUnit and zero the slot counter so
// that the two stay in agreement.
void clear() {
  CurrentSize = 0;
  Collection.clear();
}

// Identify and add all relevant SUs from the DAG to this SchedGroup.
void initSchedGroup();
Expand Down Expand Up @@ -371,16 +434,16 @@ class PipelineSolver {
};

void PipelineSolver::reset() {

for (auto &SyncPipeline : CurrPipeline) {
for (auto &SG : SyncPipeline) {
SmallVector<SUnit *, 32> TempCollection = SG.Collection;
SG.Collection.clear();
SG.clear();
auto *SchedBarr = llvm::find_if(TempCollection, [](SUnit *SU) {
return SU->getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER;
});
if (SchedBarr != TempCollection.end())
SG.Collection.push_back(*SchedBarr);
if (SchedBarr != TempCollection.end()) {
SG.add(**SchedBarr);
}
}
}

Expand Down Expand Up @@ -2386,64 +2449,99 @@ bool SchedGroup::tryAddEdge(SUnit *A, SUnit *B) {
return false;
}

bool SchedGroup::canAddMI(const MachineInstr &MI) const {
bool SchedGroup::canAddSingleMI(unsigned Opcode, bool MayLoad,
bool MayStore) const {
bool Result = false;
if (MI.isMetaInstruction())
Result = false;

else if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
(TII->isVALU(MI) || TII->isMFMAorWMMA(MI) || TII->isSALU(MI) ||
TII->isTRANS(MI)))
Result = !MI.mayLoadOrStore();
if (((SGMask & SchedGroupMask::ALU) != SchedGroupMask::NONE) &&
(TII->isVALU(Opcode) || TII->isMFMAorWMMA(Opcode) ||
TII->isSALU(Opcode) || TII->isTRANS(Opcode)))
Result = !(MayLoad || MayStore);

else if (((SGMask & SchedGroupMask::VALU) != SchedGroupMask::NONE) &&
TII->isVALU(MI) && !TII->isMFMAorWMMA(MI) && !TII->isTRANS(MI)) {
TII->isVALU(Opcode) && !TII->isMFMAorWMMA(Opcode) &&
!TII->isTRANS(Opcode)) {
// Some memory instructions may be marked as VALU (e.g. BUFFER_LOAD_*_LDS).
// For our purposes, these shall not be classified as VALU as this results
// in unexpected behavior.
Result = !MI.mayLoadOrStore();
Result = !(MayLoad || MayStore);
}

else if (((SGMask & SchedGroupMask::SALU) != SchedGroupMask::NONE) &&
TII->isSALU(MI))
Result = !MI.mayLoadOrStore();
TII->isSALU(Opcode))
Result = !(MayLoad || MayStore);

else if (((SGMask & SchedGroupMask::MFMA) != SchedGroupMask::NONE) &&
TII->isMFMAorWMMA(MI))
TII->isMFMAorWMMA(Opcode))
Result = true;

else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
TII->isVMEM(MI))
(TII->isVMEM(Opcode) || TII->isFLAT(Opcode)))
Result = true;

else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
MI.mayLoad() && TII->isVMEM(MI))
MayLoad && (TII->isVMEM(Opcode) || TII->isFLAT(Opcode)))
Result = true;

else if (((SGMask & SchedGroupMask::VMEM_WRITE) != SchedGroupMask::NONE) &&
MI.mayStore() && TII->isVMEM(MI))
MayStore && (TII->isVMEM(Opcode) || TII->isFLAT(Opcode)))
Result = true;

else if (((SGMask & SchedGroupMask::DS) != SchedGroupMask::NONE) &&
TII->isDS(MI))
TII->isDS(Opcode))
Result = true;

else if (((SGMask & SchedGroupMask::DS_READ) != SchedGroupMask::NONE) &&
MI.mayLoad() && TII->isDS(MI))
MayLoad && TII->isDS(Opcode))
Result = true;

else if (((SGMask & SchedGroupMask::DS_WRITE) != SchedGroupMask::NONE) &&
MI.mayStore() && TII->isDS(MI))
MayStore && TII->isDS(Opcode))
Result = true;

else if (((SGMask & SchedGroupMask::TRANS) != SchedGroupMask::NONE) &&
TII->isTRANS(MI))
TII->isTRANS(Opcode))
Result = true;

LLVM_DEBUG(
dbgs() << "For SchedGroup with mask " << format_hex((int)SGMask, 10, true)
<< (Result ? " could classify " : " unable to classify ") << MI);
return Result;
}

bool SchedGroup::canAddMI(const MachineInstr &MI) const {
bool Result = false;

auto emitDebug = [this](const MachineInstr &MI, bool Result) {
LLVM_DEBUG(dbgs() << "For SchedGroup with mask "
<< format_hex((int)SGMask, 10, true)
<< (Result ? " could classify " : " unable to classify ")
<< MI);
};

if (MI.isMetaInstruction()) {
emitDebug(MI, false);
return false;
}

if (!UseDowncastOps) {
Result = canAddSingleMI(MI.getOpcode(), MI.mayLoad(), MI.mayStore());
emitDebug(MI, Result);
return Result;
}

SmallVector<unsigned, 4> UnpackSequence;
if (!TII->getDowncastSequence(MI, UnpackSequence,
DAG->MF.getSubtarget<GCNSubtarget>())) {
Result = canAddSingleMI(MI.getOpcode(), MI.mayLoad(), MI.mayStore());
emitDebug(MI, Result);
return Result;
}

// We have an unpackable MI, check if the unpack OpCodes are classifiable by
// this mask.
for (unsigned UnpackOp : UnpackSequence) {
Result |= canAddSingleMI(UnpackOp, MI.mayLoad(), MI.mayStore());
}

emitDebug(MI, Result);
return Result;
}

Expand Down
35 changes: 35 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6366,6 +6366,41 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
return isImmOperandLegal(MI, OpIdx, *MO);
}

/// Report the opcodes that the packed instruction \p MI may be downcast to.
///
/// Handles V_PK_ADD_F32, V_PK_MUL_F32 and V_PK_FMA_F32 on subtargets for which
/// \p ST.hasGFX940Insts() is true: the matching non-packed e64 opcode is
/// appended to \p Sequence twice and true is returned.
///
/// For any other opcode, or when the subtarget check fails, returns false and
/// leaves \p Sequence untouched. \p Sequence is appended to, never cleared, so
/// callers should pass an empty vector.
bool SIInstrInfo::getDowncastSequence(const MachineInstr &MI,
                                      SmallVectorImpl<unsigned> &Sequence,
                                      const GCNSubtarget &ST) const {
  // Use 64 bit encoding to allow use of VOP3 instructions.
  // VOP3 e64 instructions allow source modifiers
  // e32 instructions don't allow source modifiers.
  unsigned UnpackedOpc;
  switch (MI.getOpcode()) {
  case AMDGPU::V_PK_ADD_F32:
    UnpackedOpc = AMDGPU::V_ADD_F32_e64;
    break;
  case AMDGPU::V_PK_MUL_F32:
    UnpackedOpc = AMDGPU::V_MUL_F32_e64;
    break;
  case AMDGPU::V_PK_FMA_F32:
    UnpackedOpc = AMDGPU::V_FMA_F32_e64;
    break;
  default:
    return false;
  }

  // All handled opcodes share the same subtarget requirement.
  const bool IsGFX940Plus = ST.hasGFX940Insts();
  if (!IsGFX940Plus)
    return false;

  // One unpacked instruction per entry pushed.
  Sequence.push_back(UnpackedOpc);
  Sequence.push_back(UnpackedOpc);
  return true;
}

bool SIInstrInfo::isNeverCoissue(MachineInstr &MI) const {
bool IsGFX950Only = ST.hasGFX950Insts();
bool IsGFX940Only = ST.hasGFX940Insts();
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {

bool isNeverCoissue(MachineInstr &MI) const;

/// Append to \p Sequence the opcodes that \p MI may be downcast to (for
/// example a packed F32 add/mul/fma becomes two non-packed e64 opcodes) and
/// return true. Returns false, leaving \p Sequence unmodified, when \p MI has
/// no downcast sequence on subtarget \p ST.
bool getDowncastSequence(const MachineInstr &MI,
SmallVectorImpl<unsigned> &Sequence,
const GCNSubtarget &ST) const;

/// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isLegalAV64PseudoImm(uint64_t Imm) const;

Expand Down
Loading