Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/docs/AMDGPUUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,15 @@ The AMDGPU backend supports the following LLVM IR attributes.
"amdgpu-no-workgroup-id-z" The same as amdgpu-no-workitem-id-x, except for the
llvm.amdgcn.workgroup.id.z intrinsic.

"amdgpu-no-cluster-id-x" The same as amdgpu-no-workitem-id-x, except for the
llvm.amdgcn.cluster.id.x intrinsic.

"amdgpu-no-cluster-id-y" The same as amdgpu-no-workitem-id-x, except for the
llvm.amdgcn.cluster.id.y intrinsic.

"amdgpu-no-cluster-id-z" The same as amdgpu-no-workitem-id-x, except for the
llvm.amdgcn.cluster.id.z intrinsic.

"amdgpu-no-dispatch-ptr" The same as amdgpu-no-workitem-id-x, except for the
llvm.amdgcn.dispatch.ptr intrinsic.

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@ AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
AMDGPU_ATTRIBUTE(DEFAULT_QUEUE, "amdgpu-no-default-queue")
AMDGPU_ATTRIBUTE(COMPLETION_ACTION, "amdgpu-no-completion-action")
AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
AMDGPU_ATTRIBUTE(CLUSTER_ID_X, "amdgpu-no-cluster-id-x")
AMDGPU_ATTRIBUTE(CLUSTER_ID_Y, "amdgpu-no-cluster-id-y")
AMDGPU_ATTRIBUTE(CLUSTER_ID_Z, "amdgpu-no-cluster-id-z")

#undef AMDGPU_ATTRIBUTE
164 changes: 163 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
return WORKGROUP_ID_Z;
case Intrinsic::amdgcn_cluster_id_x:
NonKernelOnly = true;
return CLUSTER_ID_X;
case Intrinsic::amdgcn_cluster_id_y:
return CLUSTER_ID_Y;
case Intrinsic::amdgcn_cluster_id_z:
return CLUSTER_ID_Z;
case Intrinsic::amdgcn_lds_kernel_id:
return LDS_KERNEL_ID;
case Intrinsic::amdgcn_dispatch_ptr:
Expand Down Expand Up @@ -1296,6 +1303,157 @@ struct AAAMDGPUNoAGPR

const char AAAMDGPUNoAGPR::ID = 0;

/// Abstract attribute that carries the "amdgpu-cluster-dims" function
/// attribute from kernel entry functions down to device functions.
struct AAAMDGPUClusterDims
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;

  /// Identity tag for this attribute class; only its address is meaningful.
  static const char ID;

  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Factory: build the concrete attribute for the position \p IRP.
  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
                                                Attributor &A);

  /// The cluster dimensions currently tracked by this attribute.
  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDGPUClusterDims"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Returns true when \p AA is an AAAMDGPUClusterDims, decided by comparing
  /// the address of its ID against ours.
  static bool classof(const AbstractAttribute *AA) {
    return &ID == AA->getIdAddr();
  }
};

// Out-of-line definition of the class identity tag. getIdAddr() and classof()
// only ever use the address of this object, never the stored value.
const char AAAMDGPUClusterDims::ID = 0;

struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
: AAAMDGPUClusterDims(IRP, A) {}

void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
assert(F && "empty associated function");

Attr = AMDGPU::ClusterDimsAttr::get(*F);

// No matter what a kernel function has, it is final.
if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
if (Attr.isUnknown())
indicatePessimisticFixpoint();
else
indicateOptimisticFixpoint();
}
}

const std::string getAsStr(Attributor *A) const override {
if (!getAssumed() || Attr.isUnknown())
return "unknown";
if (Attr.isNoCluster())
return "no";
if (Attr.isVariableDims())
return "variable";
return Attr.to_string();
}

void trackStatistics() const override {}

ChangeStatus updateImpl(Attributor &A) override {
auto OldState = Attr;

auto CheckCallSite = [&](AbstractCallSite CS) {
const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
*this, IRPosition::function(*CS.getInstruction()->getFunction()),
DepClassTy::REQUIRED);
if (!CallerAA || !CallerAA->isValidState())
return false;

return merge(CallerAA->getClusterDims());
};

bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CheckCallSite, *this,
/*RequireAllCallSites=*/true,
UsedAssumedInformation))
return indicatePessimisticFixpoint();

return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
}

ChangeStatus manifest(Attributor &A) override {
if (Attr.isUnknown())
return ChangeStatus::UNCHANGED;
return A.manifestAttrs(
getIRPosition(),
{Attribute::get(getAssociatedFunction()->getContext(), AttrName,
Attr.to_string())},
/*ForceReplace=*/true);
}

const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
return Attr;
}

private:
bool merge(const AMDGPU::ClusterDimsAttr &Other) {
// Case 1: Both of them are unknown yet, we do nothing and continue wait for
// propagation.
if (Attr.isUnknown() && Other.isUnknown())
return true;

// Case 2: The other is determined, but we are unknown yet, we simply take
// the other's value.
if (Attr.isUnknown()) {
Attr = Other;
return true;
}

// Case 3: We are determined but the other is unknown yet, we simply keep
// everything unchanged.
if (Other.isUnknown())
return true;

// After this point, both are determined.

// Case 4: If they are same, we do nothing.
if (Attr == Other)
return true;

// Now they are not same.

// Case 5: If either of us uses cluster (but not both; otherwise case 4
// would hold), then it is unknown whether cluster will be used, and the
// state is final, unlike case 1.
if (Attr.isNoCluster() || Other.isNoCluster()) {
Attr.setUnknown();
return false;
}

// Case 6: Both of us use cluster, but the dims are different, so the result
// is, cluster is used, but we just don't have a fixed dims.
Attr.setVariableDims();
return true;
}

AMDGPU::ClusterDimsAttr Attr;

static constexpr const char AttrName[] = "amdgpu-cluster-dims";
};

/// Allocate the concrete cluster-dims attribute for \p IRP from the
/// Attributor's allocator. Only function positions are supported.
AAAMDGPUClusterDims &
AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() != IRPosition::IRP_FUNCTION)
    llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
  return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
}

static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
AMDGPUAttributorOptions Options,
ThinOrFullLTOPhase LTOPhase) {
Expand All @@ -1314,7 +1472,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
&AAIndirectCallInfo::ID});
&AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});

AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
Expand Down Expand Up @@ -1352,6 +1510,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
}

const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
if (!F->isDeclaration() && ST.hasClusters())
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));

for (auto &I : instructions(F)) {
Value *Ptr = nullptr;
if (auto *LI = dyn_cast<LoadInst>(&I))
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1827,7 +1827,7 @@ class ClusterDimsAttr {

bool isFixedDims() const { return getKind() == Kind::FixedDims; }

bool isVariableedDims() const { return getKind() == Kind::VariableDims; }
bool isVariableDims() const { return getKind() == Kind::VariableDims; }

void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); }

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,6 @@ attributes #1 = { nounwind }

;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,9 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {

attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
Expand Down
Loading