Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions llvm/include/llvm/ADT/GenericUniformityImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Uniformity.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "uniformity"
Expand Down Expand Up @@ -407,6 +408,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
const CycleT *);

bool isAnyOperandUniform(const InstructionT &Instr) const;

/// \brief Keep track of special target intrinsics that can be proven uniform.
void addSpecialUniformIntrinsic(const InstructionT &Instr);
Comment on lines +413 to +414
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear to me what 'special' means. Is there a better name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, "special" means that the intrinsic can be uniform even though not all of its operands are uniform.

Right, I am also not convinced by this name. I will rename it later, based on the refined functionality.


protected:
/// \brief Value/block pair representing a single phi input.
struct PhiInput {
Expand All @@ -429,6 +435,8 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
// Internal worklist for divergence propagation.
std::vector<const InstructionT *> Worklist;

// Set of special intrinsics that can be proven uniform.
llvm::SmallPtrSet<const InstructionT *, 8> SpecialUniformIntrinsics;
/// \brief Mark \p Term as divergent and push all Instructions that become
/// divergent as a result on the worklist.
void analyzeControlDivergence(const InstructionT &Term);
Expand Down Expand Up @@ -824,6 +832,12 @@ void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
UniformOverrides.insert(&Instr);
}

/// Register \p Instr in the set of special intrinsics that can be proven
/// uniform. Only the address of \p Instr is stored, so inserting the same
/// instruction again leaves the set unchanged.
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::addSpecialUniformIntrinsic(
    const InstructionT &Instr) {
  const InstructionT *Candidate = &Instr;
  SpecialUniformIntrinsics.insert(Candidate);
}

// Mark as divergent all external uses of values defined in \p DefCycle.
//
// A value V defined by a block B inside \p DefCycle may be used outside the
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Uniformity.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
Expand Down Expand Up @@ -1916,6 +1917,8 @@ class TargetTransformInfo {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;

bool isSpecialUniformIntrinsic(const Instruction &I) const;

private:
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
};
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1147,6 +1147,10 @@ class TargetTransformInfoImplBase {
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}

/// Target hook: is \p I one of the target's "special" uniform intrinsics?
/// This default implementation answers false for every instruction; targets
/// override it to opt specific intrinsics in.
virtual bool isSpecialUniformIntrinsic(const Instruction &I) const {
  constexpr bool NoSpecialIntrinsics = false;
  return NoSpecialIntrinsics;
}

protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1476,6 +1476,11 @@ void TargetTransformInfo::collectKernelLaunchBounds(
return TTIImpl->collectKernelLaunchBounds(F, LB);
}

/// Forward the query to the target-specific implementation held in TTIImpl
/// and return its answer unchanged.
bool TargetTransformInfo::isSpecialUniformIntrinsic(
    const Instruction &I) const {
  const bool IsSpecial = TTIImpl->isSpecialUniformIntrinsic(I);
  return IsSpecial;
}

TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
Expand Down
45 changes: 32 additions & 13 deletions llvm/lib/Analysis/UniformityAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,39 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
return markDivergent(cast<Value>(&Instr));
}

/// SSA specialization: decide whether the use \p U observes a divergent value.
///
/// The use is divergent when the used value is itself divergent, or when the
/// value is defined by an instruction for which isTemporalDivergent() holds
/// with respect to the block containing the using instruction.
///
/// \returns true if the use is divergent, false otherwise.
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
const Use &U) const {
const auto *V = U.get();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no auto

// A value already marked divergent makes every use of it divergent.
if (isDivergent(V))
return true;
// Otherwise only a value defined by an instruction can still be divergent at
// this use, via temporal divergence relative to the user's block.
if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
const auto *UseInstr = cast<Instruction>(U.getUser());
return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
}
// The value is neither divergent nor defined by an instruction.
return false;
}

/// SSA specialization: report whether \p I has at least one uniform operand.
///
/// Only operands that are Instructions or Arguments are considered; every
/// other kind of operand is skipped. An operand counts as uniform when
/// isDivergentUse() returns false for its use.
///
/// \returns true as soon as one such operand is found, false if there is none.
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isAnyOperandUniform(
    const Instruction &I) const {
  for (const Use &Op : I.operands()) {
    const Value *OpVal = Op.get();
    // Operands that are neither instructions nor function arguments are
    // ignored entirely.
    const bool IsCandidate = isa<Instruction>(OpVal) || isa<Argument>(OpVal);
    if (IsCandidate && !isDivergentUse(Op))
      return true;
  }
  return false;
}

template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
for (auto &I : instructions(F)) {
if (TTI->isSourceOfDivergence(&I))
markDivergent(I);
else if (TTI->isAlwaysUniform(&I))
addUniformOverride(I);
else if (TTI->isSpecialUniformIntrinsic(I))
addSpecialUniformIntrinsic(I);
}
for (auto &Arg : F.args()) {
if (TTI->isSourceOfDivergence(&Arg)) {
Expand All @@ -48,6 +75,11 @@ void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
const Value *V) {
for (const auto *User : V->users()) {
if (const auto *UserInstr = dyn_cast<const Instruction>(User)) {
if (SpecialUniformIntrinsics.count(UserInstr) &&
isAnyOperandUniform(*UserInstr)) {
Comment on lines +78 to +79
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is too specific a "specially uniform" check. I'd expect this operand validation to be an operation-dependent property.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think so, as @jayfoad suggested. I should probably let this be decided by the target. The only concern is that it would be costlier, and the uniformity decision would be made outside of UA, which I am not sure is acceptable.

addUniformOverride(*UserInstr);
continue;
}
markDivergent(*UserInstr);
}
}
Expand Down Expand Up @@ -88,19 +120,6 @@ void llvm::GenericUniformityAnalysisImpl<
}
}

/// SSA specialization: decide whether the use \p U observes a divergent value.
///
/// The use is divergent when the used value is itself divergent, or when the
/// value is defined by an instruction for which isTemporalDivergent() holds
/// with respect to the block containing the using instruction.
///
/// \returns true if the use is divergent, false otherwise.
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
    const Use &U) const {
  // Spell the type out: it is not evident from the initializer U.get().
  const Value *V = U.get();
  // A value already marked divergent makes every use of it divergent.
  if (isDivergent(V))
    return true;
  // Otherwise only a value defined by an instruction can still be divergent
  // at this use, via temporal divergence relative to the user's block.
  // `auto` is kept here because the cast names the type on the same line.
  if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
    const auto *UseInstr = cast<Instruction>(U.getUser());
    return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
  }
  // The value is neither divergent nor defined by an instruction.
  return false;
}

// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<SSAContext>;
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,6 @@ def : SourceOfDivergence<int_amdgcn_live_mask>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
def : SourceOfDivergence<int_amdgcn_permlane16>;
def : SourceOfDivergence<int_amdgcn_permlanex16>;
def : SourceOfDivergence<int_amdgcn_permlane16_var>;
def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1422,3 +1422,16 @@ void GCNTTIImpl::collectKernelLaunchBounds(
LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
}

bool GCNTTIImpl::isSpecialUniformIntrinsic(const Instruction &I) const {
if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_permlane16:
case Intrinsic::amdgcn_permlanex16:
return true;
default:
return false;
}
}
return false;
}
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
void collectKernelLaunchBounds(
const Function &F,
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
bool isSpecialUniformIntrinsic(const Instruction &I) const override;
};

} // end namespace llvm
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 {
ret void
}

; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
; CHECK: ALL VALUES UNIFORM
; permlane16 whose non-constant operands are all kernel arguments. The CHECK
; line above expects the uniformity printer to report every value as uniform.
define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
store i32 %v, ptr addrspace(1) %out
ret void
}

; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
; CHECK: ALL VALUES UNIFORM
define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
store i32 %v, ptr addrspace(1) %out
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/Analysis/UniformityAnalysis/AMDGPU/uniform_intrinsic.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; CHECK: ALL VALUES UNIFORM
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These aren't always uniform, so we need test coverage for all the conditions that would make this divergent.

; permlane16 whose non-constant operands are all kernel arguments; the result
; is stored to %out. The preceding CHECK line expects all values to be
; reported as uniform.
define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
store i32 %v, ptr addrspace(1) %out
ret void
}

; CHECK: ALL VALUES UNIFORM
; permlanex16 whose non-constant operands are all kernel arguments; the result
; is stored to %out. The preceding CHECK line expects all values to be
; reported as uniform.
define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%v = call i32 @llvm.amdgcn.permlanex16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
store i32 %v, ptr addrspace(1) %out
ret void
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}