Skip to content

Commit 939c410

Browse files
add target hook to capture special operand uniformity and update UA to use it
1 parent 860b485 commit 939c410

File tree

10 files changed

+70
-2
lines changed

10 files changed

+70
-2
lines changed

llvm/include/llvm/ADT/GenericUniformityImpl.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "llvm/ADT/SmallPtrSet.h"
5252
#include "llvm/ADT/SparseBitVector.h"
5353
#include "llvm/ADT/StringExtras.h"
54+
#include "llvm/ADT/Uniformity.h"
5455
#include "llvm/Support/raw_ostream.h"
5556

5657
#define DEBUG_TYPE "uniformity"
@@ -407,6 +408,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
407408
void recordTemporalDivergence(ConstValueRefT, const InstructionT *,
408409
const CycleT *);
409410

411+
bool isAnyOperandUniform(const InstructionT &Instr) const;
412+
413+
/// \brief Keep track of special target intrinsics that can be proven uniform.
414+
void addSpecialUniformIntrinsic(const InstructionT &Instr);
415+
410416
protected:
411417
/// \brief Value/block pair representing a single phi input.
412418
struct PhiInput {
@@ -429,6 +435,8 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
429435
// Internal worklist for divergence propagation.
430436
std::vector<const InstructionT *> Worklist;
431437

438+
// Set of special intrinsics that can be proven uniform.
439+
llvm::SmallPtrSet<const InstructionT *, 8> SpecialUniformIntrinsics;
432440
/// \brief Mark \p Term as divergent and push all Instructions that become
433441
/// divergent as a result on the worklist.
434442
void analyzeControlDivergence(const InstructionT &Term);
@@ -824,6 +832,12 @@ void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
824832
UniformOverrides.insert(&Instr);
825833
}
826834

835+
/// Register \p Instr as a special uniform intrinsic candidate by inserting its
/// address into SpecialUniformIntrinsics. This only records the pointer; it
/// does not mark the instruction uniform or divergent by itself.
template <typename ContextT>
836+
void GenericUniformityAnalysisImpl<ContextT>::addSpecialUniformIntrinsic(
837+
const InstructionT &Instr) {
838+
SpecialUniformIntrinsics.insert(&Instr);
839+
}
840+
827841
// Mark as divergent all external uses of values defined in \p DefCycle.
828842
//
829843
// A value V defined by a block B inside \p DefCycle may be used outside the

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include "llvm/ADT/APInt.h"
2525
#include "llvm/ADT/ArrayRef.h"
26+
#include "llvm/ADT/Uniformity.h"
2627
#include "llvm/Analysis/IVDescriptors.h"
2728
#include "llvm/IR/FMF.h"
2829
#include "llvm/IR/InstrTypes.h"
@@ -1916,6 +1917,8 @@ class TargetTransformInfo {
19161917
const Function &F,
19171918
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const;
19181919

1920+
bool isSpecialUniformIntrinsic(const Instruction &I) const;
1921+
19191922
private:
19201923
std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
19211924
};

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,10 @@ class TargetTransformInfoImplBase {
11471147
const Function &F,
11481148
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
11491149

1150+
// Default implementation of the hook: no instruction is reported as a
// special uniform intrinsic. Declared virtual so that a target
// implementation can override it; \p I is unused here.
virtual bool isSpecialUniformIntrinsic(const Instruction &I) const {
1151+
return false;
1152+
}
1153+
11501154
protected:
11511155
// Obtain the minimum required size to hold the value (without the sign)
11521156
// In case of a vector it returns the min required size for one element.

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,6 +1476,11 @@ void TargetTransformInfo::collectKernelLaunchBounds(
14761476
return TTIImpl->collectKernelLaunchBounds(F, LB);
14771477
}
14781478

1479+
// Public entry point for the hook: forwards \p I unchanged to the underlying
// TTIImpl object and returns its answer.
bool TargetTransformInfo::isSpecialUniformIntrinsic(
1480+
const Instruction &I) const {
1481+
return TTIImpl->isSpecialUniformIntrinsic(I);
1482+
}
1483+
14791484
TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default;
14801485

14811486
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

llvm/lib/Analysis/UniformityAnalysis.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,26 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
4242
return false;
4343
}
4444

45+
// Returns true if at least one operand of \p I that is an Instruction or an
// Argument is not reported as a divergent use by isDivergentUse(); returns
// false otherwise.
//
// NOTE(review): operands that are neither Instruction nor Argument (e.g.
// constants) are skipped, so an instruction whose operands are all of that
// kind yields false. Confirm this is intended.
// NOTE(review): the answer reflects whatever isDivergentUse() reports at the
// time of the call; presumably an operand could still be marked divergent
// later during propagation. Verify against the callers.
template <>
46+
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isAnyOperandUniform(
47+
const Instruction &I) const {
48+
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
49+
// Only operands defined by an instruction or a function argument are
// inspected; everything else is ignored.
if (!isa<Instruction>(I.getOperand(i)) && !isa<Argument>(I.getOperand(i)))
50+
continue;
51+
// The first inspected operand that is not a divergent use decides the result.
if (!isDivergentUse(I.getOperandUse(i)))
52+
return true;
53+
}
54+
// No inspected operand was found to be a non-divergent use.
return false;
55+
}
56+
4557
template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
4658
for (auto &I : instructions(F)) {
4759
if (TTI->isSourceOfDivergence(&I))
4860
markDivergent(I);
4961
else if (TTI->isAlwaysUniform(&I))
5062
addUniformOverride(I);
63+
else if (TTI->isSpecialUniformIntrinsic(I))
64+
addSpecialUniformIntrinsic(I);
5165
}
5266
for (auto &Arg : F.args()) {
5367
if (TTI->isSourceOfDivergence(&Arg)) {
@@ -61,6 +75,11 @@ void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
6175
const Value *V) {
6276
for (const auto *User : V->users()) {
6377
if (const auto *UserInstr = dyn_cast<const Instruction>(User)) {
78+
if (SpecialUniformIntrinsics.count(UserInstr) &&
79+
isAnyOperandUniform(*UserInstr)) {
80+
addUniformOverride(*UserInstr);
81+
continue;
82+
}
6483
markDivergent(*UserInstr);
6584
}
6685
}

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,6 @@ def : SourceOfDivergence<int_amdgcn_live_mask>;
317317
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
318318
def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
319319
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
320-
def : SourceOfDivergence<int_amdgcn_permlane16>;
321320
def : SourceOfDivergence<int_amdgcn_permlanex16>;
322321
def : SourceOfDivergence<int_amdgcn_permlane16_var>;
323322
def : SourceOfDivergence<int_amdgcn_permlanex16_var>;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,3 +1422,15 @@ void GCNTTIImpl::collectKernelLaunchBounds(
14221422
LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
14231423
LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
14241424
}
1425+
1426+
// AMDGPU implementation of the hook. Returns true only when \p I is an
// IntrinsicInst whose intrinsic ID is Intrinsic::amdgcn_permlane16; every
// other intrinsic and every non-intrinsic instruction returns false.
bool GCNTTIImpl::isSpecialUniformIntrinsic(const Instruction &I) const {
1427+
if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
1428+
switch (II->getIntrinsicID()) {
1429+
case Intrinsic::amdgcn_permlane16:
1430+
return true;
1431+
default:
1432+
return false;
1433+
}
1434+
}
1435+
// Not an intrinsic call.
return false;
1436+
}

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
290290
void collectKernelLaunchBounds(
291291
const Function &F,
292292
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
293+
bool isSpecialUniformIntrinsic(const Instruction &I) const override;
293294
};
294295

295296
} // end namespace llvm

llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) #0 {
77
ret void
88
}
99

10-
; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
10+
; CHECK: ALL VALUES UNIFORM
1111
define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
1212
%v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
1313
store i32 %v, ptr addrspace(1) %out
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s
3+
4+
; CHECK: ALL VALUES UNIFORM
5+
define amdgpu_kernel void @v_permlane16_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
6+
%v = call i32 @llvm.amdgcn.permlane16.i32(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
7+
store i32 %v, ptr addrspace(1) %out
8+
ret void
9+
}
10+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
11+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)