Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion llvm/include/llvm/ADT/GenericSSAContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ template <typename _FunctionT> class GenericSSAContext {

// The null value for ValueRefT. For LLVM IR and MIR, this is simply the
// default constructed value.
static constexpr ValueRefT *ValueRefNull = {};
static constexpr ValueRefT ValueRefNull = {};

// An InstructionT usually defines one or more ValueT objects.
using InstructionT = typename SSATraits::InstructionT;
Expand Down Expand Up @@ -96,6 +96,10 @@ template <typename _FunctionT> class GenericSSAContext {
static bool isConstantOrUndefValuePhi(const InstructionT &Instr);
const BlockT *getDefBlock(ConstValueRefT value) const;

void getPhiInputs(const InstructionT &Instr,
SmallVectorImpl<ConstValueRefT> &Values,
SmallVectorImpl<const BlockT *> &Blocks) const;

Printable print(const BlockT *block) const;
Printable printAsOperand(const BlockT *BB) const;
Printable print(const InstructionT *inst) const;
Expand Down
79 changes: 66 additions & 13 deletions llvm/include/llvm/ADT/GenericUniformityImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,9 +455,11 @@ template <typename ContextT> class GenericUniformityAnalysisImpl {
/// the worklist.
void taintAndPushAllDefs(const BlockT &JoinBlock);

/// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
/// the worklist.
void taintAndPushPhiNodes(const BlockT &JoinBlock);
/// \brief Mark phi nodes in \p JoinBlock as divergent and push them on
/// the worklist if they are divergent over the path from \p JoinBlock
/// to \p DivTermBlock.
void taintAndPushPhiNodes(const BlockT &JoinBlock, const BlockT &DivTermBlock,
const DivergenceDescriptorT &DivDesc);

/// \brief Identify all Instructions that become divergent because \p DivExit
/// is a divergent cycle exit of \p DivCycle. Mark those instructions as
Expand Down Expand Up @@ -917,19 +919,70 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
/// Mark divergent phi nodes in a join block
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
const BlockT &JoinBlock) {
const BlockT &JoinBlock, const BlockT &DivTermBlock,
const DivergenceDescriptorT &DivDesc) {
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
<< "\n");
for (const auto &Phi : JoinBlock.phis()) {
// FIXME: The non-undef value is not constant per se; it just happens to be
// uniform and may not dominate this PHI. So assuming that the same value
// reaches along all incoming edges may itself be undefined behaviour. This
// particular interpretation of the undef value was added to
// DivergenceAnalysis in the following review:
//
// https://reviews.llvm.org/D19013
if (ContextT::isConstantOrUndefValuePhi(Phi))
// Attempt to maintain uniformity for PHIs by considering control
// dependencies before marking them.
SmallVector<ConstValueRefT> Values;
SmallVector<const BlockT *> Blocks;
Context.getPhiInputs(Phi, Values, Blocks);
assert(Blocks.size() == Values.size());

// Allow an empty Blocks/Values list to signify getPhiInputs is not
// implemented; in which case no uniformity is possible.
bool HasSingleValue = !Values.empty();
bool UniformOnPath = HasSingleValue;

std::optional<ConstValueRefT> PhiCommon, PathCommon;
for (unsigned I = 0; I < Blocks.size() && (UniformOnPath || HasSingleValue);
++I) {
// FIXME: We assume undefs are uniform and/or do not dominate the PHI
// in the presence of other constant or uniform values.
// This particular interpretation of the undef value was added to
// DivergenceAnalysis in the following review:
//
// https://reviews.llvm.org/D19013
if (!Values[I])
continue;

// Track common value for all inputs.
if (!PhiCommon)
PhiCommon = Values[I];
else if (Values[I] != *PhiCommon)
HasSingleValue = false;

// Divergent path does not have uniform value.
if (!UniformOnPath)
continue;

// Only consider predecessors on divergent path.
if (Blocks[I] != &DivTermBlock &&
!DivDesc.BlockLabels.lookup_or(Blocks[I], nullptr))
continue;

// Phi is reached via divergent exit (i.e. respect temporal divergence).
if (DivDesc.CycleDivBlocks.contains(Blocks[I])) {
UniformOnPath = false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point, HasSingleValue might still be true, and then the PHINode will not get marked as divergent. But temporal divergence means that even the same input can have different values for different threads. Either this check should happen early to mark the PHINode divergent, or in fact this check is not needed at all, because eventually UA will mark all temporally divergent uses anyway when it calls propagateTemporalDivergence().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this point, HasSingleValue might still be true, and then the PHINode will not get marked as divergent.
Yes, this is what the current implementation prior to this patch does.

The key difference between this test and propagateTemporalDivergence is whether it is related to the CFG edge or the value definition.
propagateTemporalDivergence only marks PHIs if they use a value defined within a loop.
This marks the PHI based on its reachability from a divergent exit.

The test hidden_doublebreak_diverge in hidden_loopdiverge.ll highlights this.
The PHI %div.merge.y = phi i32 [ 42, %X ], [ %b, %B ] only uses values defined outside the function, so the values themselves will not cause temporal divergence.

To borrow Jay's summary:

Once we go entry->H->G, there is a divergent branch which can choose either G->X->Y or G->L->C->H->B->Y. Therefore the choice of whether we arrive at Y from X or from B is divergent.

The divergent path via B only has a single value (%b), so this PHI would not be marked divergent (UniformOnPath = true) unless we consider the temporal divergence of the path as well.

continue;
}

// Phi uniformity is maintained if all values on divergent path match.
if (!PathCommon)
PathCommon = Values[I];
else if (Values[I] != *PathCommon) {
UniformOnPath = false;
assert(!HasSingleValue);
break;
}
}

if (UniformOnPath || HasSingleValue)
continue;

LLVM_DEBUG(dbgs() << "tainted: " << Phi << "\n");
markDivergent(Phi);
}
}
Expand Down Expand Up @@ -1087,7 +1140,7 @@ void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
DivCycles.push_back(Outermost);
continue;
}
taintAndPushPhiNodes(*JoinBlock);
taintAndPushPhiNodes(*JoinBlock, *DivTermBlock, DivDesc);
}

// Sort by order of decreasing depth. This allows later cycles to be skipped
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/CodeGen/MachineSSAContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,25 @@ bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
return true;
}

template <>
void MachineSSAContext::getPhiInputs(
const MachineInstr &Phi, SmallVectorImpl<Register> &Values,
SmallVectorImpl<const MachineBasicBlock *> &Blocks) const {
assert(Phi.isPHI());

const MachineRegisterInfo &MRI = Phi.getMF()->getRegInfo();
// const Register DstReg = Phi.getOperand(0).getReg();
for (unsigned Idx = 1, End = Phi.getNumOperands(); Idx < End; Idx += 2) {
Register Incoming = Phi.getOperand(Idx).getReg();
MachineInstr *Def = MRI.getVRegDef(Incoming);
// FIXME: should this also consider Incoming == DstReg undef?
if (Def && isUndef(*Def))
Incoming = ValueRefNull;
Values.push_back(Incoming);
Blocks.push_back(Phi.getOperand(Idx + 1).getMBB());
}
}

template <>
Intrinsic::ID MachineSSAContext::getIntrinsicID(const MachineInstr &MI) {
if (auto *GI = dyn_cast<GIntrinsic>(&MI))
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/IR/SSAContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "llvm/IR/SSAContext.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
Expand Down Expand Up @@ -68,6 +69,23 @@ bool SSAContext::isConstantOrUndefValuePhi(const Instruction &Instr) {
return false;
}

template <>
void SSAContext::getPhiInputs(
const Instruction &Instr, SmallVectorImpl<const Value *> &Values,
SmallVectorImpl<const BasicBlock *> &Blocks) const {
assert(isa<PHINode>(Instr));
const PHINode *Phi = static_cast<const PHINode *>(&Instr);
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
const Value *Incoming = Phi->getIncomingValue(I);
const BasicBlock *Block = Phi->getIncomingBlock(I);
// FIXME: should this also consider Incoming == &Instr undef?
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If all the dominating inputs are well-defined, then this self-reference is not undef, right? If at this point we find that indeed Incoming == &Instr, then I am not able to decide if treating it as undef is always correct. Ultimately, it the uniformity itself has to be false unless proven otherwise.

if (isa<UndefValue>(Incoming))
Incoming = ValueRefNull;
Values.push_back(Incoming);
Blocks.push_back(Block);
}
}

template <> Intrinsic::ID SSAContext::getIntrinsicID(const Instruction &I) {
if (auto *CB = dyn_cast<CallBase>(&I))
return CB->getIntrinsicID();
Expand Down
121 changes: 121 additions & 0 deletions llvm/test/Analysis/UniformityAnalysis/AMDGPU/phi_no_divergence.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
; RUN: opt %s -mtriple amdgcn-- -passes='print<uniformity>' -disable-output 2>&1 | FileCheck %s

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be useful to have one or two lines explaining what's the distinguishing pattern in each function compared to other functions in the file.

; Test PHIs that are uniform because they have a common/constant value over
; the divergent paths.

; Loop is uniform because loop exit PHI has constant value over all internal
; divergent paths.
define amdgpu_kernel void @no_divergent_exit1(i32 %a, i32 %b, i32 %c) #0 {
; CHECK-LABEL: for function 'no_divergent_exit1'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br label %header

header:
%loop.b = phi i32 [ %b, %entry ], [ %new.b, %body.1 ], [ %new.b, %body.2 ]
; CHECK-NOT: DIVERGENT: %loop.b =
%loop.c = phi i32 [ %c, %entry ], [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
; CHECK: DIVERGENT: %loop.c =
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %body.1 ], [ %next.exit.val, %body.2 ]
; CHECK-NOT: DIVERGENT: %exit.val =
%exit.cond = icmp slt i32 %exit.val, 42
; CHECK-NOT: DIVERGENT: %exit.cond =
br i1 %exit.cond, label %end, label %body.1
; CHECK-NOT: DIVERGENT: br i1 %exit.cond,

body.1:
%new.b = add i32 %loop.b, 1
; CHECK-NOT: DIVERGENT: %new.b =
%next.exit.val = add i32 %exit.val, 1
; CHECK-NOT: DIVERGENT: %next.exit.val =
br i1 %div.cond, label %body.2, label %header
; CHECK: DIVERGENT: br i1 %div.cond,

body.2:
%new.c = add i32 %loop.c, 1
; CHECK: DIVERGENT: %new.c =
br label %header

end:
ret void
}

; As no_divergent_exit1 but with merge block before exit.
define amdgpu_kernel void @no_divergent_exit2(i32 %a, i32 %b, i32 %c) #0 {
; CHECK-LABEL: for function 'no_divergent_exit2'
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br label %header

header:
%loop.b = phi i32 [ %b, %entry ], [ %merge.b, %merge ]
; CHECK-NOT: DIVERGENT: %loop.b =
%loop.c = phi i32 [ %c, %entry ], [ %merge.c, %merge ]
; CHECK: DIVERGENT: %loop.c =
%exit.val = phi i32 [ %a, %entry ], [ %next.exit.val, %merge ]
; CHECK-NOT: DIVERGENT: %exit.val =
%exit.cond = icmp slt i32 %exit.val, 42
; CHECK-NOT: DIVERGENT: %exit.cond =
br i1 %exit.cond, label %end, label %body.1
; CHECK-NOT: DIVERGENT: br i1 %exit.cond,

body.1:
%new.b = add i32 %loop.b, 1
; CHECK-NOT: DIVERGENT: %new.b =
%next.exit.val = add i32 %exit.val, 1
; CHECK-NOT: DIVERGENT: %next.exit.val =
br i1 %div.cond, label %body.2, label %merge
; CHECK: DIVERGENT: br i1 %div.cond,

body.2:
%new.c = add i32 %loop.c, 1
; CHECK: DIVERGENT: %new.c =
br label %merge

merge:
%merge.b = phi i32 [ %new.b, %body.1 ], [ %new.b, %body.2 ]
; CHECK-NOT: DIVERGENT: %merge.b =
%merge.c = phi i32 [ %loop.c, %body.1 ], [ %new.c, %body.2 ]
; CHECK: DIVERGENT: %merge.c =
br label %header

end:
ret void
}

; Test PHI with constant value over divergent path without a loop.
define amdgpu_kernel void @no_loop_phi_divergence(i32 %a) #0 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%uni.cond = icmp slt i32 %a, 0
; CHECK-NOT: DIVERGENT: %uni.cond =
%div.cond = icmp slt i32 %tid, 0
; CHECK: DIVERGENT: %div.cond =
br i1 %uni.cond, label %div.branch.block, label %merge
; CHECK-NOT: DIVERGENT: br i1 %uni.cond,

div.branch.block:
br i1 %div.cond, label %div.block.1, label %div.block.2
; CHECK: DIVERGENT: br i1 %div.cond,

div.block.1:
br label %merge

div.block.2:
br label %merge

merge:
%uni.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 1, %div.block.2 ]
; CHECK-NOT: DIVERGENT: %uni.val =
%div.val = phi i32 [ 0, %entry ], [ 1, %div.block.1 ], [ 2, %div.block.2 ]
; CHECK: DIVERGENT: %div.val =
ret void
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a test with temporal divergence?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the existing tests cover this.

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }