Skip to content

Commit 1ce5a03

Browse files
eternastudento authored and igcbot committed
Added UMin atomic predication to Atomic Branch
Reduce the number of atomics hitting the same cache line by performing atomic predication, i.e. issuing the atomic only when it would actually change memory: atomic_umin(src, x) -> t = read(src); if (x < t) { atomic_umin(src, x); }
1 parent 2c12e65 commit 1ce5a03

File tree

2 files changed

+37
-17
lines changed

2 files changed

+37
-17
lines changed

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 36 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6600,7 +6600,7 @@ namespace {
66006600
StringRef getPassName() const override { return "InsertBranchOpt"; }
66016601

66026602
bool runOnFunction(Function& F) override;
6603-
void atomicSpiltOpt(Function& F, int mode);
6603+
void atomicSplitOpt(Function& F, int mode);
66046604
void ThreeWayLoadSpiltOpt(Function& F);
66056605
void findOptCases(SelectInst* I);
66066606
bool HasSrcFromEE(Instruction* I, uint selNum, Instruction*& loadInst);
@@ -7252,11 +7252,20 @@ void InsertBranchOpt::ThreeWayLoadSpiltOpt(Function& F)
72527252
}
72537253
}
72547254

7255-
void InsertBranchOpt::atomicSpiltOpt(Function& F, int mode)
7255+
void InsertBranchOpt::atomicSplitOpt(Function& F, int mode)
72567256
{
7257+
enum Mode
7258+
{
7259+
Disable = 0x0, // Disabled IGC\EnableAtomicBranch = 0x0
7260+
ZeroAdd = BIT(0), // Enabled IGC\EnableAtomicBranch = 0x1
7261+
UMax = BIT(1), // Enabled IGC\EnableAtomicBranch = 0x2
7262+
UMin = BIT(2) // Enabled IGC\EnableAtomicBranch = 0x4
7263+
};
7264+
72577265
// Allow both modes to be applied
7258-
bool defaultMode = ( ( mode & 1 ) == 1 );
7259-
bool alternateUmaxMode = ( ( mode & 2 ) == 2 );
7266+
bool zeroAddMode = ( ( mode & ZeroAdd ) == ZeroAdd );
7267+
bool umaxMode = ( ( mode & UMax ) == UMax );
7268+
bool uminMode = ( ( mode & UMin ) == UMin );
72607269

72617270
auto createReadFromAtomic = []( IRBuilder<>& builder, Instruction* inst, bool isTyped )
72627271
{
@@ -7353,9 +7362,11 @@ void InsertBranchOpt::atomicSpiltOpt(Function& F, int mode)
73537362

73547363
AtomicOp atomicOp = static_cast<AtomicOp>(op->getZExtValue());
73557364

7356-
if( ( ( defaultMode ) && ( atomicOp == AtomicOp::EATOMIC_IADD || atomicOp == AtomicOp::EATOMIC_SUB ) )
7357-
||
7358-
atomicOp == AtomicOp::EATOMIC_UMAX )
7365+
if( ( zeroAddMode && ( atomicOp == AtomicOp::EATOMIC_IADD ||
7366+
atomicOp == AtomicOp::EATOMIC_SUB ||
7367+
atomicOp == AtomicOp::EATOMIC_UMAX ) )
7368+
|| ( umaxMode && ( atomicOp == AtomicOp::EATOMIC_UMAX ) )
7369+
|| ( uminMode && ( atomicOp == AtomicOp::EATOMIC_UMIN ) ) )
73597370
{
73607371
atomicSplit.push_back( std::make_pair( inst, atomicOp ) );
73617372
}
@@ -7380,12 +7391,14 @@ void InsertBranchOpt::atomicSpiltOpt(Function& F, int mode)
73807391
Instruction* ElseTerm = nullptr;
73817392
BasicBlock* MergeBlock = nullptr;
73827393

7394+
bool isModified = false;
73837395

7384-
if( op != AtomicOp::EATOMIC_UMAX || !alternateUmaxMode )
7396+
if ( ( zeroAddMode && ( op == AtomicOp::EATOMIC_IADD || op == AtomicOp::EATOMIC_SUB ) )
7397+
|| ( !umaxMode && ( op == AtomicOp::EATOMIC_UMAX ) ) )
73857398
{
73867399
// Create an if-then-else structure.
7387-
// if (cond!=0)
7388-
// use the original atomic add inst
7400+
// if (cond != 0)
7401+
// use the original atomic add/sub/umax inst
73897402
// else
73907403
// use typedread or load
73917404
Instruction* condInst = dyn_cast<Instruction>(builder.CreateICmp(ICmpInst::ICMP_NE, src, builder.getInt32(0)));
@@ -7394,20 +7407,27 @@ void InsertBranchOpt::atomicSpiltOpt(Function& F, int mode)
73947407

73957408
builder.SetInsertPoint( ElseTerm );
73967409
readI = createReadFromAtomic( builder, inst, isTyped);
7410+
7411+
isModified = true;
73977412
}
7398-
else // ( op == AtomicOp::EATOMIC_UMAX && alternateUmaxMode )
7413+
else if ( ( umaxMode && ( op == AtomicOp::EATOMIC_UMAX ) )
7414+
|| ( uminMode && ( op == AtomicOp::EATOMIC_UMIN ) ) )
73997415
{
74007416
// Create an if-then structure.
74017417
// x = typedread or load
7402-
// if (x < src)
7403-
// use the original atomic umax inst src
7418+
// if (src > (for UMax) or < (for Umin) x)
7419+
// use the original atomic umax/umin inst src
74047420
readI = createReadFromAtomic( builder, inst, isTyped );
7405-
Instruction* condInst = dyn_cast<Instruction>( builder.CreateICmp( ICmpInst::ICMP_UGT, src, readI ) );
7421+
CmpInst::Predicate predicate = umaxMode ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULT;
7422+
Instruction* condInst = dyn_cast<Instruction>( builder.CreateICmp( predicate, src, readI ) );
74067423

74077424
splitBBAndName( condInst, inst, &ThenTerm, nullptr, MergeBlock );
74087425
inst->moveBefore( ThenTerm );
7426+
7427+
isModified = true;
74097428
}
7410-
if( inst->getNumUses() )
7429+
7430+
if( isModified && inst->getNumUses() )
74117431
{
74127432
PHINode* newPhi = PHINode::Create(inst->getType(), 2, "", &MergeBlock->front());
74137433
inst->replaceUsesOutsideBlock(newPhi, inst->getParent());
@@ -7666,7 +7686,7 @@ bool InsertBranchOpt::runOnFunction(Function& F)
76667686
int mode = IGC_IS_FLAG_ENABLED( EnableAtomicBranch ) ? IGC_GET_FLAG_VALUE( EnableAtomicBranch ) : pContext->getModuleMetaData()->csInfo.atomicBranch;
76677687
if( mode )
76687688
{
7669-
atomicSpiltOpt( F, mode );
7689+
atomicSplitOpt( F, mode );
76707690
}
76717691

76727692
if (IGC_IS_FLAG_ENABLED(EnableThreeWayLoadSpiltOpt) || pContext->getModuleMetaData()->enableThreeWayLoadSpiltOpt)

IGC/common/igc_flags.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,7 @@ DECLARE_IGC_REGKEY(debugString, LLVMCommandLine, 0, "applies LLVM com
227227
DECLARE_IGC_REGKEY(debugString, SelectiveHashOptions, 0, "applies options to hash range via string", false)
228228
DECLARE_IGC_REGKEY(bool, DisableDX9LowPrecision, true, "Disables HF in DX9.", false)
229229
DECLARE_IGC_REGKEY(bool, EnablePingPongTextureOpt, true, "Enables the Ping Pong texture optimization which is used only for Compute Shaders for back to back dispatches", false)
230-
DECLARE_IGC_REGKEY(DWORD,EnableAtomicBranch, 0, "Enable Atomic branch optimization that break atomic into if/else. 1: if Val == 0 ignore iadd/sub/umax 0. 2: checks if memory is lower than Val before doing umax. 3: applies both 1 for iadd/sub and 2 for umax", false)
230+
DECLARE_IGC_REGKEY(DWORD,EnableAtomicBranch, 0, "Bitmask to enable Atomic branch optimization that predicates atomic with if/else. 1: if Val == 0 ignore iadd/sub/umax 0. 2: checks if memory is lower than Val for umax. 4: checks if memory if greater than Val for umin.", false)
231231
DECLARE_IGC_REGKEY(bool, EnableThreeWayLoadSpiltOpt, false, "Enable three way load spilt opt.", false)
232232
DECLARE_IGC_REGKEY(bool, EnableSamplerChannelReturn, true, "Setting this to 1/true adds a compiler switch to enable using header to return selective channels from sampler", false)
233233
DECLARE_IGC_REGKEY(bool, EnableThreadCombiningOpt, true, "Enables the thread combining optimization which is used only for Compute Shaders for combining a number of software threads to dispatch smaller number of hardware threads", false)

0 commit comments

Comments
 (0)