Skip to content

Commit 7f07369

Browse files
lwesiersigcbot
authored and committed
Fix manageablebarrier hang
Move the markID call in emitInit so that it is emitted after the workgroup-scope memory fence and barrier. This ensures that all threads read the correct value from IDPool (the value passed to getFreeID()) before the thread with ID 0 updates it.
1 parent 7137af8 commit 7f07369

File tree

3 files changed

+16
-16
lines changed

3 files changed

+16
-16
lines changed

IGC/Compiler/Optimizer/OpenCLPasses/ManageableBarriers/ManageableBarriersResolution.cpp

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -201,13 +201,12 @@ Value *ManageableBarriersResolution::prepareBarrierIDPoolPtr(Instruction *pInser
201201
// Binary form of IDPool 00000000 00000000 00000000 00010000
202202
// first free ID is 5
203203

204-
void ManageableBarriersResolution::markID(Value *IDPool, Value *IDBarrier, Instruction *pInsertBefore) {
204+
void ManageableBarriersResolution::markID(Value *IDPool, Value *currentIDPoolState, Value *IDBarrier, Instruction *pInsertBefore) {
205205
IGCIRBuilder<> builder(pInsertBefore);
206206

207207
Value *maskID = builder.CreateShl(builder.getInt32(1), IDBarrier);
208208
Value *notBit_maskID = builder.CreateXor(maskID, builder.getInt32(-1));
209209

210-
Value *currentIDPoolState = builder.CreateLoad(builder.getInt32Ty(), IDPool);
211210
Value *currentIDPoolStateUpdated = builder.CreateAnd(notBit_maskID, currentIDPoolState);
212211

213212
builder.CreateStore(currentIDPoolStateUpdated, IDPool);
@@ -220,9 +219,8 @@ void ManageableBarriersResolution::releaseID(Value *IDPool, Value *IDBarrier, In
220219
ResolveOCLAtomics::CallAtomicSingleLane(AtomicOp::EATOMIC_OR, IDPool, maskID, pInsertBefore);
221220
}
222221

223-
Value *ManageableBarriersResolution::getFreeID(Value *IDPool, Instruction *pInsertBefore) {
222+
Value *ManageableBarriersResolution::getFreeID(Value *currentIDPoolState, Instruction *pInsertBefore) {
224223
IGCIRBuilder<> builder(pInsertBefore);
225-
Value *currentIDPoolState = builder.CreateLoad(builder.getInt32Ty(), IDPool);
226224

227225
Function *func_llvm_GenISA_firstbitLo = GenISAIntrinsic::getDeclaration(mModule, GenISAIntrinsic::GenISA_firstbitLo);
228226
Value *freeIDNumber = builder.CreateCall(func_llvm_GenISA_firstbitLo, {currentIDPoolState});
@@ -270,14 +268,13 @@ void ManageableBarriersResolution::emitInit(CallInst *pInsertPoint) {
270268
// Get current free ID for named barrier
271269
Value *barrierIDPoolPtr = getBarrierIDPoolPtr(pInsertPoint);
272270

273-
Value *getFirstFreeID = getFreeID(barrierIDPoolPtr, chekForSingleLane);
271+
LoadInst *currentIDPoolState = new LoadInst(builder.getInt32Ty(), barrierIDPoolPtr, "", chekForSingleLane);
272+
Value *getFirstFreeID = getFreeID(currentIDPoolState, chekForSingleLane);
274273
Value *ptrToBarrierSlot = getManageableBarrierstructDataPtr(pInsertPoint, getFirstFreeID, chekForSingleLane);
275274

276275
// Fill the basic block section for the Init function of barriers
277276
Instruction *instrJump = BranchInst::Create(bbAfter, bbInitSection);
278277

279-
markID(barrierIDPoolPtr, getFirstFreeID, instrJump);
280-
281278
storeManageableBarrierstructData(ptrToBarrierSlot, MBDynamicStructFields::BarrierID, getFirstFreeID, instrJump);
282279

283280
storeManageableBarrierstructData(ptrToBarrierSlot, MBDynamicStructFields::ProducerCount,
@@ -297,6 +294,10 @@ void ManageableBarriersResolution::emitInit(CallInst *pInsertPoint) {
297294
GenIntrinsicInst::Create(GenISAIntrinsic::getDeclaration(mModule, GenISAIntrinsic::GenISA_threadgroupbarrier), {},
298295
"", pInsertPoint);
299296

297+
// Emit markID after the workgroup-scope fence and barrier so that all threads
298+
// read the correct value from IDPool before the thread with ID 0 updates it.
299+
markID(barrierIDPoolPtr, currentIDPoolState, getFirstFreeID, pInsertPoint);
300+
300301
pInsertPoint->replaceAllUsesWith(ptrToBarrierSlot);
301302
}
302303
}

IGC/Compiler/Optimizer/OpenCLPasses/ManageableBarriers/ManageableBarriersResolution.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -99,9 +99,9 @@ class ManageableBarriersResolution : public llvm::ModulePass,
9999
llvm::Value *prepareBarrierIDPoolPtr(llvm::Instruction *pInsertBefore);
100100
llvm::Value *getBarriersDataPoolPtr(llvm::Instruction *pCallInst);
101101
llvm::Value *getBarrierIDPoolPtr(llvm::Instruction *pCallInst);
102-
void markID(llvm::Value *IDPool, llvm::Value *IDBarrier, llvm::Instruction *pInsertBefore);
102+
void markID(llvm::Value *IDPool, llvm::Value *currentIDPoolState, llvm::Value *IDBarrier, llvm::Instruction *pInsertBefore);
103103
void releaseID(llvm::Value *IDPool, llvm::Value *IDBarrier, llvm::Instruction *pInsertBefore);
104-
llvm::Value *getFreeID(llvm::Value *IDPool, llvm::Instruction *pInsertBefore);
104+
llvm::Value *getFreeID(llvm::Value *currentIDPoolState, llvm::Instruction *pInsertBefore);
105105

106106
void clearData();
107107

IGC/ocloc_tests/manageablebarriers/manageablebarriers_simple_case.cl

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -29,13 +29,6 @@
2929
// CHECK: [[MB_DATA_OFFSET_IntPTR:%[0-9]+]] = add i32 [[BarrierIDPool_FirstFreeID_Offset]], [[MB_DATA_GetIntPTR]]
3030
// CHECK: [[MB_DATA_OFFSET_PTR:%[0-9]+]] = inttoptr i32 [[MB_DATA_OFFSET_IntPTR]] to ptr addrspace(3)
3131

32-
//// Mark the FreeID in the ManageableBarriers ID Pool (that this ID is busy)
33-
// CHECK: [[FreeID_InBit:%[0-9]+]] = shl i32 1, [[BarrierIDPool_FirstFreeID]]
34-
// CHECK: [[FreeID_InBitNeg:%[0-9]+]] = xor i32 [[FreeID_InBit]], -1
35-
// CHECK: [[BarrierIDPool_Load2:%[0-9]+]] = load i32, ptr addrspace(3) [[BarrierIDPool_Ptr]], align 4
36-
// CHECK: [[BarrierIDPool_UpdateBits:%[0-9]+]] = and i32 [[FreeID_InBitNeg]], [[BarrierIDPool_Load2]]
37-
// CHECK: store i32 [[BarrierIDPool_UpdateBits]], ptr addrspace(3) [[BarrierIDPool_Ptr]], align 4
38-
3932
//// Fill with data for the ManageBarrier struct in SLM (for the particular single ManageableBarrier)
4033
//// Fill the FreeID in the ManageableBarriers Data ID offset
4134
// CHECK: [[MB_DATA_OFFSET_ID_GetIntPTR:%[0-9]+]] = ptrtoint ptr addrspace(3) [[MB_DATA_OFFSET_PTR]] to i32
@@ -65,6 +58,12 @@
6558
// CHECK: call void @llvm.genx.GenISA.memoryfence(i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i32 0)
6659
// CHECK: call void @llvm.genx.GenISA.threadgroupbarrier()
6760

61+
//// Mark the FreeID in the ManageableBarriers ID Pool (that this ID is busy)
62+
// CHECK: [[FreeID_InBit:%[0-9]+]] = shl i32 1, [[BarrierIDPool_FirstFreeID]]
63+
// CHECK: [[FreeID_InBitNeg:%[0-9]+]] = xor i32 [[FreeID_InBit]], -1
64+
// CHECK: [[BarrierIDPool_UpdateBits:%[0-9]+]] = and i32 [[FreeID_InBitNeg]], [[BarrierIDPool_Load]]
65+
// CHECK: store i32 [[BarrierIDPool_UpdateBits]], ptr addrspace(3) [[BarrierIDPool_Ptr]], align 4
66+
6867
/////////////////////////////////////////////
6968
//// ManageableBarriersArriveINTEL resolution
7069
/// Get the data for this barrier

0 commit comments

Comments
 (0)