@@ -201,13 +201,12 @@ Value *ManageableBarriersResolution::prepareBarrierIDPoolPtr(Instruction *pInser
201
201
// Binary form of IDPool 00000000 00000000 00000000 00010000
202
202
// first free ID is 5
203
203
204
- void ManageableBarriersResolution::markID (Value *IDPool, Value *IDBarrier, Instruction *pInsertBefore) {
204
+ void ManageableBarriersResolution::markID (Value *IDPool, Value *currentIDPoolState, Value * IDBarrier, Instruction *pInsertBefore) {
205
205
IGCIRBuilder<> builder (pInsertBefore);
206
206
207
207
Value *maskID = builder.CreateShl (builder.getInt32 (1 ), IDBarrier);
208
208
Value *notBit_maskID = builder.CreateXor (maskID, builder.getInt32 (-1 ));
209
209
210
- Value *currentIDPoolState = builder.CreateLoad (builder.getInt32Ty (), IDPool);
211
210
Value *currentIDPoolStateUpdated = builder.CreateAnd (notBit_maskID, currentIDPoolState);
212
211
213
212
builder.CreateStore (currentIDPoolStateUpdated, IDPool);
@@ -220,9 +219,8 @@ void ManageableBarriersResolution::releaseID(Value *IDPool, Value *IDBarrier, In
220
219
ResolveOCLAtomics::CallAtomicSingleLane (AtomicOp::EATOMIC_OR, IDPool, maskID, pInsertBefore);
221
220
}
222
221
223
- Value *ManageableBarriersResolution::getFreeID (Value *IDPool , Instruction *pInsertBefore) {
222
+ Value *ManageableBarriersResolution::getFreeID (Value *currentIDPoolState , Instruction *pInsertBefore) {
224
223
IGCIRBuilder<> builder (pInsertBefore);
225
- Value *currentIDPoolState = builder.CreateLoad (builder.getInt32Ty (), IDPool);
226
224
227
225
Function *func_llvm_GenISA_firstbitLo = GenISAIntrinsic::getDeclaration (mModule , GenISAIntrinsic::GenISA_firstbitLo);
228
226
Value *freeIDNumber = builder.CreateCall (func_llvm_GenISA_firstbitLo, {currentIDPoolState});
@@ -270,14 +268,13 @@ void ManageableBarriersResolution::emitInit(CallInst *pInsertPoint) {
270
268
// Get current free ID for named barrier
271
269
Value *barrierIDPoolPtr = getBarrierIDPoolPtr (pInsertPoint);
272
270
273
- Value *getFirstFreeID = getFreeID (barrierIDPoolPtr, chekForSingleLane);
271
+ LoadInst *currentIDPoolState = new LoadInst (builder.getInt32Ty (), barrierIDPoolPtr, " " , chekForSingleLane);
272
+ Value *getFirstFreeID = getFreeID (currentIDPoolState, chekForSingleLane);
274
273
Value *ptrToBarrierSlot = getManageableBarrierstructDataPtr (pInsertPoint, getFirstFreeID, chekForSingleLane);
275
274
276
275
// Fill the basic block section for the Init function of barriers
277
276
Instruction *instrJump = BranchInst::Create (bbAfter, bbInitSection);
278
277
279
- markID (barrierIDPoolPtr, getFirstFreeID, instrJump);
280
-
281
278
storeManageableBarrierstructData (ptrToBarrierSlot, MBDynamicStructFields::BarrierID, getFirstFreeID, instrJump);
282
279
283
280
storeManageableBarrierstructData (ptrToBarrierSlot, MBDynamicStructFields::ProducerCount,
@@ -297,6 +294,10 @@ void ManageableBarriersResolution::emitInit(CallInst *pInsertPoint) {
297
294
GenIntrinsicInst::Create (GenISAIntrinsic::getDeclaration (mModule , GenISAIntrinsic::GenISA_threadgroupbarrier), {},
298
295
" " , pInsertPoint);
299
296
297
+ // markID is emitted after the barrier and fence on workgroup scope to ensure that all threads
298
+ // read the correct value from IDPool before the thread with ID 0 updates it.
299
+ markID (barrierIDPoolPtr, currentIDPoolState, getFirstFreeID, pInsertPoint);
300
+
300
301
pInsertPoint->replaceAllUsesWith (ptrToBarrierSlot);
301
302
}
302
303
}
0 commit comments