@@ -12076,6 +12076,79 @@ void EmitPass::emitScalarAtomics(
     }
 }
 
+//
+// We emulate an atomic_load with an atomic_or with zero.
+// When the atomic is uniform, we can directly generate a SIMD1 atomic_or.
+//
+void EmitPass::emitScalarAtomicLoad(
+    llvm::Instruction* pInst,
+    ResourceDescriptor& resource,
+    CVariable* pDstAddr,
+    CVariable* pSrc,
+    bool isA64,
+    int bitWidth)
+{
+    if (pDstAddr->IsImmediate())
+    {
+        CVariable* pDstAddrCopy = m_currShader->GetNewVariable(1, ISA_TYPE_UD, IGC::EALIGN_GRF, true);
+        m_encoder->SetSimdSize(SIMDMode::SIMD1);
+        m_encoder->SetNoMask();
+        m_encoder->Copy(pDstAddrCopy, pDstAddr);
+        m_encoder->Push();
+        pDstAddr = pDstAddrCopy;
+    }
+
+    {
+        // pSrc is an immediate zero; copy it into a register for the atomic message.
+        CVariable* pSrcCopy = m_currShader->GetNewVariable(1, ISA_TYPE_UD, IGC::EALIGN_GRF, true);
+        m_encoder->SetSimdSize(SIMDMode::SIMD1);
+        m_encoder->SetNoMask();
+        m_encoder->Copy(pSrcCopy, pSrc);
+        m_encoder->Push();
+        pSrc = pSrcCopy;
+    }
+
+    m_encoder->SetSimdSize(SIMDMode::SIMD1);
+    m_encoder->SetNoMask();
+
+    CVariable* atomicDst = !pInst->use_empty() ? m_currShader->GetNewVariable(
+        1,
+        ISA_TYPE_UD,
+        isA64 ? IGC::EALIGN_2GRF : IGC::EALIGN_GRF,
+        true) : nullptr;
+
+    if (isA64)
+    {
+        m_encoder->AtomicRawA64(
+            EATOMIC_OR, resource,
+            atomicDst, pDstAddr,
+            pSrc, nullptr,
+            bitWidth);
+    }
+    else
+    {
+        m_encoder->DwordAtomicRaw(
+            EATOMIC_OR, resource,
+            atomicDst, pDstAddr,
+            pSrc,
+            nullptr, bitWidth == 16);
+    }
+    m_encoder->Push();
+
+    if (!pInst->use_empty())
+    {
+        // We need to broadcast the return value to all lanes.
+        // TODO: change divergence analysis to mark a scalar atomic load as uniform.
+        unsigned int counter = m_currShader->m_numberInstance;
+        for (unsigned int i = 0; i < counter; ++i)
+        {
+            m_encoder->SetSecondHalf(i == 1);
+            m_encoder->Copy(m_destination, atomicDst);
+            m_encoder->Push();
+        }
+    }
+}
+
 bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
 {
     if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(pInst))
@@ -12106,7 +12179,11 @@ bool EmitPass::IsUniformAtomic(llvm::Instruction* pInst)
                 atomic_op == EATOMIC_IMIN ||
                 atomic_op == EATOMIC_IMAX;
 
-            if (isAddAtomic || (isMinMaxAtomic && pInst->use_empty()))
+            // Capture the special case of atomic_or with 0 (it is used to emulate an atomic_load).
+            bool isOrWith0Atomic = atomic_op == EATOMIC_OR &&
+                isa<ConstantInt>(pInst->getOperand(2)) && cast<ConstantInt>(pInst->getOperand(2))->isZero();
+
+            if (isAddAtomic || (isMinMaxAtomic && pInst->use_empty()) || isOrWith0Atomic)
                 return true;
         }
     }
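
A note on why the OR-with-zero trick is a faithful atomic load: OR-ing a location with 0 atomically returns the old value and writes back the same bits, so the read-modify-write degenerates into a read. A minimal CPU-side sketch of the same idea using std::atomic (illustrative only; emulatedAtomicLoad is not part of the compiler):

    #include <atomic>
    #include <cstdint>

    uint32_t emulatedAtomicLoad(std::atomic<uint32_t>& location)
    {
        // fetch_or with 0 returns the current value and leaves memory
        // unchanged, which is exactly the behavior the SIMD1 atomic_or
        // message relies on above.
        return location.fetch_or(0u);
    }
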
@@ -12212,8 +12289,16 @@ void EmitPass::emitAtomicRaw(llvm::GenIntrinsicInst* pInsn)
             e_alignment uniformAlign = isA64 ? EALIGN_2GRF : EALIGN_GRF;
             // Re-align the pointer if it's not GRF aligned.
             pDstAddr = ReAlignUniformVariable(pDstAddr, uniformAlign);
-            emitScalarAtomics(pInsn, resource, atomic_op, pDstAddr, pSrc0, isA64, bitwidth);
-            ResetVMask();
+            if (atomic_op == EATOMIC_OR)
+            {
+                // Special case: an atomic_or with 0 emulates an atomic_load.
+                emitScalarAtomicLoad(pInsn, resource, pDstAddr, pSrc0, isA64, bitwidth);
+            }
+            else
+            {
+                emitScalarAtomics(pInsn, resource, atomic_op, pDstAddr, pSrc0, isA64, bitwidth);
+                ResetVMask();
+            }
             return;
         }
 
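
For readers tracing the new predicate in IsUniformAtomic: the isa<>/cast<> pair from the diff can be restated with a single dyn_cast. A hedged, self-contained sketch, where isOrWithZeroSrc is a hypothetical helper name and the assumption (taken from the diff) is that operand 2 of the atomic intrinsic carries the source value, with IGC's AtomicOp enum in scope:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instruction.h"

    // Hypothetical helper, not part of the patch.
    static bool isOrWithZeroSrc(AtomicOp op, llvm::Instruction* I)
    {
        if (op != EATOMIC_OR)
            return false;
        // dyn_cast folds the isa<ConstantInt> check and the cast<ConstantInt>
        // from the diff into one query.
        auto* src = llvm::dyn_cast<llvm::ConstantInt>(I->getOperand(2));
        return src && src->isZero();
    }
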