@@ -943,7 +943,7 @@ bool EmitPass::runOnFunction(llvm::Function& F)
943943 for (uint i = 0; i < m_pattern->m_numBlocks; i++)
944944 {
945945 SBasicBlock& block = m_pattern->m_blocks[i];
946- block.clearCaching() ; // clear for each SIMD size
946+ block.m_activeMask = nullptr ; // clear for each SIMD size
947947 m_currentBlock = i;
948948 if (m_blockCoalescing->IsEmptyBlock(block.bb))
949949 {
@@ -975,8 +975,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
975975 while (I != E)
976976 {
977977 Instruction* llvmInst = I->m_root;
978- resetCurrInstNumInstances();
979-
980978 if (llvmInst->getDebugLoc())
981979 {
982980 unsigned int curLineNumber = llvmInst->getDebugLoc().getLine();
@@ -1006,8 +1004,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
10061004 bool slicing = false;
10071005 uint numInstance = DecideInstanceAndSlice(*block.bb, *I, slicing);
10081006 IGC_ASSERT(numInstance == 1 || numInstance == 2);
1009- // caching the number of instance
1010- setCurrInstNumInstances(numInstance);
10111007
10121008 if (slicing && !disableSlicing)
10131009 {
@@ -1037,7 +1033,6 @@ bool EmitPass::runOnFunction(llvm::Function& F)
10371033 if (slicing)
10381034 {
10391035 numInstance = DecideInstanceAndSlice(*block.bb, *I, slicing);
1040- setCurrInstNumInstances(numInstance);
10411036 }
10421037
10431038 if (llvmtoVISADump)
@@ -12332,93 +12327,32 @@ void EmitPass::emitScalarAtomics(
1233212327 uniformAtomicOp = EATOMIC_IADD;
1233312328 }
1233412329 bool returnsImmValue = (!pInst->use_empty());
12335- CVariable* pFinalAtomicSrcVal;
12330+ CVariable* pFinalAtomicSrcVal = m_currShader->GetNewVariable(
12331+ 1,
12332+ type,
12333+ isA64 ? EALIGN_2GRF : EALIGN_GRF,
12334+ true,
12335+ CName::NONE);
1233612336 CVariable* pSrcsArr[2] = { nullptr, nullptr };
12337-
12338- if (op == EOPCODE_ADD && bitWidth == 32 && pSrc->IsUniform() &&
12339- getCurrInstNumInstances() == 1 && !returnsImmValue)
12337+ if (returnsImmValue)
1234012338 {
12341- // Special case for uniform DW src (like atomic_add(1) without return value.
12342- // Note: limit this code for a single instance for now as scalar atomic must have
12343- // instance = 1 (see DecideInstanceAndSlice()).
12344- //
12345- // The following sequence will be generated:
12346- // (W) mov (16|M0) f0.0<1>:uw 0:uw
12347- // cmp.eq.f0.0 (16|M0) dummy:uw dummy:uw
12348- // (W) mov (1|M0) r2.0<1>:uw f0.0:uw
12349- // (W) cbit (1|M0) r1.0:uw r2.0:uw <-- r1.0 : number of active lanes
12350- // (W) mul (1|M0) r10:ud pSrc r1.0:uw
12351- SBasicBlock& currBlk = getCurrentBlock();
12352- CVariable* numActiveLanes = currBlk.m_numActiveLanes;
12353- if (numActiveLanes == nullptr)
12354- {
12355- CVariable* emask = GetExecutionMask(); // execution mask for the entire dispatch size
12356- // Count the number of '1' bits we have in the execmask to get the number of active lanes.
12357- // For example, given emask = 1011011000100010b, numActiveLanes = 7
12358- // This will handle cases in which not all lanes are active.
12359- numActiveLanes = m_currShader->GetNewVariable(1, ISA_TYPE_W, EALIGN_DWORD, true, CName::NONE);
12360- m_encoder->CBit(numActiveLanes, emask);
12361- m_encoder->Push();
12362-
12363- // save it for possible re-use later.
12364- currBlk.m_numActiveLanes = numActiveLanes;
12365- }
12339+ // sum all the lanes
12340+ emitPreOrPostFixOp(op, identityValue, type, negateSrc, pSrc, pSrcsArr);
1236612341
12367- // pFinalAtomicSrcVal is used in msg's payload and thus needs to be GRF-aligned
12368- pFinalAtomicSrcVal = m_currShader->GetNewVariable(1, ISA_TYPE_D, EALIGN_GRF, true, CName::NONE);
12369- if (pSrc->IsImmediate() && pSrc->GetImmediateValue() == 1)
12342+ CVariable* pSrcCopy = pSrcsArr[0];
12343+ if (m_currShader->m_numberInstance == 2)
1237012344 {
12371- if (negateSrc)
12372- {
12373- m_encoder->SetSrcModifier(0, EMOD_NEG);
12374- }
12375- m_encoder->Cast(pFinalAtomicSrcVal, numActiveLanes);
12376- m_encoder->Push();
12345+ pSrcCopy = pSrcsArr[1];
1237712346 }
12378- else
12379- {
12380- m_encoder->Mul(pFinalAtomicSrcVal, pSrc, numActiveLanes);
12381- m_encoder->Push();
1238212347
12383- // using neg srcmod with mul will end up with more insts, thus using srcmod on mov
12384- if (negateSrc)
12385- {
12386- m_encoder->SetSrcModifier(0, EMOD_NEG);
12387- }
12388- m_encoder->Copy(pFinalAtomicSrcVal, pFinalAtomicSrcVal);
12389- m_encoder->Push();
12390- }
12348+ m_encoder->SetSrcRegion(0, 0, 1, 0);
12349+ m_encoder->SetSrcSubReg(0, numLanes(m_currShader->m_SIMDSize) - 1);
12350+ m_encoder->Copy(pFinalAtomicSrcVal, pSrcCopy);
12351+ m_encoder->Push();
1239112352 }
1239212353 else
1239312354 {
12394- // general case
12395- pFinalAtomicSrcVal = m_currShader->GetNewVariable(
12396- 1,
12397- type,
12398- isA64 ? EALIGN_2GRF : EALIGN_GRF,
12399- true,
12400- CName::NONE);
12401-
12402- if (returnsImmValue)
12403- {
12404- // sum all the lanes
12405- emitPreOrPostFixOp(op, identityValue, type, negateSrc, pSrc, pSrcsArr);
12406-
12407- CVariable* pSrcCopy = pSrcsArr[0];
12408- if (m_currShader->m_numberInstance == 2)
12409- {
12410- pSrcCopy = pSrcsArr[1];
12411- }
12412-
12413- m_encoder->SetSrcRegion(0, 0, 1, 0);
12414- m_encoder->SetSrcSubReg(0, numLanes(m_currShader->m_SIMDSize) - 1);
12415- m_encoder->Copy(pFinalAtomicSrcVal, pSrcCopy);
12416- m_encoder->Push();
12417- }
12418- else
12419- {
12420- emitReductionAll(op, identityValue, type, negateSrc, pSrc, pFinalAtomicSrcVal);
12421- }
12355+ emitReductionAll(op, identityValue, type, negateSrc, pSrc, pFinalAtomicSrcVal);
1242212356 }
1242312357
1242412358 auto moveToReg = [&](CVariable*& pVar)
@@ -12454,6 +12388,11 @@ void EmitPass::emitScalarAtomics(
1245412388 m_encoder->SetSimdSize(SIMDMode::SIMD1);
1245512389 m_encoder->SetNoMask();
1245612390
12391+ CVariable* pReturnVal = returnsImmValue ?
12392+ m_currShader->GetNewVariable(
12393+ 1, ISA_TYPE_UD, EALIGN_GRF, true, CName::NONE) :
12394+ nullptr;
12395+
1245712396 if (bitWidth == 16)
1245812397 {
1245912398 CVariable* pCastAtomicSrcVal =
@@ -12463,11 +12402,6 @@ void EmitPass::emitScalarAtomics(
1246312402 pFinalAtomicSrcVal = pCastAtomicSrcVal;
1246412403 }
1246512404
12466- CVariable* pReturnVal = returnsImmValue ?
12467- m_currShader->GetNewVariable(
12468- 1, ISA_TYPE_UD, EALIGN_GRF, true, CName::NONE) :
12469- nullptr;
12470-
1247112405 if (shouldGenerateLSC(pInst))
1247212406 {
1247312407 m_encoder->LSC_AtomicRaw(
0 commit comments