Skip to content

Commit ec97e42

Browse files
pkwasnie-intel authored and igcbot committed
Sub group clustered broadcast support for cluster size equal to SIMD size

Adds support for sub group clustered broadcast with cluster size equal to SIMD size.
1 parent d95903d commit ec97e42

File tree

1 file changed

+51
-9
lines changed

1 file changed

+51
-9
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 51 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5763,20 +5763,62 @@ void EmitPass::emitSimdClusteredBroadcast(llvm::Instruction* inst)
57635763
IGC_ASSERT_MESSAGE(isa<llvm::ConstantInt>(inst->getOperand(2)), "Unsupported: cluster lane must be constant");
57645764
const unsigned int clusterLane = int_cast<uint32_t>(cast<llvm::ConstantInt>(inst->getOperand(2))->getZExtValue());
57655765

5766-
IGC_ASSERT_MESSAGE(clusterSize < numLanes(m_currShader->m_dispatchSize), "cluster size must be smaller than SIMD");
5767-
IGC_ASSERT_MESSAGE(clusterSize == 8 || clusterSize == 16, "cluster size must be 8 or 16");
5766+
IGC_ASSERT_MESSAGE(clusterSize <= numLanes(m_currShader->m_dispatchSize), "cluster size must be smaller or equal to SIMD");
5767+
IGC_ASSERT_MESSAGE(clusterSize % 8 == 0, "cluster size must be 8 or 16");
57685768
IGC_ASSERT_MESSAGE(clusterLane < clusterSize, "cluster lane does not fit in cluster size");
57695769

5770-
m_encoder->SetSrcRegion(0, clusterSize, clusterSize, 0);
5771-
m_encoder->SetSrcSubReg(0, clusterLane);
5772-
m_encoder->Copy(m_destination, data);
5773-
if (m_currShader->m_numberInstance > 1)
5770+
if (clusterSize == numLanes(m_currShader->m_dispatchSize))
57745771
{
5775-
m_encoder->SetSecondHalf(true);
5772+
// There is actually no cluster, just do subgroup broadcast instead.
5773+
if (m_currShader->m_numberInstance > 1)
5774+
{
5775+
// Use an intermediate uniform variable
5776+
CVariable* uniformTemp = m_currShader->GetNewVariable(
5777+
1,
5778+
data->GetType(),
5779+
m_encoder->GetCISADataTypeAlignment(data->GetType()),
5780+
true, // isUniform
5781+
"ClusteredBroadcastTmp");
5782+
5783+
// Copy from source to the uniform temp...
5784+
m_encoder->SetSecondHalf(clusterLane >= 16);
5785+
m_encoder->SetNoMask();
5786+
m_encoder->SetSrcRegion(0, 0, 1, 0);
5787+
m_encoder->SetSrcSubReg(0, clusterLane % numLanes(m_encoder->GetSimdSize()));
5788+
m_encoder->Copy(uniformTemp, data);
5789+
m_encoder->Push();
5790+
m_encoder->SetSecondHalf(false);
5791+
5792+
// ...and broadcast.
5793+
m_encoder->Copy(m_destination, uniformTemp);
5794+
m_encoder->Push();
5795+
m_encoder->SetSecondHalf(true);
5796+
m_encoder->Copy(m_destination, uniformTemp);
5797+
m_encoder->SetSecondHalf(false);
5798+
}
5799+
else
5800+
{
5801+
m_encoder->SetSrcRegion(0, 0, 1, 0);
5802+
m_encoder->SetSrcSubReg(0, clusterLane);
5803+
m_encoder->Copy(m_destination, data);
5804+
m_encoder->Push();
5805+
}
5806+
}
5807+
else
5808+
{
5809+
// Clustered broadcast.
5810+
m_encoder->SetSrcRegion(0, clusterSize, clusterSize, 0);
5811+
m_encoder->SetSrcSubReg(0, clusterLane);
57765812
m_encoder->Copy(m_destination, data);
5777-
m_encoder->SetSecondHalf(false);
5813+
if (m_currShader->m_numberInstance > 1)
5814+
{
5815+
m_encoder->SetSecondHalf(true);
5816+
m_encoder->Copy(m_destination, data);
5817+
m_encoder->SetSecondHalf(false);
5818+
}
5819+
m_encoder->Push();
57785820
}
5779-
m_encoder->Push();
5821+
57805822
}
57815823

57825824
void EmitPass::emitSimdShuffleDown(llvm::Instruction* inst)

0 commit comments

Comments
 (0)