Skip to content

Commit bf25778

Browse files
weiyu-chen authored and sys_zuul committed
Refactor insertElement emission code.
Change-Id: Ia34e60ce6480cc1674c4ce8f0e597d9b41dfcb17
1 parent 8eade27 commit bf25778

File tree

1 file changed

+44
-29
lines changed

1 file changed

+44
-29
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 44 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -10088,48 +10088,63 @@ void EmitPass::emitInsert(llvm::Instruction* inst)
1008810088
m_encoder->Push();
1008910089

1009010090
// a0 = addressof(vector variable) + offset2 <-- address of element to insert at
10091-
CVariable* pDstArrElm = m_currShader->GetNewAddressVariable(
10092-
pIndexVar->IsUniform() ? 1 : numLanes(m_currShader->m_SIMDSize),
10093-
m_destination->GetType(),
10094-
pIndexVar->IsUniform(),
10095-
pInstVar->IsUniform());
10096-
10097-
m_encoder->AddrAdd(pDstArrElm, m_destination, pOffset2);
10098-
m_encoder->Push();
10099-
10100-
// If pIndexVar is uniform, we are using 1x1 indirect addressing and
10101-
// a single copy is what we need.
1010210091
if (pIndexVar->IsUniform())
1010310092
{
10093+
CVariable* pDstArrElm = m_currShader->GetNewAddressVariable(1, m_destination->GetType(), true, pInstVar->IsUniform());
10094+
m_encoder->AddrAdd(pDstArrElm, m_destination, pOffset2);
10095+
m_encoder->Push();
1010410096
m_encoder->Copy(pDstArrElm, pElemVar);
1010510097
m_encoder->Push();
1010610098
}
1010710099
else
1010810100
{
10109-
// Handle the case when the index is non-uniform - we need to lookup a different value
10110-
// for each simd lane.
10111-
// Since HW doesn't support writing to more than two consecutive GRFs, we need to simulate
10112-
// scattered write by a sequence of instructions, each one writing to a single simd-lane.
10113-
for (uint lane = 0; lane < numLanes(m_currShader->m_SIMDSize); ++lane)
10101+
int loopCount = (m_currShader->m_dispatchSize == SIMDMode::SIMD32 && m_currShader->m_numberInstance == 1) ? 2 : 1;
10102+
for (int i = 0; i < loopCount; ++i)
1011410103
{
10115-
CVariable* immMask = m_currShader->ImmToVariable(1ULL << lane, ISA_TYPE_UD);
10116-
CVariable* dstPred = m_currShader->GetNewVariable(
10117-
numLanes(m_SimdMode),
10118-
ISA_TYPE_BOOL,
10119-
EALIGN_BYTE);
10104+
if (i == 1)
10105+
{
10106+
// explicitly set second half as we are manually splitting
10107+
m_encoder->SetSecondHalf(true);
10108+
}
10109+
SIMDMode simdMode = std::min(m_currShader->m_SIMDSize, SIMDMode::SIMD16);
10110+
CVariable* pDstArrElm = m_currShader->GetNewAddressVariable(
10111+
numLanes(simdMode),
10112+
m_destination->GetType(),
10113+
false,
10114+
pInstVar->IsUniform());
1012010115

10121-
m_encoder->SetP(dstPred, immMask);
10116+
m_encoder->SetSimdSize(simdMode);
10117+
m_encoder->AddrAdd(pDstArrElm, m_destination, pOffset2);
1012210118
m_encoder->Push();
1012310119

10124-
m_encoder->SetPredicate(dstPred);
10125-
if (!pElemVar->IsUniform())
10120+
// Handle the case when the index is non-uniform - we need to lookup a different value
10121+
// for each simd lane.
10122+
// Since HW doesn't support scattered GRF writes, we need to simulate
10123+
// scattered write by a sequence of instructions, each one writing to a single simd-lane.
10124+
for (uint lane = 0; lane < numLanes(simdMode); ++lane)
1012610125
{
10127-
m_encoder->SetSrcSubReg(0, lane);
10126+
uint position = lane + i * 16;
10127+
CVariable* immMask = m_currShader->ImmToVariable(1ULL << lane, ISA_TYPE_UD);
10128+
CVariable* dstPred = m_currShader->GetNewVariable(
10129+
numLanes(m_SimdMode),
10130+
ISA_TYPE_BOOL,
10131+
EALIGN_BYTE);
10132+
10133+
m_encoder->SetSimdSize(simdMode);
10134+
m_encoder->SetP(dstPred, immMask);
10135+
m_encoder->Push();
10136+
10137+
m_encoder->SetPredicate(dstPred);
10138+
if (!pElemVar->IsUniform())
10139+
{
10140+
m_encoder->SetSrcSubReg(0, position);
10141+
}
10142+
m_encoder->SetSrcRegion(0, 0, 1, 0);
10143+
m_encoder->SetDstSubReg(lane);
10144+
m_encoder->SetSimdSize(simdMode);
10145+
m_encoder->Copy(pDstArrElm, pElemVar);
10146+
m_encoder->Push();
1012810147
}
10129-
m_encoder->SetSrcRegion(0, 0, 1, 0);
10130-
m_encoder->SetDstSubReg(lane);
10131-
m_encoder->Copy(pDstArrElm, pElemVar);
10132-
m_encoder->Push();
1013310148
}
1013410149
}
1013510150
}

0 commit comments

Comments
 (0)