@@ -164,18 +164,41 @@ INST_LIST_ITER InstSplitPass::splitInstruction(INST_LIST_ITER it,
164164 return false ;
165165 };
166166
167+ // Check destination
168+ bool isDstCross2GRF = cross2GRFDst (inst->getDst ());
169+ if (inst->getDst () && isDstCross2GRF && !AllowCross2GRF (inst->getDst ())) {
170+ doSplit = true ;
171+ }
172+
167173 // Check sources
168174 for (int i = 0 , numSrc = inst->getNumSrc (); i < numSrc; ++i) {
169175 if (!inst->getSrc (i)->isSrcRegRegion ())
170176 continue ;
171- if (cross2GRF (inst->getSrc (i)) && !AllowCross2GRF (inst->getSrc (i))) {
177+ G4_SrcRegRegion *src = inst->getSrc (i)->asSrcRegRegion ();
178+
179+ // If dst spans 4 GRFs (dst must be a 64b datatype and execSize == 32), the
180+ // src operand must use broadcast regioning or flat regioning (dst/src are
181+ // aligned). In other words, if src is not a 64b datatype, src would have to
182+ // be fixed up with a stride > 1 so that it also spans 4 GRFs after fixing.
183+ // Although HW allows this, vISA does not, because vISA requires contiguous
184+ // regioning for a 4-GRF operand. So we have to split in such cases.
185+ // For example:
186+ // mov (32|M0) r48.0<1>:q r12.0<1;1,0>:d
187+ // =>
188+ // mov (16|M0) r48.0<1>:q r12.0<1;1,0>:d
189+ // mov (16|M16) r50.0<1>:q r13.0<1;1,0>:d
190+ // If dst is of :df datatype, we don't split here because the instruction is
191+ // in the long pipeline and will be fixed in HWConformity.
192+ if ((isDstCross2GRF && !IS_DFTYPE (inst->getDst ()->getType ()) &&
193+ src->getTypeSize () != 8 &&
194+ !src->isScalarSrc ()) ||
195+ (cross2GRF (src) && !AllowCross2GRF (src))) {
172196 doSplit = true ;
173197 break ;
174198 }
175199 if (m_builder->getPlatform () >= Xe_XeHPSDV) {
176200 // Instructions whose operands are 64b and have 2D regioning need to be
177201 // split up front to help fixUnalignedRegions(..) covering 2D cases.
178- G4_SrcRegRegion *src = inst->getSrc (i)->asSrcRegRegion ();
179202 if ((src->getType () == Type_DF || IS_QTYPE (src->getType ())) &&
180203 !src->getRegion ()->isSingleStride (execSize)) {
181204 // Try splitting the inst if it's a mov. Otherwise, legalize
@@ -193,12 +216,6 @@ INST_LIST_ITER InstSplitPass::splitInstruction(INST_LIST_ITER it,
193216 }
194217 }
195218
196- // Check destination
197- if (inst->getDst () && cross2GRFDst (inst->getDst ()) &&
198- !AllowCross2GRF (inst->getDst ())) {
199- doSplit = true ;
200- }
201-
202219 // Handle split exceptions
203220 if (!doSplit) {
204221 if (inst->opcode () == G4_cmp && !m_builder->supportNativeSIMD32 ()) {
0 commit comments