@@ -2903,6 +2903,8 @@ bool HWConformity::emulate64bMov(INST_LIST_ITER iter, G4_BB *bb) {
29032903
29042904 if (src0->isSrcRegRegion ()) {
29052905 auto src0RR = src0->asSrcRegRegion ();
2906+ vISA_ASSERT (IS_INT (src0RR->getType ()) && IS_INT (dst->getType ()),
2907+ " expecting int types on src, dst" );
29062908 vISA_ASSERT (src0RR->getModifier () == Mod_src_undef,
29072909 " cannot handle saturation" );
29082910
@@ -8016,6 +8018,46 @@ uint16_t HWConformity::getSrcStride(G4_SrcRegRegion *src) {
80168018 return srcStride;
80178019};
80188020
8021+ void HWConformity::change64bStride2CopyToUD (INST_LIST_ITER it, G4_BB *bb) {
8022+ G4_INST *inst = *it;
8023+ G4_Operand *src = inst->getSrc (0 );
8024+ vISA_ASSERT (src != nullptr && src->isSrcRegRegion (),
8025+ " source must be a SrcRegRegion" );
8026+ G4_SrcRegRegion *origSrc = src->asSrcRegRegion ();
8027+ G4_Type execType = inst->getDst ()->getType ();
8028+ uint16_t stride = inst->getDst ()->getHorzStride ();
8029+ short dstRegOff = inst->getDst ()->getRegOff ();
8030+ short dstSubRegOff = inst->getDst ()->getSubRegOff ();
8031+
8032+ vISA_ASSERT (execType == Type_Q || execType == Type_DF,
8033+ " Only 64b data type support" );
8034+ execType = Type_UD;
8035+ dstSubRegOff *= 2 ;
8036+
8037+ G4_DstRegRegion *newDst =
8038+ builder.createDst (inst->getDst ()->getBase (), dstRegOff, dstSubRegOff + 1 ,
8039+ stride * 2 , execType);
8040+ G4_SrcRegRegion *newSrc = builder.createSrcRegRegion (
8041+ origSrc->getModifier (), Direct, origSrc->getBase (), origSrc->getRegOff (),
8042+ origSrc->getSubRegOff () * 2 + 1 , builder.createRegionDesc (2 , 1 , 0 ),
8043+ Type_UD);
8044+ inst->setSrc (newSrc, 0 );
8045+ inst->setDest (newDst);
8046+
8047+ G4_DstRegRegion *newDst1 = builder.createDst (
8048+ inst->getDst ()->getBase (), dstRegOff, dstSubRegOff, stride * 2 , execType);
8049+ G4_SrcRegRegion *newSrc1 = builder.createSrcRegRegion (
8050+ origSrc->getModifier (), Direct, origSrc->getBase (), origSrc->getRegOff (),
8051+ origSrc->getSubRegOff () * 2 , builder.createRegionDesc (2 , 1 , 0 ), Type_UD);
8052+
8053+ G4_INST *movInst = builder.createMov (inst->getExecSize (), newDst1, newSrc1,
8054+ inst->getOption (), false );
8055+
8056+ INST_LIST_ITER iter = it;
8057+ iter++;
8058+ bb->insertBefore (it, movInst);
8059+ }
8060+
80198061// on XeHP_SDV we have to make sure each source element is aligned to each dst
80208062// element for all float/64b inst (packed HF is ok in mixed mode inst). For all
80218063// violating instructions, we align each operand to the execution type for float
@@ -8107,8 +8149,15 @@ void HWConformity::fixUnalignedRegions(INST_LIST_ITER it, G4_BB *bb) {
81078149 // for packed 64b copy moves that are not under divergent CF, we can
81088150 // change its type to UD
81098151 change64bCopyToUD (inst, srcStride / inst->getSrc (0 )->getTypeSize ());
8110- } else if (srcStride != 0 ) {
8152+ } else if (isNoMaskInst && inst->getDst ()->getHorzStride () == 2 &&
8153+ execTyWidth == 8 &&
8154+ src0RR->getRegion ()->isContiguous (inst->getExecSize ())) {
8155+ change64bStride2CopyToUD (it, bb);
8156+ } else if (execTyWidth == 8 && IS_TYPE_INT (dstTy) &&
8157+ IS_TYPE_INT (src0RR->getType ()) && srcStride != 0 &&
8158+ !src0RR->isIndirect ()) {
81118159 // we can split 64b moves with single source stride into 2UD moves
8160+ // ToDo: check if this subsumes the previous else if
81128161 emulate64bMov (it, bb);
81138162 } else {
81148163 // a move we don't know how to handle without inserting more moves
0 commit comments