@@ -2882,7 +2882,8 @@ void HWConformity::fix64bInst( INST_LIST_ITER iter, G4_BB* bb )
28822882 for (int i = 0 , size = G4_Inst_Table[inst->opcode ()].n_srcs ; !uses64BitType && i < size; i++)
28832883 {
28842884 G4_Operand* src = inst->getSrc (i);
2885- if (src != NULL && G4_Type_Table[src->getType ()].byteSize == 8 )
2885+
2886+ if (src && G4_Type_Table[src->getType ()].byteSize == 8 )
28862887 {
28872888 uses64BitType = true ;
28882889 }
@@ -2897,12 +2898,42 @@ void HWConformity::fix64bInst( INST_LIST_ITER iter, G4_BB* bb )
28972898
28982899 if (uses64BitType)
28992900 {
2900- #if 0
2901- //#ifdef DEBUG_VERBOSE_ON
2902- std::cout << "CHV 64b fix for:\n";
2903- inst->emit(std::cout);
2904- std::cout << "\n";
2905- #endif
2901+
2902+ if (builder.no64bitType () && inst->opcode () == G4_mov)
2903+ {
2904+ // While the input should not contain any ALU instruction with a 64b type, we may
2905+ // still end up with 64b moves generated when preparing a send payload (e.g., 64b
2906+ // atomics, A64 messages). We fix such moves here by breaking each into two 32b moves.
2907+ // For now, only copy moves are handled.
2908+ auto dst = inst->getDst ();
2909+ auto src0 = inst->getSrc (0 );
2910+ assert (getTypeSize (dst->getType ()) == 8 &&
2911+ getTypeSize (src0->getType ()) == 8 && " must be copy moves" );
2912+ assert (src0->isSrcRegRegion () &&
2913+ (src0->asSrcRegRegion ()->isScalar () ||
2914+ src0->asSrcRegRegion ()->getRegion ()->isContiguous (inst->getExecSize ())) &&
2915+ " expect src0 to be scalar or contiguous" );
2916+ auto src0RR = src0->asSrcRegRegion ();
2917+ assert (inst->isRawMov () && dst->getHorzStride () == 1 && " expect only copy moves" );
2918+
2919+ // 1st half
2920+ auto newDst = builder.createDstRegRegion (Direct, dst->getBase (), dst->getRegOff (), dst->getSubRegOff () * 2 ,
2921+ 2 , Type_UD);
2922+ auto newSrc = builder.createSrcRegRegion (Mod_src_undef, Direct, src0RR->getBase (), src0RR->getRegOff (),
2923+ src0RR->getSubRegOff () * 2 , src0RR->isScalar () ? builder.getRegionScalar () : builder.getRegionStride2 (), Type_UD);
2924+ auto newInst = builder.createInst (nullptr , G4_mov, nullptr , false , inst->getExecSize (), newDst, newSrc, nullptr , inst->getOption ());
2925+ bb->insert (iter, newInst);
2926+
2927+ // second half
2928+ newDst = builder.createDstRegRegion (Direct, dst->getBase (), dst->getRegOff (), dst->getSubRegOff () * 2 + 1 ,
2929+ 2 , Type_UD);
2930+ newSrc = builder.createSrcRegRegion (Mod_src_undef, Direct, src0RR->getBase (), src0RR->getRegOff (),
2931+ src0RR->getSubRegOff () * 2 + 1 , src0RR->isScalar () ? builder.getRegionScalar () : builder.getRegionStride2 (), Type_UD);
2932+ newInst = builder.createInst (nullptr , G4_mov, nullptr , false , inst->getExecSize (), newDst, newSrc, nullptr , inst->getOption ());
2933+ *iter = newInst;
2934+ return ;
2935+ }
2936+
29062937 int numSrc = G4_Inst_Table[inst->opcode ()].n_srcs ;
29072938
29082939 // handle indirect sources first
0 commit comments