@@ -5778,6 +5778,8 @@ void HWConformity::conformBB(G4_BB* bb)
57785778
57795779 fixSelCsel (i, bb);
57805780
5781+ fixPredCtrl (i, bb);
5782+
57815783 if (inst->getExecSize () > builder.getNativeExecSize ())
57825784 {
57835785 if (inst->opcode () == G4_math &&
@@ -7540,3 +7542,57 @@ bool HWConformity::fixIntToHFMove(G4_BB* bb)
75407542 return changed;
75417543}
75427544
7545+ void HWConformity::fixPredCtrl (INST_LIST_ITER it, G4_BB* bb)
7546+ {
7547+ G4_INST* inst = *it;
7548+ G4_Predicate* pred = inst->getPredicate ();
7549+ if (pred && (pred->getControl () == PRED_ANY_WHOLE || pred->getControl () == PRED_ALL_WHOLE))
7550+ {
7551+ // we need WA if pred's size is greater than inst's exec size
7552+ // and the platform does not support predctrl group size (indicated by the fact we
7553+ // have PRED_ANY_WHOLE and PRED_ALL_WHOLE)
7554+ // The case where pred size is less than inst's exec size is already undefined
7555+ // even with predCtrl group size..
7556+ G4_Declare* flagDcl = pred->getTopDcl ();
7557+ if (flagDcl->getNumberFlagElements () > inst->getExecSize ())
7558+ {
7559+ // convert
7560+ // (f0.any32h) sel (1) ...
7561+ // into
7562+ // cmp (1) [ne] f1 f0 0
7563+ // (f1) sel (1) ...
7564+ // and
7565+ // (f0.all32h) sel (1) ...
7566+ // into
7567+ // cmp (1) [e] f1 f0 0xFFFFFFFF
7568+ //
7569+ // if f0 happens to be < 16 elements we have to clear upper bits as well in case it has garbage values
7570+ assert (!inst->getCondMod () && " currently don't handle an instruction with conditional modifier" );
7571+ assert ((inst->isWriteEnableInst () || !bb->isInSimdFlow ()) && " don't handle instruction in SIMD CF for now" );
7572+ G4_Declare* tmpFlag = builder.createTempFlag (1 );
7573+ G4_Type flagType = flagDcl->getNumberFlagElements () == 32 ? Type_UD : Type_UW;
7574+ uint32_t allOneMask = (uint32_t ) ((1ULL << flagDcl->getNumberFlagElements ()) - 1 );
7575+ G4_Declare* cmpSrc0Flag = flagDcl;
7576+ if (flagDcl->getNumberFlagElements () < 16 )
7577+ {
7578+ // clear the upper bit of the flag
7579+ auto andInst = builder.createInst (nullptr , G4_and, nullptr , false , 1 , builder.Create_Dst_Opnd_From_Dcl (tmpFlag, 1 ),
7580+ builder.Create_Src_Opnd_From_Dcl (flagDcl, builder.getRegionScalar ()),
7581+ builder.createImm (allOneMask, Type_UW), InstOpt_WriteEnable);
7582+ bb->insert (it, andInst);
7583+ cmpSrc0Flag = tmpFlag;
7584+ }
7585+ G4_CondMod* condMod = builder.createCondMod (pred->getControl () == PRED_ANY_WHOLE ? Mod_ne : Mod_e,
7586+ tmpFlag->getRegVar (), 0 );
7587+
7588+ G4_Imm* immVal = builder.createImm (pred->getControl () == PRED_ANY_WHOLE ? 0 : allOneMask, flagType);
7589+ // cmp needs to be as wide as the original inst but is uniform and NoMask otherwise
7590+ auto cmpInst = builder.createInst (nullptr , G4_cmp, condMod, false , inst->getExecSize (), builder.createNullDst (flagType),
7591+ builder.createSrcRegRegion (Mod_src_undef, Direct, cmpSrc0Flag->getRegVar (), 0 , 0 , builder.getRegionScalar (), flagType),
7592+ immVal, InstOpt_WriteEnable);
7593+ bb->insert (it, cmpInst);
7594+ inst->setPredicate (builder.createPredicate (pred->getState (), tmpFlag->getRegVar (), 0 ));
7595+ }
7596+ }
7597+ }
7598+
0 commit comments