@@ -3937,7 +3937,6 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
39373937 MUST_BE_TRUE (inst->opcode () == G4_pseudo_mad, " expect pseudo mad" );
39383938 bool mustDoMad = IS_TYPE_FLOAT_ALL (inst->getDst ()->getType ());
39393939
3940-
39413940 // try swapping src0 (really src2) and src1 to see if we can save a move
39423941 // some conditions where swap may help:
39433942 // -- if src0 is D, as MAD only supports D + D * W
@@ -3946,8 +3945,9 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
39463945 // -- if src1 is scalar, as MAD src2 has more region restrictions
39473946 // We perform the swapping before the dst checks as some platforms require dst and src2 to have the same subreg
39483947 {
3949- G4_Operand* src0 = inst->getSrc (0 );
3950- G4_Operand* src1 = inst->getSrc (1 );
3948+ auto src0 = inst->getSrc (0 );
3949+ auto src1 = inst->getSrc (1 );
3950+
39513951 if (IS_DTYPE (src0->getType ()) && src0->isSrcRegRegion () && !IS_DTYPE (src1->getType ()))
39523952 {
39533953 inst->swapSrc (0 , 1 );
@@ -3957,17 +3957,21 @@ bool HWConformity::generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter)
39573957 // swap src0 and src1 as src0 supports imm
39583958 inst->swapSrc (0 , 1 );
39593959 }
3960- else if (src0->isSrcRegRegion () && !src0->asSrcRegRegion ()->isScalar () &&
3961- src1->isSrcRegRegion () &&
3962- src1->asSrcRegRegion ()->isScalar ())
3960+ else if (isLowPrecisionFloatTy (src0->getType ()) && src1->getType () == Type_F)
39633961 {
3964- // Swap src0 and src1 if src1 is scalar but src0 is not, as src2 regioning support is quite limited.
39653962 inst->swapSrc (0 , 1 );
39663963 }
3967- else if (isLowPrecisionFloatTy (src0-> getType ()) && src1->getType () == Type_F )
3964+ else if (src1-> isSrcRegRegion () && src1->asSrcRegRegion ()-> isScalar () )
39683965 {
3969- inst->swapSrc (0 , 1 );
3966+ bool src0NeedMove = !isGoodAlign1TernarySrc (inst, 0 , true ) ||
3967+ (src0->isSrcRegRegion () && inst->getExecSize () * getTypeSize (src0->getType ()) < getGRFSize ());
3968+ // Swap src0 and src1 if src1 is scalar but src0 may need a move due to limited src2 regioning support.
3969+ if (src0NeedMove)
3970+ {
3971+ inst->swapSrc (0 , 1 );
3972+ }
39703973 }
3974+
39713975 }
39723976
39733977 if (!isGoodAlign1TernaryDst (inst))
0 commit comments