@@ -6877,6 +6877,41 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
68776877 }
68786878
68796879
6880+ // returns for this fence instruction the iterator position where the commit move should be inserted.
6881+ // We conservatively assume a commit is needed before
6882+ // -- another send
6883+ // -- any optimization barrier
6884+ // -- any instruction that writes to fence's dst GRF
6885+ // If another instruction happens to read dst GRF, then it serves as the commit and we don't need the dummy move
6886+ std::optional<INST_LIST_ITER> Optimizer::findFenceCommitPos (INST_LIST_ITER fence, G4_BB* bb) const
6887+ {
6888+ auto fenceInst = *fence;
6889+ assert (fenceInst->isSend () && fenceInst->asSendInst ()->isFence ());
6890+ auto dst = fenceInst->getDst ();
6891+ auto I = std::next (fence);
6892+ for (auto E = bb->end (); I != E; ++I)
6893+ {
6894+ G4_INST* inst = *I;
6895+ if (inst->isSend () || inst->isOptBarrier ())
6896+ {
6897+ break ;
6898+ }
6899+ if (dst->hasOverlappingGRF (inst->getDst ()))
6900+ {
6901+ break ;
6902+ }
6903+ for (auto SI = inst->src_begin (), SE = inst->src_end (); SI != SE; ++SI)
6904+ {
6905+ auto src = *SI;
6906+ if (dst->hasOverlappingGRF (src))
6907+ {
6908+ return std::nullopt ;
6909+ }
6910+ }
6911+ }
6912+ return I;
6913+ }
6914+
68806915 // some workaround for HW restrictions. We apply them here so as not to affect optimizations, RA, and scheduling
68816916 void Optimizer::HWWorkaround ()
68826917 {
@@ -6890,6 +6925,7 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
68906925
68916926 // set physical pred/succ as it's needed for the call WA
68926927 fg.setPhysicalPredSucc ();
6928+ const bool scheduleFenceCommit = builder.getOption (vISA_scheduleFenceCommit);
68936929 BB_LIST_ITER ib, bend (fg.end ());
68946930 for (ib = fg.begin (); ib != bend; ++ib)
68956931 {
@@ -6908,9 +6944,17 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
69086944 {
69096945 // commit is enabled for the fence, need to generate a move after to make sure the fence is complete
69106946 // mov (8) r1.0<1>:ud r1.0<8;8,1>:ud {NoMask}
6911- INST_LIST_ITER nextIter = ii;
6912- nextIter++;
6913- G4_DstRegRegion* dst = inst->getDst ();
6947+ auto nextIter = std::next (ii);
6948+ if (scheduleFenceCommit)
6949+ {
6950+ auto iter = findFenceCommitPos (ii, bb);
6951+ if (!iter)
6952+ {
6953+ continue ; // skip this fence
6954+ }
6955+ nextIter = *iter;
6956+ }
6957+ auto dst = inst->getDst ();
69146958 G4_Declare* fenceDcl = dst->getBase ()->asRegVar ()->getDeclare ();
69156959 G4_DstRegRegion* movDst = builder.createDst (
69166960 builder.phyregpool .getNullReg (), 0 , 0 , 1 , fenceDcl->getElemType ());
0 commit comments