Skip to content

Commit 559869c

Browse files
weiyu-chen authored and igcbot committed
Add option to schedule fence commit move.
1 parent fa3d733 commit 559869c

File tree

4 files changed

+64
-3
lines changed

4 files changed

+64
-3
lines changed

visa/Gen4_IR.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2463,6 +2463,19 @@ class G4_Operand
24632463
return Rel_disjoint;
24642464
}
24652465

2466+
// should only be called post-RA, return true if this operand has overlapping GRF with other
2467+
// ToDo: extend to non-GRF operands?
2468+
bool hasOverlappingGRF(G4_Operand* other)
2469+
{
2470+
if (!other || !isGreg() || !other->isGreg())
2471+
{
2472+
return false;
2473+
}
2474+
auto LB = getLinearizedStart(), RB = getLinearizedEnd();
2475+
auto otherLB = other->getLinearizedStart(), otherRB = other->getLinearizedEnd();
2476+
return !(RB < otherLB || LB > otherRB);
2477+
}
2478+
24662479
static G4_Type GetNonVectorImmType(G4_Type type)
24672480
{
24682481
switch (type)

visa/Optimizer.cpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6877,6 +6877,41 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
68776877
}
68786878

68796879

6880+
// returns for this fence instruction the iterator position where the commit move should be inserted.
6881+
// We conservatively assume a commit is needed before
6882+
// -- another send
6883+
// -- any optimization barrier
6884+
// -- any instruction that writes to fence's dst GRF
6885+
// If another instruction happens to read dst GRF, then it serves as the commit and we don't need the dummy move
6886+
std::optional<INST_LIST_ITER> Optimizer::findFenceCommitPos(INST_LIST_ITER fence, G4_BB* bb) const
6887+
{
6888+
auto fenceInst = *fence;
6889+
assert(fenceInst->isSend() && fenceInst->asSendInst()->isFence());
6890+
auto dst = fenceInst->getDst();
6891+
auto I = std::next(fence);
6892+
for (auto E = bb->end(); I != E; ++I)
6893+
{
6894+
G4_INST* inst = *I;
6895+
if (inst->isSend() || inst->isOptBarrier())
6896+
{
6897+
break;
6898+
}
6899+
if (dst->hasOverlappingGRF(inst->getDst()))
6900+
{
6901+
break;
6902+
}
6903+
for (auto SI = inst->src_begin(), SE = inst->src_end(); SI != SE; ++SI)
6904+
{
6905+
auto src = *SI;
6906+
if (dst->hasOverlappingGRF(src))
6907+
{
6908+
return std::nullopt;
6909+
}
6910+
}
6911+
}
6912+
return I;
6913+
}
6914+
68806915
// some workaround for HW restrictions. We apply them here so as not to affect optimizations, RA, and scheduling
68816916
void Optimizer::HWWorkaround()
68826917
{
@@ -6890,6 +6925,7 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
68906925

68916926
// set physical pred/succ as it's needed for the call WA
68926927
fg.setPhysicalPredSucc();
6928+
const bool scheduleFenceCommit = builder.getOption(vISA_scheduleFenceCommit);
68936929
BB_LIST_ITER ib, bend(fg.end());
68946930
for (ib = fg.begin(); ib != bend; ++ib)
68956931
{
@@ -6908,9 +6944,17 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
69086944
{
69096945
// commit is enabled for the fence, need to generate a move after to make sure the fence is complete
69106946
// mov (8) r1.0<1>:ud r1.0<8;8,1>:ud {NoMask}
6911-
INST_LIST_ITER nextIter = ii;
6912-
nextIter++;
6913-
G4_DstRegRegion* dst = inst->getDst();
6947+
auto nextIter = std::next(ii);
6948+
if (scheduleFenceCommit)
6949+
{
6950+
auto iter = findFenceCommitPos(ii, bb);
6951+
if (!iter)
6952+
{
6953+
continue; // skip this fence
6954+
}
6955+
nextIter = *iter;
6956+
}
6957+
auto dst = inst->getDst();
69146958
G4_Declare* fenceDcl = dst->getBase()->asRegVar()->getDeclare();
69156959
G4_DstRegRegion* movDst = builder.createDst(
69166960
builder.phyregpool.getNullReg(), 0, 0, 1, fenceDcl->getElemType());

visa/Optimizer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
3434
#include "LocalScheduler/LocalScheduler_G4IR.h"
3535
#include "LocalScheduler/SWSB_G4IR.h"
3636
#include <unordered_set>
37+
#include <optional>
3738

3839
typedef struct{
3940
short immAddrOff = 0;
@@ -377,6 +378,8 @@ class Optimizer
377378

378379
bool isCopyPropProfitable(G4_INST* movInst) const;
379380

381+
std::optional<INST_LIST_ITER> findFenceCommitPos(INST_LIST_ITER fence, G4_BB* bb) const;
382+
380383
public:
381384
Optimizer(vISA::Mem_Manager& m, IR_Builder& b, G4_Kernel& k, FlowGraph& f) :
382385
builder(b), kernel(k), fg(f), mem(m), RAFail(false)

visa/include/VISAOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ DEF_VISA_OPTION(vISA_reassociate, ET_BOOL, "-noreassoc", UNUSED, tru
7575
DEF_VISA_OPTION(vISA_split4GRFVar, ET_BOOL, "-no4GRFSplit", UNUSED, true)
7676
DEF_VISA_OPTION(vISA_divergentBB, ET_BOOL, "-divergentBB", UNUSED, true)
7777
DEF_VISA_OPTION(vISA_splitInstructions, ET_BOOL, "-noSplitInstructions", UNUSED, true)
78+
DEF_VISA_OPTION(vISA_scheduleFenceCommit, ET_BOOL, NULLSTR, UNUSED, false)
7879

7980
//=== code gen options ===
8081
DEF_VISA_OPTION(vISA_noSrc1Byte, ET_BOOL, "-nosrc1byte", UNUSED, false)

0 commit comments

Comments
 (0)