Skip to content

Commit c64fbd1

Browse files
fangliu2020igcbot
authored and committed
Fix address register restriction on dst
If the destination is an address register, it must not span the boundary between the lower and upper 8 dwords of the register.
1 parent 606de4a commit c64fbd1

File tree

2 files changed

+20
-11
lines changed

2 files changed

+20
-11
lines changed

visa/Optimizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ class Optimizer {
266266
void applyNamedBarrierWA(INST_LIST_ITER it, G4_BB *bb);
267267
void insertIEEEExceptionTrap();
268268
void expandIEEEExceptionTrap(INST_LIST_ITER it, G4_BB *bb);
269+
void fixDirectAddrBoundOnDst();
269270

270271
typedef std::vector<vISA::G4_INST *> InstListType;
271272
// create instruction sequence to calculate call offset from ip

visa/SWWA.cpp

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3915,17 +3915,6 @@ void Optimizer::HWWorkaround() {
39153915
LSC_FENCE_OP_NONE)
39163916
bb->insertBefore(ii, inst->cloneInst());
39173917

3918-
// When destination is an address register the following apply:
3919-
// Destination must not span across the lower to upper 8 dword
3920-
// boundary of the register.
3921-
// Fix this restriction after RA instead of HWConformity just because
3922-
// RA(spill/fill, A0 save/restore) would generate such instructions.
3923-
if (inst->getExecSize() == g4::SIMD32 && inst->getDst() &&
3924-
inst->getDst()->isDirectA0()) {
3925-
HWConformity hwConf(builder, kernel);
3926-
hwConf.evenlySplitInst(ii, bb, /*checkOverlap*/ false);
3927-
}
3928-
39293918
ii++;
39303919
}
39313920
}
@@ -3980,8 +3969,27 @@ void Optimizer::HWWorkaround() {
39803969
if (builder.hasFPU0ReadSuppressionIssue()) {
39813970
fixReadSuppressioninFPU0();
39823971
}
3972+
3973+
if (builder.supportNativeSIMD32())
3974+
fixDirectAddrBoundOnDst();
39833975
}
39843976

3977+
// When destination is an address register the following apply:
3978+
// Destination must not span across the lower to upper 8 dword
3979+
// boundary of the register.
3980+
// Fix this restriction after RA instead of HWConformity just because
3981+
// RA(spill/fill, A0 save/restore) would generate such instructions.
3982+
// Scans every instruction of every basic block in the kernel's flow graph and
// hands each SIMD32 instruction whose destination is a direct address (A0)
// operand to HWConformity::evenlySplitInst.
void Optimizer::fixDirectAddrBoundOnDst() {
3983+
// A single HWConformity helper is built once and reused for every split.
HWConformity hwConf(builder, kernel);
3984+
for (auto bb : kernel.fg) {
3985+
// NOTE(review): `ie` is captured once and `it` is passed to evenlySplitInst;
// presumably the split keeps `it` on a valid instruction of `bb` so that
// `++it` stays safe -- confirm against HWConformity::evenlySplitInst.
for (auto it = bb->begin(), ie = bb->end(); it != ie; ++it) {
3986+
G4_INST *inst = *it;
3987+
// Only SIMD32 instructions that have a destination, and whose destination
// is a direct A0 operand, are split.
if (inst->getExecSize() == g4::SIMD32 && inst->getDst() &&
3988+
inst->getDst()->isDirectA0())
3989+
// checkOverlap is explicitly disabled for this split.
hwConf.evenlySplitInst(it, bb, /*checkOverlap*/ false);
3990+
}
3991+
}
3992+
}
39853993

39863994
static bool retires(G4_Operand *Opnd, G4_INST *SI) {
39873995
vASSERT(SI);

0 commit comments

Comments
 (0)