Skip to content

Commit 7deb262

Browse files
DianaChenZuul
authored and committed
vISA: Re-adjust indirect call target offset after swsb
Change-Id: If74854a8bd161354264cdfc669af7a048701d2b9
1 parent 0d92bac commit 7deb262

File tree

4 files changed

+224
-175
lines changed

4 files changed

+224
-175
lines changed

visa/Optimizer.cpp

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6894,6 +6894,14 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
68946894
// also return the call with jmpi in VISAKernelImpl::compilePostOptimize
68956895
replaceRetWithJmpi();
68966896
}
6897+
6898+
if (kernel.hasIndirectCall())
6899+
{
6900+
// If the indirect call has register src0, the register must be a
6901+
// ip-based address of the call target. Insert an add before the call to
6902+
// calculate the relative offset from call to the target
6903+
expandIndirectCallWithRegTarget();
6904+
}
68976905
}
68986906

68996907
class NSDS {
@@ -8259,6 +8267,152 @@ void genBucket(G4_INST *send, Bucket *bucket, RW rwType) {
82598267
}
82608268
}
82618269

8270+
G4_Declare* Optimizer::createInstsForCallTargetOffset(
8271+
InstListType& insts, G4_INST* fcall, int64_t adjust_off)
8272+
{
8273+
// create instruction sequence:
8274+
// add r2.0 -IP call_target
8275+
// add r2.0 r2.0 adjust_off
8276+
8277+
// call's dst must be r1.0, which is reserved at
8278+
// GlobalRA::setABIForStackCallFunctionCalls. It must not be overlapped with
8279+
// r2.0, that is hardcoded as the new jump target
8280+
assert(fcall->getDst()->isGreg());
8281+
assert((fcall->getDst()->getLinearizedStart() / GENX_GRF_REG_SIZ) != 2);
8282+
8283+
// hardcoded add's dst to r2.0
8284+
G4_Declare* add_dst_decl =
8285+
builder.createHardwiredDeclare(1, fcall->getDst()->getType(), 2, 0);
8286+
8287+
// create the first add instruction
8288+
// add r2.0 -IP call_target
8289+
G4_INST* add_inst = builder.createBinOp(
8290+
G4_add, 1,
8291+
builder.Create_Dst_Opnd_From_Dcl(add_dst_decl, 1),
8292+
builder.createSrcRegRegion(
8293+
Mod_Minus, Direct, builder.phyregpool.getIpReg(), 0, 0,
8294+
builder.getRegionScalar(), Type_UD),
8295+
fcall->getSrc(0), InstOpt_WriteEnable | InstOpt_NoCompact, false);
8296+
8297+
// create the second add to add the -ip to adjust_off, adjust_off depends
8298+
// on how many instructions from the first add to the jmp instruction, and
8299+
// if it's post-increment (jmpi) or pre-increment (call)
8300+
// add r2.0 r2.0 adjust_off
8301+
G4_INST* add_inst2 = builder.createBinOp(
8302+
G4_add, 1,
8303+
builder.Create_Dst_Opnd_From_Dcl(add_dst_decl, 1),
8304+
builder.Create_Src_Opnd_From_Dcl(
8305+
add_dst_decl, builder.getRegionScalar()),
8306+
builder.createImm(adjust_off, Type_D),
8307+
InstOpt_WriteEnable | InstOpt_NoCompact, false);
8308+
8309+
insts.push_back(add_inst);
8310+
insts.push_back(add_inst2);
8311+
8312+
return add_dst_decl;
8313+
}
8314+
8315+
void Optimizer::createInstForJmpiSequence(InstListType& insts, G4_INST* fcall)
8316+
{
8317+
// SKL workaround for indirect call
8318+
// r1.0 is the return IP (the instruction right after jmpi)
8319+
// r1.1 is the return mask. While we'll replace the ret in callee to jmpi as well,
8320+
// we do not need to consider the return mask here.
8321+
8322+
// Do not allow predicate call on jmpi WA
8323+
assert(fcall->getPredicate() == nullptr);
8324+
8325+
// calculate the reserved register's num from fcall's dst register (should be r1)
8326+
assert(fcall->getDst()->isGreg());
8327+
uint32_t reg_num = fcall->getDst()->getLinearizedStart() / GENX_GRF_REG_SIZ;
8328+
8329+
G4_Declare* new_target_decl = createInstsForCallTargetOffset(insts, fcall, -64);
8330+
8331+
// add r1.0 IP 32
8332+
G4_Declare* r1_0_decl =
8333+
builder.createHardwiredDeclare(1, fcall->getDst()->getType(), reg_num, 0);
8334+
insts.push_back(builder.createBinOp(
8335+
G4_add, 1,
8336+
builder.Create_Dst_Opnd_From_Dcl(r1_0_decl, 1),
8337+
builder.createSrcRegRegion(
8338+
Mod_src_undef, Direct, builder.phyregpool.getIpReg(), 0, 0,
8339+
builder.getRegionScalar(), Type_UD),
8340+
builder.createImm(32, Type_UD),
8341+
InstOpt_WriteEnable | InstOpt_NoCompact, false));
8342+
8343+
// jmpi r2.0
8344+
// update jump target (src0) to add's dst
8345+
G4_SrcRegRegion* jump_target = builder.Create_Src_Opnd_From_Dcl(
8346+
new_target_decl, builder.getRegionScalar());
8347+
jump_target->setType(Type_D);
8348+
insts.push_back(builder.createJmp(nullptr, jump_target, InstOpt_NoCompact, false));
8349+
}
8350+
8351+
void Optimizer::expandIndirectCallWithRegTarget()
8352+
{
8353+
// check every fcall
8354+
for (auto bb : kernel.fg)
8355+
{
8356+
// At this point G4_pseudo_fcall may be converted to G4_call,
8357+
// check all call (???)
8358+
if (bb->back()->isFCall()) //|| bb->back()->isCall())
8359+
{
8360+
G4_INST* fcall = bb->back();
8361+
if (fcall->getSrc(0)->isGreg() || fcall->getSrc(0)->isA0()) {
8362+
// at this point the call instruction's src0 has the target_address
8363+
// and the call dst is the reserved register for ret
8364+
// All the caller-save registers should be saved. We use r2.0 directly
8365+
// here to calculate the new call's target. We picked r2.0 due to the
8366+
// HW's limitation that call/calla's src and dst offset (the subreg num)
8367+
// must be 0.
8368+
//
8369+
// expand call
8370+
// From:
8371+
// call r1.0 call_target
8372+
// To:
8373+
// add r2.0 -IP call_target
8374+
// add r2.0 r2.0 -32
8375+
// call r1.0 r2.0
8376+
8377+
// For SKL workaround, expand call
8378+
// From:
8379+
// call r1.0 call_target
8380+
// To:
8381+
// add r2.0 -IP call_target
8382+
// add r2.0 r2.0 -64
8383+
// add r1.0 IP 32 // set the return IP
8384+
// jmpi r2.0
8385+
8386+
InstListType expanded_insts;
8387+
if (builder.needReplaceIndirectCallWithJmpi()) {
8388+
createInstForJmpiSequence(expanded_insts, fcall);
8389+
}
8390+
else {
8391+
G4_Declare* jmp_target_decl =
8392+
createInstsForCallTargetOffset(expanded_insts, fcall, -32);
8393+
// Updated call's target to the new target
8394+
G4_SrcRegRegion* jump_target = builder.Create_Src_Opnd_From_Dcl(
8395+
jmp_target_decl, builder.getRegionScalar());
8396+
fcall->setSrc(jump_target, 0);
8397+
fcall->setNoCompacted();
8398+
}
8399+
// then insert the expanded instructions right before the call
8400+
INST_LIST_ITER insert_point = bb->end();
8401+
--insert_point;
8402+
for (auto inst_to_add : expanded_insts) {
8403+
bb->getInstList().insert(insert_point, inst_to_add);
8404+
inst_to_add->setCISAOff(fcall->getCISAOff());
8405+
}
8406+
8407+
// remove call from the instlist for Jmpi WA
8408+
if (builder.needReplaceIndirectCallWithJmpi())
8409+
bb->getInstList().erase(--bb->end());
8410+
}
8411+
}
8412+
}
8413+
8414+
}
8415+
82628416
// Replace ret with jmpi, must be single return
82638417
void Optimizer::replaceRetWithJmpi()
82648418
{

visa/Optimizer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,15 @@ class Optimizer
252252
void replaceRetWithJmpi();
253253
void doNoMaskWA();
254254
void insertFenceAtEntry();
255+
256+
typedef std::vector<vISA::G4_INST*> InstListType;
257+
void expandIndirectCallWithRegTarget();
258+
void createInstForJmpiSequence(InstListType& insts, G4_INST* fcall);
259+
// create the instructions to calculate the jump target offset, return G4_Declare of the
260+
// newly created jmp target
261+
G4_Declare* createInstsForCallTargetOffset(
262+
InstListType& insts, G4_INST* fcall, int64_t adjust_off);
263+
255264
void insertHashMovs();
256265
void insertDummyCompactInst();
257266
void removeLifetimeOps();

visa/VISAKernel.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4343
//forward declaration
4444
namespace vISA
4545
{
46-
class G4_Declare;
47-
class G4_INST;
4846
class G4_Kernel;
4947
class DebugInfoFormat;
5048
class BinaryEncodingBase;
@@ -855,14 +853,8 @@ class VISAKernelImpl : public VISAFunction
855853
int calculateTotalInputSize();
856854
int compileTillOptimize();
857855

858-
// Expand indirect call
859-
typedef std::vector<vISA::G4_INST*> InstListType;
860-
void expandIndirectCallWithRegTarget();
861-
void createInstForJmpiSequence(InstListType& insts, vISA::G4_INST* fcall);
862-
// create the instructions to calculate the jump target offset, return G4_Declare of the
863-
// new created jmp target
864-
vISA::G4_Declare* createInstsForCallTargetOffset(
865-
InstListType& insts, vISA::G4_INST* fcall, int64_t adjust_off);
856+
// Re-adjust indirect call target after swsb
857+
void adjustIndirectCallOffset();
866858

867859
CisaFramework::CisaInst* AppendVISASvmGeneralScatterInst(VISA_PredOpnd* pred,
868860
VISA_EMask_Ctrl emask, VISA_Exec_Size execSize, unsigned char blockSize,

0 commit comments

Comments
 (0)