Skip to content

Commit 380a925

Browse files
pratikashar authored and igcbot committed
Store CE register value on stack on function entry
Store CE register value on stack on function entry. This adds overhead so it's currently enabled only for -O0.
1 parent 46ce6ed commit 380a925

File tree

9 files changed

+133
-6
lines changed

9 files changed

+133
-6
lines changed

IGC/DebugInfo/VISADebugDecoder.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ void IGC::DbgDecoder::CallFrameInfo::print(llvm::raw_ostream &OS) const {
141141
PrintItems(OS, retAddr, "\n ");
142142
OS << " ]\n";
143143

144+
OS << " CEOffsetFromFPOff: " << CEOffsetFromFPOff << "\n";
145+
OS << " CEStoreIP: " << CEStoreIP << "\n";
146+
144147
OS << " callee save entry list: [\n ";
145148
PrintItems(OS, calleeSaveEntry, "\n ");
146149
OS << " ]\n";

IGC/DebugInfo/VISADebugDecoder.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ class DbgDecoder {
182182
std::vector<LiveIntervalGenISA> callerbefp;
183183
bool retAddrValid = false;
184184
std::vector<LiveIntervalGenISA> retAddr;
185+
uint16_t CEOffsetFromFPOff;
186+
uint16_t CEStoreIP;
185187
uint16_t numCalleeSaveEntries = 0;
186188
std::vector<PhyRegSaveInfoPerIP> calleeSaveEntry;
187189
uint32_t numCallerSaveEntries = 0;
@@ -377,6 +379,8 @@ class DbgDecoder {
377379
f.cfi.retAddr.push_back(lv);
378380
}
379381
}
382+
f.cfi.CEOffsetFromFPOff = read<uint16_t>(dbg);
383+
f.cfi.CEStoreIP = read<uint16_t>(dbg);
380384
f.cfi.numCalleeSaveEntries = read<uint16_t>(dbg);
381385
for (unsigned int j = 0; j != f.cfi.numCalleeSaveEntries; j++) {
382386
PhyRegSaveInfoPerIP phyRegSave;

visa/DebugInfo.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,24 @@ int DbgDecoder::ddDbg() {
424424

425425
std::cout << "\n";
426426

427+
uint16_t CEOffset = 0;
428+
uint16_t CEIP = 0;
429+
retval = fread(&CEOffset, sizeof(uint16_t), 1, dbgFile);
430+
if (!retval)
431+
return -1;
432+
retval = fread(&CEIP, sizeof(uint16_t), 1, dbgFile);
433+
if (!retval)
434+
return -1;
435+
436+
if (CEOffset != 0xffff) {
437+
std::cout << "CE saved at: FP + " << (uint16_t)CEOffset << " from IP "
438+
<< CEIP << "\n";
439+
}
440+
else
441+
std::cout << "CE not saved\n";
442+
443+
std::cout << "\n";
444+
427445
std::cout << "Callee save:\n";
428446
ddCalleeCallerSave(reloc_offset, CALLEE);
429447
std::cout << "\n";
@@ -1352,6 +1370,21 @@ void emitDataCallFrameInfo(VISAKernelImpl *visaKernel, T &t) {
13521370
emitDataUInt8((uint8_t)0, t);
13531371
}
13541372

1373+
if (kernel->getOption(vISA_storeCE) &&
1374+
kernel->getKernelDebugInfo()->getCESaveInst()) {
1375+
uint16_t FPOffset = kernel->getKernelDebugInfo()->getCESaveOffset();
1376+
emitDataUInt16(FPOffset, t);
1377+
auto GenOffset =
1378+
kernel->getKernelDebugInfo()->getCESaveInst()->getGenOffset() +
1379+
getBinInstSize(kernel->getKernelDebugInfo()->getCESaveInst());
1380+
vISA_ASSERT(GenOffset <= std::numeric_limits<uint16_t>::max(),
1381+
"GenOffset is OOB");
1382+
emitDataUInt16((uint16_t)GenOffset, t);
1383+
} else {
1384+
emitDataUInt16(-1, t);
1385+
emitDataUInt16(0, t);
1386+
}
1387+
13551388
emitDataCalleeSave(visaKernel, t);
13561389

13571390
emitDataCallerSave(visaKernel, t);
@@ -1522,6 +1555,8 @@ KernelDebugInfo::KernelDebugInfo() : varNameMapAlloc(4096) {
15221555
fretVar = nullptr;
15231556
reloc_offset = 0;
15241557
missingVISAIdsComputed = false;
1558+
saveCE = nullptr;
1559+
CEStoreOffset = 0;
15251560
}
15261561

15271562
void KernelDebugInfo::updateRelocOffset() {
@@ -1943,6 +1978,10 @@ void KernelDebugInfo::updateExpandedIntrinsic(G4_InstIntrinsic *spillOrFill,
19431978
if (spillOrFill == getCallerBEFPSaveInst()) {
19441979
setCallerBEFPSaveInst(inst);
19451980
}
1981+
1982+
if (spillOrFill == getCESaveInst()) {
1983+
setSaveCEInst(inst);
1984+
}
19461985
}
19471986

19481987
void KernelDebugInfo::addCallerSaveInst(G4_BB *fcallBB, G4_INST *inst) {

visa/DebugInfo.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,8 @@ class KernelDebugInfo {
203203
G4_INST *setupFP;
204204
// Instruction that destroys BE_FP for current frame
205205
G4_INST *restoreSP;
206+
// Instruction that stores CE in prolog
207+
G4_INST *saveCE;
206208

207209
// Current frame size in bytes
208210
uint32_t frameSize;
@@ -211,6 +213,9 @@ class KernelDebugInfo {
211213
// NULL for kernel.
212214
G4_Declare *fretVar;
213215

216+
// Offset from FP where CE is stored
217+
uint16_t CEStoreOffset;
218+
214219
// Caller save/restore
215220
// std::vector<std::pair<fcall inst BB, std::pair<first caller save, last
216221
// caller restore>>> One entry per fcall inst in current compilation unit
@@ -303,6 +308,12 @@ class KernelDebugInfo {
303308
G4_Declare *getFretVar() const { return fretVar; }
304309
void setFretVar(G4_Declare *dcl) { fretVar = dcl; }
305310

311+
G4_INST *getCESaveInst() const { return saveCE; }
312+
void setSaveCEInst(G4_INST *i) { saveCE = i; }
313+
314+
void setCESaveOffset(uint16_t Off) { CEStoreOffset = Off; }
315+
uint16_t getCESaveOffset() const { return CEStoreOffset; }
316+
306317
void updateExpandedIntrinsic(G4_InstIntrinsic *spillOrFill, G4_INST *inst);
307318
void addCallerSaveInst(G4_BB *fcallBB, G4_INST *inst);
308319
void addCallerRestoreInst(G4_BB *fcallBB, G4_INST *inst);
@@ -540,10 +551,14 @@ struct CallFrameInfo
540551
uint8_t befpValid;
541552
VarLiveIntervalGenISA befp; // Validity depends on flag befpValid
542553
uint8_t callerbefpValid;
543-
VarLiveIntervalGenISA callerbefp; // Validity depends on flag
544-
callerbefpValid uint8_t retAddrValid; VarLiveIntervalGenISA retAddr; // Validity
545-
depends on flag retAddrValid uint16_t numCalleeSaveEntries; PhyRegSaveInfoPerIP
546-
calleeSaveEntry[numCalleeSaveEntries];
554+
VarLiveIntervalGenISA callerbefp; // Validity depends on flag callerbefpValid
555+
uint8_t retAddrValid;
556+
VarLiveIntervalGenISA retAddr; // Validity depends on flag retAddrValid
557+
uint16_t CEOffsetFromFPOff; // Offset from FP where CE is saved; -1 (0xffff) means CE is not saved
558+
uint16_t CEStoreIP; // This field stores IP where CE is stored.
559+
// Valid only if CEOffsetFromFPOff != -1.
560+
uint16_t numCalleeSaveEntries;
561+
PhyRegSaveInfoPerIP calleeSaveEntry[numCalleeSaveEntries];
547562
// Need this because of following:
548563
//
549564
// V10 -> r2, r3, r4, r5, r6, r7

visa/G4_Kernel.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,10 @@ class StackCallABI {
463463
uint32_t getSpillHeaderGRF() const;
464464

465465
uint32_t getThreadHeaderGRF() const;
466+
467+
uint32_t getFrameDescriptorByteSize() const {
468+
return (version == StackCallABIVersion::VER_3) ? 64 : 32;
469+
}
466470
};
467471

468472
// represents an argument placement

visa/GraphColor.cpp

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9224,6 +9224,59 @@ void GlobalRA::addCalleeSavePseudoCode() {
92249224
builder.instList.clear();
92259225
}
92269226

9227+
void GlobalRA::storeCEInProlog() {
9228+
if (!kernel.getOption(vISA_storeCE))
9229+
return;
9230+
9231+
// If we've to store CE in prolog, we emit:
9232+
// TmpReg (GRF_Aligned) = CE0.0
9233+
// Store TmpReg @ FP+Offset
9234+
//
9235+
// Where Offset = 1 GRF size in bytes
9236+
9237+
// Create new variable equal to GRF size so it's always GRF aligned.
9238+
// It's transitory so shouldn't impact register pressure. We want to
9239+
// write CE0.0 in 0th location of this variable so that it can be
9240+
// used as send payload.
9241+
auto TmpReg = builder.createDeclare(
9242+
"TmpCEReg", G4_GRF, builder.numEltPerGRF<Type_UD>(), 1, Type_UD);
9243+
auto *DstRgn = builder.createDstRegRegion(TmpReg, 1);
9244+
auto *CEReg = regPool.getMask0Reg();
9245+
auto *SrcOpnd = builder.createSrc(
9246+
CEReg, 0, 0, kernel.fg.builder->getRegionScalar(), Type_UD);
9247+
auto Mov = builder.createMov(g4::SIMD1, DstRgn, SrcOpnd,
9248+
G4_InstOption::InstOpt_WriteEnable, false);
9249+
auto nextPos = kernel.fg.getEntryBB()->insertBefore(
9250+
kernel.fg.getEntryBB()->getFirstInsertPos(), Mov);
9251+
9252+
auto payloadSrc =
9253+
builder.createSrcRegRegion(TmpReg, builder.getRegionStride1());
9254+
const unsigned execSize = 8;
9255+
G4_DstRegRegion *postDst = builder.createNullDst(Type_UD);
9256+
G4_INST *store = nullptr;
9257+
unsigned int HWOffset = builder.numEltPerGRF<Type_UB>() / getHWordByteSize();
9258+
vISA_ASSERT(kernel.stackCall.getFrameDescriptorByteSize() <=
9259+
builder.numEltPerGRF<Type_UB>(),
9260+
"ce0 overwrote FDE");
9261+
kernel.getKernelDebugInfo()->setCESaveOffset(HWOffset * getHWordByteSize());
9262+
9263+
if (builder.supportsLSC()) {
9264+
auto headerOpnd = getSpillFillHeader(*kernel.fg.builder, nullptr);
9265+
store = builder.createSpill(postDst, headerOpnd, payloadSrc,
9266+
G4_ExecSize(execSize), 1, HWOffset,
9267+
builder.getBEFP(), InstOpt_WriteEnable, false);
9268+
} else {
9269+
store = builder.createSpill(postDst, payloadSrc, G4_ExecSize(execSize), 1,
9270+
HWOffset, builder.getBEFP(),
9271+
InstOpt_WriteEnable, false);
9272+
}
9273+
kernel.fg.getEntryBB()->insertAfter(nextPos, store);
9274+
9275+
if (builder.kernel.getOption(vISA_GenerateDebugInfo)) {
9276+
builder.kernel.getKernelDebugInfo()->setSaveCEInst(store);
9277+
}
9278+
}
9279+
92279280
//
92289281
// Insert store r125.[0-4] at entry and restore before return.
92299282
// Dst of store will be a hardwired temp at upper end of caller save area.
@@ -10622,6 +10675,7 @@ void GlobalRA::stackCallSaveRestore(bool hasStackCall) {
1062210675
// Only GENX sub-graphs require callee-save code.
1062310676

1062410677
if (builder.getIsKernel() == false) {
10678+
storeCEInProlog();
1062510679
addCalleeSavePseudoCode();
1062610680
addStoreRestoreToReturn();
1062710681
}
@@ -11205,8 +11259,13 @@ int GlobalRA::coloringRegAlloc() {
1120511259

1120611260
if (kernel.fg.getIsStackCallFunc()) {
1120711261
// Allocate space to store Frame Descriptor
11208-
nextSpillOffset += 32;
11209-
scratchOffset += 32;
11262+
nextSpillOffset += builder.numEltPerGRF<Type_UB>();
11263+
scratchOffset += builder.numEltPerGRF<Type_UB>();
11264+
11265+
if (kernel.getOption(vISA_storeCE)) {
11266+
nextSpillOffset += builder.numEltPerGRF<Type_UB>();
11267+
scratchOffset += builder.numEltPerGRF<Type_UB>();
11268+
}
1121011269
}
1121111270

1121211271
// Global linear scan RA

visa/GraphColor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,6 +2042,7 @@ class GlobalRA {
20422042
void addCallerSavePseudoCode();
20432043
void addCalleeSavePseudoCode();
20442044
void addStoreRestoreToReturn();
2045+
void storeCEInProlog();
20452046
void markGraphBlockLocalVars();
20462047
void verifyRA(LivenessAnalysis &liveAnalysis);
20472048
void verifySpillFill();

visa/Option.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ bool Options::parseOptions(int argc, const char *argv[]) {
263263
m_vISAOptions.setBool(vISA_SplitGRFAlignedScalar, false);
264264
m_vISAOptions.setBool(vISA_SkipRedundantFillInRMW, false);
265265
m_vISAOptions.setBool(vISA_EnableDCE, false);
266+
m_vISAOptions.setBool(vISA_storeCE, true);
266267
m_vISAOptions.setBool(vISA_Debug, true);
267268
}
268269
if (m_vISAOptions.isArgSetByUser(vISA_Stepping)) {

visa/include/VISAOptionsDefs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ DEF_VISA_OPTION(vISA_disableInstDebugInfo, ET_BOOL, "-disableInstDebugInfo",
6161
UNUSED, false)
6262
DEF_VISA_OPTION(vISA_analyzeMove, ET_BOOL, "-analyzeMove", UNUSED, false)
6363
DEF_VISA_OPTION(vISA_skipFDE, ET_BOOL, "-skipFDE", UNUSED, false)
64+
DEF_VISA_OPTION(vISA_storeCE, ET_BOOL, "-storeCE", UNUSED, false)
6465
// setting this flag makes VISA emit matching name for variable wrt visaasm file
6566
// but this makes it impossible to emit correct elf, so this is strictly for
6667
// debugging

0 commit comments

Comments
 (0)