Skip to content

Commit f239ed0

Browse files
pratikashar authored and igcbot committed
Enable programming of LSC cache backing bit for sampler message header

The LSC cache backing sampler feature is programmed on Xe3 by setting a bit in the message header. This PR implements this change.
1 parent 284040c commit f239ed0

File tree

6 files changed

+94
-5
lines changed

6 files changed

+94
-5
lines changed

visa/G4_Kernel.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,16 @@ class G4_Kernel {
897897
I->setMaskOption(MO);
898898
}
899899
// end of WA related
900+
901+
// Struct to store inst, opndNum that sets special LSC cache backing bit.
902+
// We reset the bit if LSC backing must be disabled. Note this applies only
903+
// when LSC backing bit is programmed in message header on Xe3.
904+
struct SampleWithLSCBacking {
905+
G4_INST *inst = nullptr;
906+
unsigned int opndNum = 0xffffffff;
907+
unsigned int bitPos = 0xffffffff;
908+
};
909+
std::list<struct SampleWithLSCBacking> samplerWithLSCBacking;
900910
}; // G4_Kernel
901911

902912
} // namespace vISA

visa/HWCaps.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,4 +939,9 @@ bool needTGMDoubleFenceWA() const {
939939
bool useDynamicAddrForExDesc() const {
940940
return getOption(vISA_dynamicAddrForExDescInLscSend);
941941
}
942+
943+
bool samplerCachingInLSCHeader() const {
944+
return (getPlatform() >= TARGET_PLATFORM::Xe3 &&
945+
getOption(vISA_enableSamplerLSCCaching));
946+
}
942947
// end HW capabilities

visa/Optimizer.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,8 @@ void Optimizer::initOptimizations() {
683683
OPT_INITIALIZE_PASS(reassignBlockIDs, vISA_EnableAlways, TimerID::MISC_OPTS);
684684
OPT_INITIALIZE_PASS(evalAddrExp, vISA_EnableAlways, TimerID::MISC_OPTS);
685685
OPT_INITIALIZE_PASS(FoldAddrImmediate, vISA_FoldAddrImmed, TimerID::MISC_OPTS);
686+
OPT_INITIALIZE_PASS(fixSamplerCacheBitInHeader, vISA_EnableAlways,
687+
TimerID::MISC_OPTS);
686688
OPT_INITIALIZE_PASS(localSchedule, vISA_LocalScheduling, TimerID::SCHEDULING);
687689
OPT_INITIALIZE_PASS(HWWorkaround, vISA_EnableAlways, TimerID::MISC_OPTS);
688690
OPT_INITIALIZE_PASS(fixEndIfWhileLabels, vISA_EnableAlways, TimerID::NUM_TIMERS);
@@ -966,6 +968,9 @@ int Optimizer::optimization() {
966968
return VISA_SPILL;
967969
}
968970

971+
runPass(PI_fixSamplerCacheBitInHeader);
972+
973+
969974
runPass(PI_removeLifetimeOps);
970975

971976
// HW workaround after RA
@@ -1322,6 +1327,47 @@ void Optimizer::removePseudoMov() {
13221327
}
13231328
}
13241329

1330+
void Optimizer::fixSamplerCacheBitInHeader() {
1331+
// If LSC caching is globally disabled then builder won't set
1332+
// caching bit in sampler message header. So return without doing anything.
1333+
//
1334+
// If current object is kernel:
1335+
// * Return without doing anything if spill size <= threshold
1336+
// ie, LSC caching bit is set in sampler message header.
1337+
// * If spill size > threshold, reset cache bit in sampler
1338+
// message header.
1339+
//
1340+
// If current object is a stack call function:
1341+
// * Since we cannot tell scratch space size of entire call
1342+
// graph, do nothing and return. For stack call, default
1343+
// behavior is that LSC caching bit is set. One can still
1344+
// disable LSC caching globally.
1345+
if (!builder.getOption(vISA_enableSamplerLSCCaching))
1346+
return;
1347+
1348+
if (fg.getIsStackCallFunc())
1349+
return;
1350+
1351+
const auto spillSizeThreshold =
1352+
builder.getuint32Option(vISA_samplerLSCCachingThreshold);
1353+
1354+
const auto &jitInfo = builder.getJitInfo();
1355+
if (jitInfo->stats.spillMemUsed <= spillSizeThreshold)
1356+
return;
1357+
1358+
for (auto &entry : kernel.samplerWithLSCBacking) {
1359+
auto *inst = entry.inst;
1360+
auto opndNum = entry.opndNum;
1361+
auto LSCBackingBit = entry.bitPos;
1362+
// We need to reset bit# LSCBackingBit, create new G4_Imm and assign it to
1363+
// inst.
1364+
int64_t imm64 = inst->getSrc(opndNum)->asImm()->getInt();
1365+
// Reset bit in imm64
1366+
imm64 ^= (1ll << LSCBackingBit);
1367+
auto *newImm = builder.createImm(imm64, Type_UD);
1368+
inst->setSrc(newImm, opndNum);
1369+
}
1370+
}
13251371
void Optimizer::FoldAddrImmediate() {
13261372
AddrSubReg_Node *addrRegInfo =
13271373
new AddrSubReg_Node[builder.getNumAddrRegisters()];

visa/Optimizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ class Optimizer {
123123
INST_LIST_ITER iend);
124124
void removePseudoMov();
125125
void FoldAddrImmediate();
126+
void fixSamplerCacheBitInHeader();
126127
bool foldCmpSel(G4_BB *BB, G4_INST *selInst, INST_LIST_ITER &selInst_II);
127128
bool foldPseudoNot(G4_BB *bb, INST_LIST_ITER &iter);
128129
bool createSmov(G4_BB *bb, G4_INST *flagMove, G4_INST *nextInst);
@@ -367,6 +368,7 @@ class Optimizer {
367368
PI_reassignBlockIDs, // always
368369
PI_evalAddrExp, // always
369370
PI_FoldAddrImmediate,
371+
PI_fixSamplerCacheBitInHeader,
370372
PI_localSchedule,
371373
PI_HWWorkaround, // always
372374
PI_fixEndIfWhileLabels, // always

visa/VisaToG4/TranslateSend3D.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1763,17 +1763,37 @@ static G4_Operand *createSampleHeader(IR_Builder *builder, G4_Declare *header,
17631763
unsigned int secondDword = createSampleHeader0Dot2(
17641764
actualop, pixelNullMask, aoffimmiVal, srcChannel, builder);
17651765

1766+
bool addToList = false;
1767+
const uint32_t LSCBacking = 29;
1768+
if (builder->samplerCachingInLSCHeader()) {
1769+
// Bit 29: Enables LSC as second level cache for Sampler
1770+
secondDword |= (1 << 29);
1771+
addToList = true;
1772+
}
1773+
17661774
G4_Imm *immOpndSecondDword = builder->createImm(secondDword, Type_UD);
17671775
G4_DstRegRegion *payloadDstRgn =
17681776
builder->createDst(header->getRegVar(), 0, 2, 1, Type_UD);
1777+
G4_INST *headerInst = nullptr;
17691778
if (aoffimmi->isImm()) {
17701779
// mov (1) payload(0,2) immOpndSecondDword
1771-
builder->createMov(g4::SIMD1, payloadDstRgn, immOpndSecondDword,
1772-
InstOpt_WriteEnable, true);
1780+
headerInst =
1781+
builder->createMov(g4::SIMD1, payloadDstRgn, immOpndSecondDword,
1782+
InstOpt_WriteEnable, true);
1783+
if (addToList)
1784+
{
1785+
builder->kernel.samplerWithLSCBacking.push_back(
1786+
{headerInst, 0, LSCBacking});
1787+
}
17731788
} else {
17741789
// or (1) payload(0,2) aoffimmi<0;1,0>:uw immOpndSeconDword
1775-
builder->createBinOp(G4_or, g4::SIMD1, payloadDstRgn, aoffimmi,
1776-
immOpndSecondDword, InstOpt_WriteEnable, true);
1790+
headerInst =
1791+
builder->createBinOp(G4_or, g4::SIMD1, payloadDstRgn, aoffimmi,
1792+
immOpndSecondDword, InstOpt_WriteEnable, true);
1793+
if (addToList) {
1794+
builder->kernel.samplerWithLSCBacking.push_back(
1795+
{headerInst, 1, LSCBacking});
1796+
}
17771797
}
17781798

17791799
if (sampler != nullptr) {
@@ -2480,7 +2500,7 @@ int IR_Builder::translateVISALoad3DInst(
24802500
G4_SrcRegRegion **opndArray) {
24812501
TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION);
24822502

2483-
bool useHeader = false;
2503+
bool useHeader = samplerCachingInLSCHeader();
24842504

24852505
G4_ExecSize execSize = toExecSize(executionSize);
24862506
G4_InstOpts instOpt = Get_Gen4_Emask(em, execSize);

visa/include/VISAOptionsDefs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,12 @@ DEF_VISA_OPTION(vISA_SkipRedundantFillInRMW, ET_BOOL, "-normwopt", UNUSED, true)
186186
DEF_VISA_OPTION(vISA_ALTMode, ET_BOOL, "-nonALTMode", UNUSED, false)
187187
DEF_VISA_OPTION(vISA_CoalesceScalarMoves, ET_BOOL, "-enableCoalesceScalarMoves",
188188
UNUSED, false)
189+
DEF_VISA_OPTION(vISA_enableSamplerLSCCaching, ET_BOOL_TRUE,
190+
"-samplerLSCCaching",
191+
"global flag [0|1] to control LSC caching for sampler", true)
192+
DEF_VISA_OPTION(vISA_samplerLSCCachingThreshold, ET_INT32,
193+
"-samplerLSCCachingThreshold",
194+
"spill size threshold to disable LSC caching for sampler", 0)
189195
DEF_VISA_OPTION(vISA_SinkBarrierWait, ET_BOOL_TRUE, "-sinkBarrierWait",
190196
"Barrier signal and wait are usually scheduled back to back. "
191197
"The option is used to sink barrier wait away from signal as "

0 commit comments

Comments
 (0)