@@ -683,6 +683,8 @@ void Optimizer::initOptimizations() {
683683 OPT_INITIALIZE_PASS (reassignBlockIDs, vISA_EnableAlways, TimerID::MISC_OPTS);
684684 OPT_INITIALIZE_PASS (evalAddrExp, vISA_EnableAlways, TimerID::MISC_OPTS);
685685 OPT_INITIALIZE_PASS (FoldAddrImmediate, vISA_FoldAddrImmed, TimerID::MISC_OPTS);
686+ OPT_INITIALIZE_PASS (fixSamplerCacheBitInHeader, vISA_EnableAlways,
687+ TimerID::MISC_OPTS);
686688 OPT_INITIALIZE_PASS (localSchedule, vISA_LocalScheduling, TimerID::SCHEDULING);
687689 OPT_INITIALIZE_PASS (HWWorkaround, vISA_EnableAlways, TimerID::MISC_OPTS);
688690 OPT_INITIALIZE_PASS (fixEndIfWhileLabels, vISA_EnableAlways, TimerID::NUM_TIMERS);
@@ -966,6 +968,9 @@ int Optimizer::optimization() {
966968 return VISA_SPILL;
967969 }
968970
971+ runPass (PI_fixSamplerCacheBitInHeader);
972+
973+
969974 runPass (PI_removeLifetimeOps);
970975
971976 // HW workaround after RA
@@ -1322,6 +1327,47 @@ void Optimizer::removePseudoMov() {
13221327 }
13231328}
13241329
1330+ void Optimizer::fixSamplerCacheBitInHeader () {
1331+ // If LSC caching is globally disabled then builder won't set
1332+ // caching bit in sampler message header. So return without doing anything.
1333+ //
1334+ // If current object is kernel:
1335+ // * Return without doing anything if spill size <= threshold
1336+ // ie, LSC caching bit is set in sampler message header.
1337+ // * If spill size > threshold, reset cache bit in sampler
1338+ // message header.
1339+ //
1340+ // If current object is a stack call function:
1341+ // * Since we cannot tell scratch space size of entire call
1342+ // graph, do nothing and return. For stack call, default
1343+ // behavior is that LSC caching bit is set. One can still
1344+ // disable LSC caching globally.
1345+ if (!builder.getOption (vISA_enableSamplerLSCCaching))
1346+ return ;
1347+
1348+ if (fg.getIsStackCallFunc ())
1349+ return ;
1350+
1351+ const auto spillSizeThreshold =
1352+ builder.getuint32Option (vISA_samplerLSCCachingThreshold);
1353+
1354+ const auto &jitInfo = builder.getJitInfo ();
1355+ if (jitInfo->stats .spillMemUsed <= spillSizeThreshold)
1356+ return ;
1357+
1358+ for (auto &entry : kernel.samplerWithLSCBacking ) {
1359+ auto *inst = entry.inst ;
1360+ auto opndNum = entry.opndNum ;
1361+ auto LSCBackingBit = entry.bitPos ;
1362+ // We need to reset bit# LSCBackingBit, create new G4_Imm and assign it to
1363+ // inst.
1364+ int64_t imm64 = inst->getSrc (opndNum)->asImm ()->getInt ();
1365+ // Reset bit in imm64
1366+ imm64 ^= (1ll << LSCBackingBit);
1367+ auto *newImm = builder.createImm (imm64, Type_UD);
1368+ inst->setSrc (newImm, opndNum);
1369+ }
1370+ }
13251371void Optimizer::FoldAddrImmediate () {
13261372 AddrSubReg_Node *addrRegInfo =
13271373 new AddrSubReg_Node[builder.getNumAddrRegisters ()];
0 commit comments