Skip to content

Commit aafca7e

Browse files
jfuentesigcbot
authored and committed
Improve spill threshold handling
Improve spill threshold handling in units of GRFs calculated from byte input.
1 parent d3ca4a5 commit aafca7e

File tree

3 files changed

+71
-64
lines changed

3 files changed

+71
-64
lines changed

visa/G4_Kernel.cpp

Lines changed: 68 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ SPDX-License-Identifier: MIT
1212
#include "Common_ISA_framework.h"
1313
#include "DebugInfo.h"
1414
#include "G4_BB.hpp"
15+
#include "KernelCost.hpp"
1516
#include "VISAKernel.h"
1617
#include "VarSplit.h"
17-
#include "KernelCost.hpp"
1818
#include "iga/IGALibrary/Models/Models.hpp"
1919
#include "iga/IGALibrary/api/kv.hpp"
2020
#include "visa_wa.h"
@@ -102,12 +102,12 @@ void *gtPinData::getFreeGRFInfo(unsigned &size) {
102102

103103
void gtPinData::setGTPinInit(void *buffer) {
104104
vISA_ASSERT(sizeof(gtpin::igc::igc_init_t) <= 200,
105-
"Check size of igc_init_t");
105+
"Check size of igc_init_t");
106106
gtpin_init = (gtpin::igc::igc_init_t *)buffer;
107107

108108
// reRA pass is no longer supported.
109109
// FIXME: should we assert here?
110-
//if (gtpin_init->re_ra)
110+
// if (gtpin_init->re_ra)
111111
if (gtpin_init->grf_info)
112112
kernel.getOptions()->setOption(vISA_GetFreeGRFInfo, true);
113113
}
@@ -139,7 +139,7 @@ void *gtPinData::getIndirRefs(unsigned int &size) {
139139

140140
// verify truncation is still legal
141141
vISA_ASSERT(inst->getGenOffset() == (uint32_t)inst->getGenOffset(),
142-
"%ip out of bounds");
142+
"%ip out of bounds");
143143

144144
if (startIp > 0)
145145
break;
@@ -464,7 +464,7 @@ G4_Kernel::G4_Kernel(const PlatformInfo &pInfo, INST_LIST_NODE_ALLOCATOR &alloc,
464464
: platformInfo(pInfo), m_options(options), m_kernelAttrs(anAttr),
465465
m_function_id(funcId), RAType(RA_Type::UNKNOWN_RA), asmInstCount(0),
466466
kernelID(0), fg(alloc, this, m), major_version(major),
467-
minor_version(minor), grfMode(pInfo.platform, options) {
467+
minor_version(minor), grfMode(pInfo.platform, pInfo.grfSize, options) {
468468
vISA_ASSERT(major < COMMON_ISA_MAJOR_VER || (major == COMMON_ISA_MAJOR_VER &&
469469
minor <= COMMON_ISA_MINOR_VER),
470470
"CISA version not supported by this JIT-compiler");
@@ -591,7 +591,8 @@ void G4_Kernel::calculateSimdSize() {
591591
(unsigned)m_kernelAttrs->getInt32KernelAttr(Attributes::ATTR_SimdSize));
592592
if (simdSize != g4::SIMD8 && simdSize != g4::SIMD16 &&
593593
simdSize != g4::SIMD32) {
594-
vISA_ASSERT(simdSize.value == 0, "vISA: wrong value for SimdSize attribute");
594+
vISA_ASSERT(simdSize.value == 0,
595+
"vISA: wrong value for SimdSize attribute");
595596
// pvc+: simd16; simd8 otherwise
596597
simdSize = fg.builder->getNativeExecSize();
597598

@@ -644,7 +645,8 @@ void G4_Kernel::updateKernelByRegPressure(unsigned regPressure,
644645
largestInputReg = std::max(largestInputReg, maxRegPayloadDispatch);
645646
}
646647

647-
unsigned newGRF = grfMode.setModeByRegPressure(regPressure, largestInputReg, forceGRFModeUp);
648+
unsigned newGRF = grfMode.setModeByRegPressure(regPressure, largestInputReg,
649+
forceGRFModeUp);
648650

649651
if (newGRF == numRegTotal)
650652
return;
@@ -704,9 +706,8 @@ void G4_Kernel::evalAddrExp() {
704706
}
705707
}
706708

707-
[[maybe_unused]]
708-
static std::vector<std::string> split(const std::string &str,
709-
const char *delimiter) {
709+
[[maybe_unused]] static std::vector<std::string> split(const std::string &str,
710+
const char *delimiter) {
710711
std::vector<std::string> v;
711712
std::string::size_type start = 0;
712713

@@ -768,7 +769,7 @@ static iga_gen_t getIGAPlatform(TARGET_PLATFORM genPlatform) {
768769
return platform;
769770
}
770771

771-
KernelDebugInfo* G4_Kernel::getKernelDebugInfo() {
772+
KernelDebugInfo *G4_Kernel::getKernelDebugInfo() {
772773
if (kernelDbgInfo == nullptr) {
773774
kernelDbgInfo = std::make_shared<KernelDebugInfo>();
774775
}
@@ -888,8 +889,7 @@ uint32_t StackCallABI::numReservedABIGRF() const {
888889
if (kernel->getOption(vISA_PreserveR0InR0))
889890
return 2;
890891
return 3;
891-
}
892-
else {
892+
} else {
893893
// for ABI version > 2
894894
return 1;
895895
}
@@ -1077,12 +1077,11 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
10771077
const uint32_t inputsStart = startGRF * getGRFSize();
10781078
const uint32_t inputCount = fg.builder->getInputCount();
10791079

1080-
const int PTIS =
1081-
AlignUp(getInt32KernelAttr(Attributes::ATTR_PerThreadInputSize),
1082-
getGRFSize());
1080+
const int PTIS = AlignUp(
1081+
getInt32KernelAttr(Attributes::ATTR_PerThreadInputSize), getGRFSize());
10831082

10841083
// Checks if input_info is cross-thread-input
1085-
auto isInCrossThreadData = [&](const input_info_t * input_info) {
1084+
auto isInCrossThreadData = [&](const input_info_t *input_info) {
10861085
return (uint32_t)input_info->offset >= inputsStart + PTIS;
10871086
};
10881087

@@ -1100,7 +1099,7 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
11001099

11011100
const uint32_t startGrfAddr =
11021101
getOptions()->getuInt32Option(vISA_loadThreadPayloadStartReg) *
1103-
getGRFSize();
1102+
getGRFSize();
11041103

11051104
std::vector<ArgLayout> args;
11061105
for (unsigned ix = 0; ix < inputCount; ix++) {
@@ -1131,7 +1130,7 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
11311130
args.emplace_back(input->dcl, dstGrfAddr, memSrc, memOff, input->size);
11321131
}
11331132
std::sort(args.begin(), args.end(),
1134-
[&](const ArgLayout &a1,const ArgLayout &a2) {
1133+
[&](const ArgLayout &a1, const ArgLayout &a2) {
11351134
return a1.dstGrfAddr < a2.dstGrfAddr;
11361135
});
11371136
return args;
@@ -1148,25 +1147,20 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
11481147

11491148
// todo: remove else branch as it is not reached at all.
11501149
std::stringstream ss;
1151-
const char* prefix = nullptr;
1150+
const char *prefix = nullptr;
11521151
getOptions()->getOption(VISA_AsmFileName, prefix);
11531152
if (prefix != nullptr) {
11541153
// Use AsmFileName as prefix for g4/dot dumps
11551154
if (fg.builder->getIsKernel()) {
11561155
// entry
1157-
ss << prefix
1158-
<< "." << std::setfill('0') << std::setw(3)
1159-
<< nextDumpIndex++ << "." << suffixIn;
1160-
}
1161-
else {
1156+
ss << prefix << "." << std::setfill('0') << std::setw(3)
1157+
<< nextDumpIndex++ << "." << suffixIn;
1158+
} else {
11621159
// callee
1163-
ss << prefix
1164-
<< "_f" << getFunctionId()
1165-
<< "." << std::setfill('0') << std::setw(3)
1166-
<< nextDumpIndex++ << "." << suffixIn;
1160+
ss << prefix << "_f" << getFunctionId() << "." << std::setfill('0')
1161+
<< std::setw(3) << nextDumpIndex++ << "." << suffixIn;
11671162
}
1168-
}
1169-
else {
1163+
} else {
11701164
// calls to this will produce a sequence of dumps
11711165
// [kernel-name].000.[suffix].{dot,g4}
11721166
// [kernel-name].001.[suffix].{dot,g4}
@@ -1177,16 +1171,14 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
11771171
if (m_options->getOption(vISA_DumpUseInternalName) || name == nullptr) {
11781172
if (fg.builder->getIsKernel()) {
11791173
ss << "k" << getKernelID();
1180-
}
1181-
else {
1174+
} else {
11821175
ss << "f" << getFunctionId();
11831176
}
1184-
}
1185-
else {
1177+
} else {
11861178
ss << name;
11871179
}
11881180
ss << "." << std::setfill('0') << std::setw(3) << nextDumpIndex++ << "."
1189-
<< suffixIn;
1181+
<< suffixIn;
11901182
}
11911183
std::string baseName = sanitizePathString(ss.str());
11921184

@@ -1197,9 +1189,7 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
11971189
dumpG4Internal(baseName);
11981190
}
11991191

1200-
void G4_Kernel::dumpToConsole() {
1201-
dumpG4InternalTo(std::cout);
1202-
}
1192+
void G4_Kernel::dumpToConsole() { dumpG4InternalTo(std::cout); }
12031193

12041194
void G4_Kernel::emitDeviceAsm(std::ostream &os, const void *binary,
12051195
uint32_t binarySize) {
@@ -1530,7 +1520,8 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
15301520
}
15311521
if (jitInfo->stats.numGRFSpillFillWeighted > 0) {
15321522
os << "\n"
1533-
<< "//.spill GRF est. ref count " << jitInfo->stats.numGRFSpillFillWeighted;
1523+
<< "//.spill GRF est. ref count "
1524+
<< jitInfo->stats.numGRFSpillFillWeighted;
15341525
}
15351526
if (jitInfo->stats.numFlagSpillStore > 0) {
15361527
os << "\n//.spill flag store " << jitInfo->stats.numFlagSpillStore;
@@ -1682,8 +1673,7 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
16821673
os << " | " << std::right << std::setw(COLW_SIZE) << fmtHex(a.size);
16831674

16841675
// location
1685-
unsigned reg = a.dstGrfAddr / grfSize,
1686-
subRegBytes = a.dstGrfAddr % grfSize;
1676+
unsigned reg = a.dstGrfAddr / grfSize, subRegBytes = a.dstGrfAddr % grfSize;
16871677
std::stringstream ssloc;
16881678
ssloc << "r" << reg;
16891679
if (subRegBytes != 0)
@@ -1693,10 +1683,18 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
16931683
// from
16941684
std::string from;
16951685
switch (a.memSource) {
1696-
case ArgLayout::MemSrc::CTI: from = "cti"; break;
1697-
case ArgLayout::MemSrc::PTI: from = "pti[tid]"; break;
1698-
case ArgLayout::MemSrc::INLINE: from = "inline"; break;
1699-
default: from = fmtHex(int(a.memSource)) + "?"; break;
1686+
case ArgLayout::MemSrc::CTI:
1687+
from = "cti";
1688+
break;
1689+
case ArgLayout::MemSrc::PTI:
1690+
from = "pti[tid]";
1691+
break;
1692+
case ArgLayout::MemSrc::INLINE:
1693+
from = "inline";
1694+
break;
1695+
default:
1696+
from = fmtHex(int(a.memSource)) + "?";
1697+
break;
17001698
}
17011699
std::stringstream ssf;
17021700
ssf << from;
@@ -1742,14 +1740,14 @@ static BlockOffsets precomputeBlockOffsets(std::ostream &os, G4_Kernel &g4k,
17421740

17431741
G4_INST *inst = (*itInst);
17441742

1745-
// For HW WA.
1746-
// In which, vISA may ask IGA to emit some additional instructions.
1747-
// For example, sync is used to make instruction aligned, and nop is
1748-
// used to support stepping in debugger.
1749-
// However, due to compaction, we might not know the exact location of
1750-
// the instruction, the sync instruction insertion has to happen during
1751-
// encoding, which is unknown for the instruction size of kernel in the
1752-
// decoding. That's the issue we have to make these changes.
1743+
// For HW WA.
1744+
// In which, vISA may ask IGA to emit some additional instructions.
1745+
// For example, sync is used to make instruction aligned, and nop is
1746+
// used to support stepping in debugger.
1747+
// However, due to compaction, we might not know the exact location of
1748+
// the instruction, the sync instruction insertion has to happen during
1749+
// encoding, which is unknown for the instruction size of kernel in the
1750+
// decoding. That's the issue we have to make these changes.
17531751
if (inst->isCachelineAligned()) {
17541752
iga::Op opcode = kv.getOpcode(currPc);
17551753
// There could be multiple sync.nop instructions emitted by IGA to
@@ -1893,7 +1891,7 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
18931891
// tryPrintLabel - check if the given label is already printed with the given
18941892
// pc. Print it if not, and skip it if yes.
18951893
auto tryPrintLabel = [&os, &printedLabels](int32_t label_pc,
1896-
const std::string& label_name) {
1894+
const std::string &label_name) {
18971895
auto label_pair = std::make_pair(label_pc, label_name);
18981896
// skip if the same label in the set
18991897
if (printedLabels.find(label_pair) != printedLabels.end())
@@ -1958,8 +1956,8 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
19581956
(*itBB)->emitInstructionSourceLineMapping(os, itInst);
19591957
}
19601958

1961-
uint32_t fmtOpts = IGA_FORMATTING_OPTS_DEFAULT |
1962-
IGA_FORMATTING_OPT_PRINT_BFNEXPRS;
1959+
uint32_t fmtOpts =
1960+
IGA_FORMATTING_OPTS_DEFAULT | IGA_FORMATTING_OPT_PRINT_BFNEXPRS;
19631961
if (getOption(vISA_PrintHexFloatInAsm))
19641962
fmtOpts |= IGA_FORMATTING_OPT_PRINT_HEX_FLOATS;
19651963
if (!getOption(vISA_noLdStAsmSyntax))
@@ -2012,7 +2010,8 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
20122010

20132011
formatToInstToStream(pc, os);
20142012

2015-
(*itBB)->emitBasicInstructionComment(os, itInst, suppressRegs, lastRegs, pc);
2013+
(*itBB)->emitBasicInstructionComment(os, itInst, suppressRegs, lastRegs,
2014+
pc);
20162015
os << "\n";
20172016

20182017
pc += kv.getInstSize(pc);
@@ -2122,8 +2121,8 @@ unsigned G4_Kernel::getSRFInWords() {
21222121

21232122
// GRF modes supported by HW
21242123
// There must be at least one Config that is VRTEnable for each platform
2125-
GRFMode::GRFMode(const TARGET_PLATFORM plat, Options *op)
2126-
: platform(plat), options(op) {
2124+
GRFMode::GRFMode(const TARGET_PLATFORM plat, unsigned regSize, Options *op)
2125+
: platform(plat), grfSize(regSize), options(op) {
21272126
switch (platform) {
21282127
case Xe_XeHPSDV:
21292128
case Xe_DG2:
@@ -2168,12 +2167,14 @@ GRFMode::GRFMode(const TARGET_PLATFORM plat, Options *op)
21682167
// Set lower bound GRF
21692168
unsigned minGRF = op->getuInt32Option(vISA_MinGRFNum);
21702169
lowerBoundGRF = minGRF > 0 ? minGRF : configs.front().numGRF;
2171-
vISA_ASSERT(isValidNumGRFs(lowerBoundGRF), "Invalid lower bound for GRF number");
2170+
vISA_ASSERT(isValidNumGRFs(lowerBoundGRF),
2171+
"Invalid lower bound for GRF number");
21722172

21732173
// Set upper bound GRF
21742174
unsigned maxGRF = op->getuInt32Option(vISA_MaxGRFNum);
21752175
upperBoundGRF = maxGRF > 0 ? maxGRF : configs.back().numGRF;
2176-
vISA_ASSERT(isValidNumGRFs(upperBoundGRF), "Invalid upper bound for GRF number");
2176+
vISA_ASSERT(isValidNumGRFs(upperBoundGRF),
2177+
"Invalid upper bound for GRF number");
21772178

21782179
// Select higher GRF
21792180
GRFModeUpValue = op->getuInt32Option(vISA_ForceGRFModeUp);
@@ -2185,6 +2186,7 @@ unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
21852186
bool forceGRFModeUp) {
21862187
unsigned size = configs.size(), i = 0;
21872188
bool spillAllowed = getSpillThreshold() > 0;
2189+
unsigned spillThresholdInRegs = getSpillThreshold() / grfSize;
21882190
// find appropriate GRF based on reg pressure
21892191
for (; i < size; i++) {
21902192
if (configs[i].VRTEnable && configs[i].numGRF >= lowerBoundGRF &&
@@ -2218,6 +2220,10 @@ unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
22182220
setModeByNumGRFs(lowerGRFNum);
22192221
}
22202222
return configs[currentMode].numGRF;
2223+
} else if (spillAllowed &&
2224+
maxRP <= configs[i].numGRF + spillThresholdInRegs &&
2225+
(largestInputReg + 8) <= configs[i].numGRF) {
2226+
return configs[currentMode].numGRF;
22212227
}
22222228
}
22232229
}

visa/G4_Kernel.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class KernelCost;
125125
// Handles information for GRF selection
126126
class GRFMode {
127127
public:
128-
GRFMode(const TARGET_PLATFORM platform, Options *op);
128+
GRFMode(const TARGET_PLATFORM platform, unsigned regSize, Options *op);
129129

130130
void setModeByNumGRFs(unsigned grfs) {
131131
unsigned size = configs.size();
@@ -268,6 +268,7 @@ class GRFMode {
268268
unsigned upperBoundGRF;
269269
unsigned GRFModeUpValue;
270270
const TARGET_PLATFORM platform;
271+
unsigned grfSize;
271272
Options *options;
272273
};
273274

visa/IsaVerification.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4495,7 +4495,7 @@ void vISAVerifier::verifyKernelHeader() {
44954495
}
44964496
}
44974497

4498-
GRFMode GRFInfo(irBuilder->getPlatform(), options);
4498+
GRFMode GRFInfo(irBuilder->getPlatform(), irBuilder->getGRFSize(), options);
44994499
unsigned GRFNumber = GRFInfo.getMaxGRF();
45004500

45014501
// [Begin, end) is an interval for each input. We check two things

0 commit comments

Comments
 (0)