@@ -12,9 +12,9 @@ SPDX-License-Identifier: MIT
1212#include " Common_ISA_framework.h"
1313#include " DebugInfo.h"
1414#include " G4_BB.hpp"
15+ #include " KernelCost.hpp"
1516#include " VISAKernel.h"
1617#include " VarSplit.h"
17- #include " KernelCost.hpp"
1818#include " iga/IGALibrary/Models/Models.hpp"
1919#include " iga/IGALibrary/api/kv.hpp"
2020#include " visa_wa.h"
@@ -102,12 +102,12 @@ void *gtPinData::getFreeGRFInfo(unsigned &size) {
102102
// Records the GTPin init structure supplied by the caller and enables the
// vISA_GetFreeGRFInfo option when that structure's grf_info field is set.
//   buffer - caller-provided pointer; it is cast to gtpin::igc::igc_init_t*
//            and stored in gtpin_init (the pointer is kept, nothing is copied
//            here).
103103void gtPinData::setGTPinInit (void *buffer) {
// Size guard: igc_init_t must not be larger than 200 bytes.
104104 vISA_ASSERT (sizeof (gtpin::igc::igc_init_t ) <= 200 ,
105- " Check size of igc_init_t" );
105+ " Check size of igc_init_t" );
106106 gtpin_init = (gtpin::igc::igc_init_t *)buffer;
107107
108108 // reRA pass is no longer supported.
109109 // FIXME: should we assert here?
110- // if (gtpin_init->re_ra)
110+ // if (gtpin_init->re_ra)
// A truthy grf_info field turns on vISA_GetFreeGRFInfo in the kernel options.
111111 if (gtpin_init->grf_info )
112112 kernel.getOptions ()->setOption (vISA_GetFreeGRFInfo, true );
113113}
@@ -139,7 +139,7 @@ void *gtPinData::getIndirRefs(unsigned int &size) {
139139
140140 // verify truncation is still legal
141141 vISA_ASSERT (inst->getGenOffset () == (uint32_t )inst->getGenOffset (),
142- " %ip out of bounds" );
142+ " %ip out of bounds" );
143143
144144 if (startIp > 0 )
145145 break ;
@@ -464,7 +464,7 @@ G4_Kernel::G4_Kernel(const PlatformInfo &pInfo, INST_LIST_NODE_ALLOCATOR &alloc,
464464 : platformInfo(pInfo), m_options(options), m_kernelAttrs(anAttr),
465465 m_function_id(funcId), RAType(RA_Type::UNKNOWN_RA), asmInstCount(0 ),
466466 kernelID(0 ), fg(alloc, this , m), major_version(major),
467- minor_version(minor), grfMode(pInfo.platform, options) {
467+ minor_version(minor), grfMode(pInfo.platform, pInfo.grfSize, options) {
468468 vISA_ASSERT (major < COMMON_ISA_MAJOR_VER || (major == COMMON_ISA_MAJOR_VER &&
469469 minor <= COMMON_ISA_MINOR_VER),
470470 " CISA version not supported by this JIT-compiler" );
@@ -591,7 +591,8 @@ void G4_Kernel::calculateSimdSize() {
591591 (unsigned )m_kernelAttrs->getInt32KernelAttr (Attributes::ATTR_SimdSize));
592592 if (simdSize != g4::SIMD8 && simdSize != g4::SIMD16 &&
593593 simdSize != g4::SIMD32) {
594- vISA_ASSERT (simdSize.value == 0 , " vISA: wrong value for SimdSize attribute" );
594+ vISA_ASSERT (simdSize.value == 0 ,
595+ " vISA: wrong value for SimdSize attribute" );
595596 // pvc+: simd16; simd8 otherwise
596597 simdSize = fg.builder ->getNativeExecSize ();
597598
@@ -644,7 +645,8 @@ void G4_Kernel::updateKernelByRegPressure(unsigned regPressure,
644645 largestInputReg = std::max (largestInputReg, maxRegPayloadDispatch);
645646 }
646647
647- unsigned newGRF = grfMode.setModeByRegPressure (regPressure, largestInputReg, forceGRFModeUp);
648+ unsigned newGRF = grfMode.setModeByRegPressure (regPressure, largestInputReg,
649+ forceGRFModeUp);
648650
649651 if (newGRF == numRegTotal)
650652 return ;
@@ -704,9 +706,8 @@ void G4_Kernel::evalAddrExp() {
704706 }
705707}
706708
707- [[maybe_unused]]
708- static std::vector<std::string> split (const std::string &str,
709- const char *delimiter) {
709+ [[maybe_unused]] static std::vector<std::string> split (const std::string &str,
710+ const char *delimiter) {
710711 std::vector<std::string> v;
711712 std::string::size_type start = 0 ;
712713
@@ -768,7 +769,7 @@ static iga_gen_t getIGAPlatform(TARGET_PLATFORM genPlatform) {
768769 return platform;
769770}
770771
771- KernelDebugInfo* G4_Kernel::getKernelDebugInfo () {
772+ KernelDebugInfo * G4_Kernel::getKernelDebugInfo () {
772773 if (kernelDbgInfo == nullptr ) {
773774 kernelDbgInfo = std::make_shared<KernelDebugInfo>();
774775 }
@@ -888,8 +889,7 @@ uint32_t StackCallABI::numReservedABIGRF() const {
888889 if (kernel->getOption (vISA_PreserveR0InR0))
889890 return 2 ;
890891 return 3 ;
891- }
892- else {
892+ } else {
893893 // for ABI version > 2
894894 return 1 ;
895895 }
@@ -1077,12 +1077,11 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
10771077 const uint32_t inputsStart = startGRF * getGRFSize ();
10781078 const uint32_t inputCount = fg.builder ->getInputCount ();
10791079
1080- const int PTIS =
1081- AlignUp (getInt32KernelAttr (Attributes::ATTR_PerThreadInputSize),
1082- getGRFSize ());
1080+ const int PTIS = AlignUp (
1081+ getInt32KernelAttr (Attributes::ATTR_PerThreadInputSize), getGRFSize ());
10831082
10841083 // Checks if input_info is cross-thread-input
1085- auto isInCrossThreadData = [&](const input_info_t * input_info) {
1084+ auto isInCrossThreadData = [&](const input_info_t *input_info) {
10861085 return (uint32_t )input_info->offset >= inputsStart + PTIS;
10871086 };
10881087
@@ -1100,7 +1099,7 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
11001099
11011100 const uint32_t startGrfAddr =
11021101 getOptions ()->getuInt32Option (vISA_loadThreadPayloadStartReg) *
1103- getGRFSize ();
1102+ getGRFSize ();
11041103
11051104 std::vector<ArgLayout> args;
11061105 for (unsigned ix = 0 ; ix < inputCount; ix++) {
@@ -1131,7 +1130,7 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
11311130 args.emplace_back (input->dcl , dstGrfAddr, memSrc, memOff, input->size );
11321131 }
11331132 std::sort (args.begin (), args.end (),
1134- [&](const ArgLayout &a1,const ArgLayout &a2) {
1133+ [&](const ArgLayout &a1, const ArgLayout &a2) {
11351134 return a1.dstGrfAddr < a2.dstGrfAddr ;
11361135 });
11371136 return args;
@@ -1148,25 +1147,20 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
11481147
11491148 // todo: remove else branch as it is not reached at all.
11501149 std::stringstream ss;
1151- const char * prefix = nullptr ;
1150+ const char * prefix = nullptr ;
11521151 getOptions ()->getOption (VISA_AsmFileName, prefix);
11531152 if (prefix != nullptr ) {
11541153 // Use AsmFileName as prefix for g4/dot dumps
11551154 if (fg.builder ->getIsKernel ()) {
11561155 // entry
1157- ss << prefix
1158- << " ." << std::setfill (' 0' ) << std::setw (3 )
1159- << nextDumpIndex++ << " ." << suffixIn;
1160- }
1161- else {
1156+ ss << prefix << " ." << std::setfill (' 0' ) << std::setw (3 )
1157+ << nextDumpIndex++ << " ." << suffixIn;
1158+ } else {
11621159 // callee
1163- ss << prefix
1164- << " _f" << getFunctionId ()
1165- << " ." << std::setfill (' 0' ) << std::setw (3 )
1166- << nextDumpIndex++ << " ." << suffixIn;
1160+ ss << prefix << " _f" << getFunctionId () << " ." << std::setfill (' 0' )
1161+ << std::setw (3 ) << nextDumpIndex++ << " ." << suffixIn;
11671162 }
1168- }
1169- else {
1163+ } else {
11701164 // calls to this will produce a sequence of dumps
11711165 // [kernel-name].000.[suffix].{dot,g4}
11721166 // [kernel-name].001.[suffix].{dot,g4}
@@ -1177,16 +1171,14 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
11771171 if (m_options->getOption (vISA_DumpUseInternalName) || name == nullptr ) {
11781172 if (fg.builder ->getIsKernel ()) {
11791173 ss << " k" << getKernelID ();
1180- }
1181- else {
1174+ } else {
11821175 ss << " f" << getFunctionId ();
11831176 }
1184- }
1185- else {
1177+ } else {
11861178 ss << name;
11871179 }
11881180 ss << " ." << std::setfill (' 0' ) << std::setw (3 ) << nextDumpIndex++ << " ."
1189- << suffixIn;
1181+ << suffixIn;
11901182 }
11911183 std::string baseName = sanitizePathString (ss.str ());
11921184
@@ -1197,9 +1189,7 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
11971189 dumpG4Internal (baseName);
11981190}
11991191
// Forwards to dumpG4InternalTo with std::cout as the destination.
1200- void G4_Kernel::dumpToConsole () {
1201- dumpG4InternalTo (std::cout);
1202- }
1190+ void G4_Kernel::dumpToConsole () { dumpG4InternalTo (std::cout); }
12031193
12041194void G4_Kernel::emitDeviceAsm (std::ostream &os, const void *binary,
12051195 uint32_t binarySize) {
@@ -1530,7 +1520,8 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
15301520 }
15311521 if (jitInfo->stats .numGRFSpillFillWeighted > 0 ) {
15321522 os << " \n "
1533- << " //.spill GRF est. ref count " << jitInfo->stats .numGRFSpillFillWeighted ;
1523+ << " //.spill GRF est. ref count "
1524+ << jitInfo->stats .numGRFSpillFillWeighted ;
15341525 }
15351526 if (jitInfo->stats .numFlagSpillStore > 0 ) {
15361527 os << " \n //.spill flag store " << jitInfo->stats .numFlagSpillStore ;
@@ -1682,8 +1673,7 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
16821673 os << " | " << std::right << std::setw (COLW_SIZE) << fmtHex (a.size );
16831674
16841675 // location
1685- unsigned reg = a.dstGrfAddr / grfSize,
1686- subRegBytes = a.dstGrfAddr % grfSize;
1676+ unsigned reg = a.dstGrfAddr / grfSize, subRegBytes = a.dstGrfAddr % grfSize;
16871677 std::stringstream ssloc;
16881678 ssloc << " r" << reg;
16891679 if (subRegBytes != 0 )
@@ -1693,10 +1683,18 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
16931683 // from
16941684 std::string from;
16951685 switch (a.memSource ) {
1696- case ArgLayout::MemSrc::CTI: from = " cti" ; break ;
1697- case ArgLayout::MemSrc::PTI: from = " pti[tid]" ; break ;
1698- case ArgLayout::MemSrc::INLINE: from = " inline" ; break ;
1699- default : from = fmtHex (int (a.memSource )) + " ?" ; break ;
1686+ case ArgLayout::MemSrc::CTI:
1687+ from = " cti" ;
1688+ break ;
1689+ case ArgLayout::MemSrc::PTI:
1690+ from = " pti[tid]" ;
1691+ break ;
1692+ case ArgLayout::MemSrc::INLINE:
1693+ from = " inline" ;
1694+ break ;
1695+ default :
1696+ from = fmtHex (int (a.memSource )) + " ?" ;
1697+ break ;
17001698 }
17011699 std::stringstream ssf;
17021700 ssf << from;
@@ -1742,14 +1740,14 @@ static BlockOffsets precomputeBlockOffsets(std::ostream &os, G4_Kernel &g4k,
17421740
17431741 G4_INST *inst = (*itInst);
17441742
1745- // For HW WA.
1746- // In which, vISA may ask IGA to emit some additional instructions.
1747- // For example, sync is used to make instruction aligned, and nop is
1748- // used to support stepping in debugger.
1749- // However, due to compaction, we might not know the exact location of
1750- // the instruction, the sync instruction insertion has to happen during
1751- // encoding, which is unknown for the instruction size of kernel in the
1752- // decoding. That's the issue we have to make these changes.
1743+ // For HW WA.
1744+ // For the WA, vISA may ask IGA to emit some additional instructions.
1745+ // For example, sync is used to keep an instruction aligned, and nop is
1746+ // used to support stepping in the debugger.
1747+ // However, due to compaction, we might not know the exact location of
1748+ // an instruction, so the sync instruction insertion has to happen during
1749+ // encoding, which means the kernel's instruction sizes are not known at
1750+ // decoding time. That is why we have to make these adjustments.
17531751 if (inst->isCachelineAligned ()) {
17541752 iga::Op opcode = kv.getOpcode (currPc);
17551753 // There could be multiple sync.nop instructions emitted by IGA to
@@ -1893,7 +1891,7 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
18931891 // tryPrintLabel - check if the given label is already printed with the given
18941892 // pc. Print it if not, and skip it if yes.
18951893 auto tryPrintLabel = [&os, &printedLabels](int32_t label_pc,
1896- const std::string& label_name) {
1894+ const std::string & label_name) {
18971895 auto label_pair = std::make_pair (label_pc, label_name);
18981896 // skip if the same label in the set
18991897 if (printedLabels.find (label_pair) != printedLabels.end ())
@@ -1958,8 +1956,8 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
19581956 (*itBB)->emitInstructionSourceLineMapping (os, itInst);
19591957 }
19601958
1961- uint32_t fmtOpts = IGA_FORMATTING_OPTS_DEFAULT |
1962- IGA_FORMATTING_OPT_PRINT_BFNEXPRS;
1959+ uint32_t fmtOpts =
1960+ IGA_FORMATTING_OPTS_DEFAULT | IGA_FORMATTING_OPT_PRINT_BFNEXPRS;
19631961 if (getOption (vISA_PrintHexFloatInAsm))
19641962 fmtOpts |= IGA_FORMATTING_OPT_PRINT_HEX_FLOATS;
19651963 if (!getOption (vISA_noLdStAsmSyntax))
@@ -2012,7 +2010,8 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
20122010
20132011 formatToInstToStream (pc, os);
20142012
2015- (*itBB)->emitBasicInstructionComment (os, itInst, suppressRegs, lastRegs, pc);
2013+ (*itBB)->emitBasicInstructionComment (os, itInst, suppressRegs, lastRegs,
2014+ pc);
20162015 os << " \n " ;
20172016
20182017 pc += kv.getInstSize (pc);
@@ -2122,8 +2121,8 @@ unsigned G4_Kernel::getSRFInWords() {
21222121
21232122// GRF modes supported by HW
21242123// There must be at least one Config that is VRTEnable for each platform
2125- GRFMode::GRFMode (const TARGET_PLATFORM plat, Options *op)
2126- : platform(plat), options(op) {
2124+ GRFMode::GRFMode (const TARGET_PLATFORM plat, unsigned regSize, Options *op)
2125+ : platform(plat), grfSize(regSize), options(op) {
21272126 switch (platform) {
21282127 case Xe_XeHPSDV:
21292128 case Xe_DG2:
@@ -2168,12 +2167,14 @@ GRFMode::GRFMode(const TARGET_PLATFORM plat, Options *op)
21682167 // Set lower bound GRF
21692168 unsigned minGRF = op->getuInt32Option (vISA_MinGRFNum);
21702169 lowerBoundGRF = minGRF > 0 ? minGRF : configs.front ().numGRF ;
2171- vISA_ASSERT (isValidNumGRFs (lowerBoundGRF), " Invalid lower bound for GRF number" );
2170+ vISA_ASSERT (isValidNumGRFs (lowerBoundGRF),
2171+ " Invalid lower bound for GRF number" );
21722172
21732173 // Set upper bound GRF
21742174 unsigned maxGRF = op->getuInt32Option (vISA_MaxGRFNum);
21752175 upperBoundGRF = maxGRF > 0 ? maxGRF : configs.back ().numGRF ;
2176- vISA_ASSERT (isValidNumGRFs (upperBoundGRF), " Invalid upper bound for GRF number" );
2176+ vISA_ASSERT (isValidNumGRFs (upperBoundGRF),
2177+ " Invalid upper bound for GRF number" );
21772178
21782179 // Select higher GRF
21792180 GRFModeUpValue = op->getuInt32Option (vISA_ForceGRFModeUp);
@@ -2185,6 +2186,7 @@ unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
21852186 bool forceGRFModeUp) {
21862187 unsigned size = configs.size (), i = 0 ;
21872188 bool spillAllowed = getSpillThreshold () > 0 ;
2189+ unsigned spillThresholdInRegs = getSpillThreshold () / grfSize;
21882190 // find appropriate GRF based on reg pressure
21892191 for (; i < size; i++) {
21902192 if (configs[i].VRTEnable && configs[i].numGRF >= lowerBoundGRF &&
@@ -2218,6 +2220,10 @@ unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
22182220 setModeByNumGRFs (lowerGRFNum);
22192221 }
22202222 return configs[currentMode].numGRF ;
2223+ } else if (spillAllowed &&
2224+ maxRP <= configs[i].numGRF + spillThresholdInRegs &&
2225+ (largestInputReg + 8 ) <= configs[i].numGRF ) {
2226+ return configs[currentMode].numGRF ;
22212227 }
22222228 }
22232229 }
0 commit comments