@@ -12,9 +12,9 @@ SPDX-License-Identifier: MIT
12
12
#include " Common_ISA_framework.h"
13
13
#include " DebugInfo.h"
14
14
#include " G4_BB.hpp"
15
+ #include " KernelCost.hpp"
15
16
#include " VISAKernel.h"
16
17
#include " VarSplit.h"
17
- #include " KernelCost.hpp"
18
18
#include " iga/IGALibrary/Models/Models.hpp"
19
19
#include " iga/IGALibrary/api/kv.hpp"
20
20
#include " visa_wa.h"
@@ -102,12 +102,12 @@ void *gtPinData::getFreeGRFInfo(unsigned &size) {
102
102
103
103
void gtPinData::setGTPinInit (void *buffer) {
104
104
vISA_ASSERT (sizeof (gtpin::igc::igc_init_t ) <= 200 ,
105
- " Check size of igc_init_t" );
105
+ " Check size of igc_init_t" );
106
106
gtpin_init = (gtpin::igc::igc_init_t *)buffer;
107
107
108
108
// reRA pass is no longer supported.
109
109
// FIXME: should we assert here?
110
- // if (gtpin_init->re_ra)
110
+ // if (gtpin_init->re_ra)
111
111
if (gtpin_init->grf_info )
112
112
kernel.getOptions ()->setOption (vISA_GetFreeGRFInfo, true );
113
113
}
@@ -139,7 +139,7 @@ void *gtPinData::getIndirRefs(unsigned int &size) {
139
139
140
140
// verify truncation is still legal
141
141
vISA_ASSERT (inst->getGenOffset () == (uint32_t )inst->getGenOffset (),
142
- " %ip out of bounds" );
142
+ " %ip out of bounds" );
143
143
144
144
if (startIp > 0 )
145
145
break ;
@@ -464,7 +464,7 @@ G4_Kernel::G4_Kernel(const PlatformInfo &pInfo, INST_LIST_NODE_ALLOCATOR &alloc,
464
464
: platformInfo(pInfo), m_options(options), m_kernelAttrs(anAttr),
465
465
m_function_id(funcId), RAType(RA_Type::UNKNOWN_RA), asmInstCount(0 ),
466
466
kernelID(0 ), fg(alloc, this , m), major_version(major),
467
- minor_version(minor), grfMode(pInfo.platform, options) {
467
+ minor_version(minor), grfMode(pInfo.platform, pInfo.grfSize, options) {
468
468
vISA_ASSERT (major < COMMON_ISA_MAJOR_VER || (major == COMMON_ISA_MAJOR_VER &&
469
469
minor <= COMMON_ISA_MINOR_VER),
470
470
" CISA version not supported by this JIT-compiler" );
@@ -591,7 +591,8 @@ void G4_Kernel::calculateSimdSize() {
591
591
(unsigned )m_kernelAttrs->getInt32KernelAttr (Attributes::ATTR_SimdSize));
592
592
if (simdSize != g4::SIMD8 && simdSize != g4::SIMD16 &&
593
593
simdSize != g4::SIMD32) {
594
- vISA_ASSERT (simdSize.value == 0 , " vISA: wrong value for SimdSize attribute" );
594
+ vISA_ASSERT (simdSize.value == 0 ,
595
+ " vISA: wrong value for SimdSize attribute" );
595
596
// pvc+: simd16; simd8 otherwise
596
597
simdSize = fg.builder ->getNativeExecSize ();
597
598
@@ -644,7 +645,8 @@ void G4_Kernel::updateKernelByRegPressure(unsigned regPressure,
644
645
largestInputReg = std::max (largestInputReg, maxRegPayloadDispatch);
645
646
}
646
647
647
- unsigned newGRF = grfMode.setModeByRegPressure (regPressure, largestInputReg, forceGRFModeUp);
648
+ unsigned newGRF = grfMode.setModeByRegPressure (regPressure, largestInputReg,
649
+ forceGRFModeUp);
648
650
649
651
if (newGRF == numRegTotal)
650
652
return ;
@@ -704,9 +706,8 @@ void G4_Kernel::evalAddrExp() {
704
706
}
705
707
}
706
708
707
- [[maybe_unused]]
708
- static std::vector<std::string> split (const std::string &str,
709
- const char *delimiter) {
709
+ [[maybe_unused]] static std::vector<std::string> split (const std::string &str,
710
+ const char *delimiter) {
710
711
std::vector<std::string> v;
711
712
std::string::size_type start = 0 ;
712
713
@@ -768,7 +769,7 @@ static iga_gen_t getIGAPlatform(TARGET_PLATFORM genPlatform) {
768
769
return platform;
769
770
}
770
771
771
- KernelDebugInfo* G4_Kernel::getKernelDebugInfo () {
772
+ KernelDebugInfo * G4_Kernel::getKernelDebugInfo () {
772
773
if (kernelDbgInfo == nullptr ) {
773
774
kernelDbgInfo = std::make_shared<KernelDebugInfo>();
774
775
}
@@ -888,8 +889,7 @@ uint32_t StackCallABI::numReservedABIGRF() const {
888
889
if (kernel->getOption (vISA_PreserveR0InR0))
889
890
return 2 ;
890
891
return 3 ;
891
- }
892
- else {
892
+ } else {
893
893
// for ABI version > 2
894
894
return 1 ;
895
895
}
@@ -1077,12 +1077,11 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
1077
1077
const uint32_t inputsStart = startGRF * getGRFSize ();
1078
1078
const uint32_t inputCount = fg.builder ->getInputCount ();
1079
1079
1080
- const int PTIS =
1081
- AlignUp (getInt32KernelAttr (Attributes::ATTR_PerThreadInputSize),
1082
- getGRFSize ());
1080
+ const int PTIS = AlignUp (
1081
+ getInt32KernelAttr (Attributes::ATTR_PerThreadInputSize), getGRFSize ());
1083
1082
1084
1083
// Checks if input_info is cross-thread-input
1085
- auto isInCrossThreadData = [&](const input_info_t * input_info) {
1084
+ auto isInCrossThreadData = [&](const input_info_t *input_info) {
1086
1085
return (uint32_t )input_info->offset >= inputsStart + PTIS;
1087
1086
};
1088
1087
@@ -1100,7 +1099,7 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
1100
1099
1101
1100
const uint32_t startGrfAddr =
1102
1101
getOptions ()->getuInt32Option (vISA_loadThreadPayloadStartReg) *
1103
- getGRFSize ();
1102
+ getGRFSize ();
1104
1103
1105
1104
std::vector<ArgLayout> args;
1106
1105
for (unsigned ix = 0 ; ix < inputCount; ix++) {
@@ -1131,7 +1130,7 @@ std::vector<ArgLayout> G4_Kernel::getArgumentLayout() {
1131
1130
args.emplace_back (input->dcl , dstGrfAddr, memSrc, memOff, input->size );
1132
1131
}
1133
1132
std::sort (args.begin (), args.end (),
1134
- [&](const ArgLayout &a1,const ArgLayout &a2) {
1133
+ [&](const ArgLayout &a1, const ArgLayout &a2) {
1135
1134
return a1.dstGrfAddr < a2.dstGrfAddr ;
1136
1135
});
1137
1136
return args;
@@ -1148,25 +1147,20 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
1148
1147
1149
1148
// todo: remove else branch as it is not reached at all.
1150
1149
std::stringstream ss;
1151
- const char * prefix = nullptr ;
1150
+ const char * prefix = nullptr ;
1152
1151
getOptions ()->getOption (VISA_AsmFileName, prefix);
1153
1152
if (prefix != nullptr ) {
1154
1153
// Use AsmFileName as prefix for g4/dot dumps
1155
1154
if (fg.builder ->getIsKernel ()) {
1156
1155
// entry
1157
- ss << prefix
1158
- << " ." << std::setfill (' 0' ) << std::setw (3 )
1159
- << nextDumpIndex++ << " ." << suffixIn;
1160
- }
1161
- else {
1156
+ ss << prefix << " ." << std::setfill (' 0' ) << std::setw (3 )
1157
+ << nextDumpIndex++ << " ." << suffixIn;
1158
+ } else {
1162
1159
// callee
1163
- ss << prefix
1164
- << " _f" << getFunctionId ()
1165
- << " ." << std::setfill (' 0' ) << std::setw (3 )
1166
- << nextDumpIndex++ << " ." << suffixIn;
1160
+ ss << prefix << " _f" << getFunctionId () << " ." << std::setfill (' 0' )
1161
+ << std::setw (3 ) << nextDumpIndex++ << " ." << suffixIn;
1167
1162
}
1168
- }
1169
- else {
1163
+ } else {
1170
1164
// calls to this will produce a sequence of dumps
1171
1165
// [kernel-name].000.[suffix].{dot,g4}
1172
1166
// [kernel-name].001.[suffix].{dot,g4}
@@ -1177,16 +1171,14 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
1177
1171
if (m_options->getOption (vISA_DumpUseInternalName) || name == nullptr ) {
1178
1172
if (fg.builder ->getIsKernel ()) {
1179
1173
ss << " k" << getKernelID ();
1180
- }
1181
- else {
1174
+ } else {
1182
1175
ss << " f" << getFunctionId ();
1183
1176
}
1184
- }
1185
- else {
1177
+ } else {
1186
1178
ss << name;
1187
1179
}
1188
1180
ss << " ." << std::setfill (' 0' ) << std::setw (3 ) << nextDumpIndex++ << " ."
1189
- << suffixIn;
1181
+ << suffixIn;
1190
1182
}
1191
1183
std::string baseName = sanitizePathString (ss.str ());
1192
1184
@@ -1197,9 +1189,7 @@ void G4_Kernel::dumpToFile(const std::string &suffixIn, bool forceG4Dump) {
1197
1189
dumpG4Internal (baseName);
1198
1190
}
1199
1191
1200
- void G4_Kernel::dumpToConsole () {
1201
- dumpG4InternalTo (std::cout);
1202
- }
1192
+ void G4_Kernel::dumpToConsole () { dumpG4InternalTo (std::cout); }
1203
1193
1204
1194
void G4_Kernel::emitDeviceAsm (std::ostream &os, const void *binary,
1205
1195
uint32_t binarySize) {
@@ -1530,7 +1520,8 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1530
1520
}
1531
1521
if (jitInfo->stats .numGRFSpillFillWeighted > 0 ) {
1532
1522
os << " \n "
1533
- << " //.spill GRF est. ref count " << jitInfo->stats .numGRFSpillFillWeighted ;
1523
+ << " //.spill GRF est. ref count "
1524
+ << jitInfo->stats .numGRFSpillFillWeighted ;
1534
1525
}
1535
1526
if (jitInfo->stats .numFlagSpillStore > 0 ) {
1536
1527
os << " \n //.spill flag store " << jitInfo->stats .numFlagSpillStore ;
@@ -1682,8 +1673,7 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1682
1673
os << " | " << std::right << std::setw (COLW_SIZE) << fmtHex (a.size );
1683
1674
1684
1675
// location
1685
- unsigned reg = a.dstGrfAddr / grfSize,
1686
- subRegBytes = a.dstGrfAddr % grfSize;
1676
+ unsigned reg = a.dstGrfAddr / grfSize, subRegBytes = a.dstGrfAddr % grfSize;
1687
1677
std::stringstream ssloc;
1688
1678
ssloc << " r" << reg;
1689
1679
if (subRegBytes != 0 )
@@ -1693,10 +1683,18 @@ void G4_Kernel::emitDeviceAsmHeaderComment(std::ostream &os) {
1693
1683
// from
1694
1684
std::string from;
1695
1685
switch (a.memSource ) {
1696
- case ArgLayout::MemSrc::CTI: from = " cti" ; break ;
1697
- case ArgLayout::MemSrc::PTI: from = " pti[tid]" ; break ;
1698
- case ArgLayout::MemSrc::INLINE: from = " inline" ; break ;
1699
- default : from = fmtHex (int (a.memSource )) + " ?" ; break ;
1686
+ case ArgLayout::MemSrc::CTI:
1687
+ from = " cti" ;
1688
+ break ;
1689
+ case ArgLayout::MemSrc::PTI:
1690
+ from = " pti[tid]" ;
1691
+ break ;
1692
+ case ArgLayout::MemSrc::INLINE:
1693
+ from = " inline" ;
1694
+ break ;
1695
+ default :
1696
+ from = fmtHex (int (a.memSource )) + " ?" ;
1697
+ break ;
1700
1698
}
1701
1699
std::stringstream ssf;
1702
1700
ssf << from;
@@ -1742,14 +1740,14 @@ static BlockOffsets precomputeBlockOffsets(std::ostream &os, G4_Kernel &g4k,
1742
1740
1743
1741
G4_INST *inst = (*itInst);
1744
1742
1745
- // For HW WA.
1746
- // In which, vISA may ask IGA to emit some additional instructions.
1747
- // For example, sync is used to make instruction aligned, and nop is
1748
- // used to support stepping in debugger.
1749
- // However, due to compaction, we might not know the exact location of
1750
- // the instruction, the sync instruction insertion has to happen during
1751
- // encoding, which is unknown for the instruction size of kernel in the
1752
- // decoding. That's the issue we have to make these changes.
1743
+ // For HW WA.
1744
+ // In which, vISA may ask IGA to emit some additional instructions.
1745
+ // For example, sync is used to make instruction aligned, and nop is
1746
+ // used to support stepping in debugger.
1747
+ // However, due to compaction, we might not know the exact location of
1748
+ // the instruction, the sync instruction insertion has to happen during
1749
+ // encoding, which is unknown for the instruction size of kernel in the
1750
+ // decoding. That's the issue we have to make these changes.
1753
1751
if (inst->isCachelineAligned ()) {
1754
1752
iga::Op opcode = kv.getOpcode (currPc);
1755
1753
// There could be multiple sync.nop instructions emitted by IGA to
@@ -1893,7 +1891,7 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
1893
1891
// tryPrintLabel - check if the given label is already printed with the given
1894
1892
// pc. Print it if not, and skip it if yes.
1895
1893
auto tryPrintLabel = [&os, &printedLabels](int32_t label_pc,
1896
- const std::string& label_name) {
1894
+ const std::string & label_name) {
1897
1895
auto label_pair = std::make_pair (label_pc, label_name);
1898
1896
// skip if the same label in the set
1899
1897
if (printedLabels.find (label_pair) != printedLabels.end ())
@@ -1958,8 +1956,8 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
1958
1956
(*itBB)->emitInstructionSourceLineMapping (os, itInst);
1959
1957
}
1960
1958
1961
- uint32_t fmtOpts = IGA_FORMATTING_OPTS_DEFAULT |
1962
- IGA_FORMATTING_OPT_PRINT_BFNEXPRS;
1959
+ uint32_t fmtOpts =
1960
+ IGA_FORMATTING_OPTS_DEFAULT | IGA_FORMATTING_OPT_PRINT_BFNEXPRS;
1963
1961
if (getOption (vISA_PrintHexFloatInAsm))
1964
1962
fmtOpts |= IGA_FORMATTING_OPT_PRINT_HEX_FLOATS;
1965
1963
if (!getOption (vISA_noLdStAsmSyntax))
@@ -2012,7 +2010,8 @@ void G4_Kernel::emitDeviceAsmInstructionsIga(std::ostream &os,
2012
2010
2013
2011
formatToInstToStream (pc, os);
2014
2012
2015
- (*itBB)->emitBasicInstructionComment (os, itInst, suppressRegs, lastRegs, pc);
2013
+ (*itBB)->emitBasicInstructionComment (os, itInst, suppressRegs, lastRegs,
2014
+ pc);
2016
2015
os << " \n " ;
2017
2016
2018
2017
pc += kv.getInstSize (pc);
@@ -2122,8 +2121,8 @@ unsigned G4_Kernel::getSRFInWords() {
2122
2121
2123
2122
// GRF modes supported by HW
2124
2123
// There must be at least one Config that is VRTEnable for each platform
2125
- GRFMode::GRFMode (const TARGET_PLATFORM plat, Options *op)
2126
- : platform(plat), options(op) {
2124
+ GRFMode::GRFMode (const TARGET_PLATFORM plat, unsigned regSize, Options *op)
2125
+ : platform(plat), grfSize(regSize), options(op) {
2127
2126
switch (platform) {
2128
2127
case Xe_XeHPSDV:
2129
2128
case Xe_DG2:
@@ -2168,12 +2167,14 @@ GRFMode::GRFMode(const TARGET_PLATFORM plat, Options *op)
2168
2167
// Set lower bound GRF
2169
2168
unsigned minGRF = op->getuInt32Option (vISA_MinGRFNum);
2170
2169
lowerBoundGRF = minGRF > 0 ? minGRF : configs.front ().numGRF ;
2171
- vISA_ASSERT (isValidNumGRFs (lowerBoundGRF), " Invalid lower bound for GRF number" );
2170
+ vISA_ASSERT (isValidNumGRFs (lowerBoundGRF),
2171
+ " Invalid lower bound for GRF number" );
2172
2172
2173
2173
// Set upper bound GRF
2174
2174
unsigned maxGRF = op->getuInt32Option (vISA_MaxGRFNum);
2175
2175
upperBoundGRF = maxGRF > 0 ? maxGRF : configs.back ().numGRF ;
2176
- vISA_ASSERT (isValidNumGRFs (upperBoundGRF), " Invalid upper bound for GRF number" );
2176
+ vISA_ASSERT (isValidNumGRFs (upperBoundGRF),
2177
+ " Invalid upper bound for GRF number" );
2177
2178
2178
2179
// Select higher GRF
2179
2180
GRFModeUpValue = op->getuInt32Option (vISA_ForceGRFModeUp);
@@ -2185,6 +2186,7 @@ unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
2185
2186
bool forceGRFModeUp) {
2186
2187
unsigned size = configs.size (), i = 0 ;
2187
2188
bool spillAllowed = getSpillThreshold () > 0 ;
2189
+ unsigned spillThresholdInRegs = getSpillThreshold () / grfSize;
2188
2190
// find appropriate GRF based on reg pressure
2189
2191
for (; i < size; i++) {
2190
2192
if (configs[i].VRTEnable && configs[i].numGRF >= lowerBoundGRF &&
@@ -2218,6 +2220,10 @@ unsigned GRFMode::setModeByRegPressure(unsigned maxRP, unsigned largestInputReg,
2218
2220
setModeByNumGRFs (lowerGRFNum);
2219
2221
}
2220
2222
return configs[currentMode].numGRF ;
2223
+ } else if (spillAllowed &&
2224
+ maxRP <= configs[i].numGRF + spillThresholdInRegs &&
2225
+ (largestInputReg + 8 ) <= configs[i].numGRF ) {
2226
+ return configs[currentMode].numGRF ;
2221
2227
}
2222
2228
}
2223
2229
}
0 commit comments