@@ -940,13 +940,14 @@ static bool isExtOperandBaled(Use &U, const GenXBaling *Baling) {
940940
941941void addKernelAttrsFromMetadata (VISAKernel &Kernel, const KernelMetadata &KM,
942942 const GenXSubtarget* Subtarget) {
943+ IGC_ASSERT (Subtarget);
943944 unsigned Val = KM.getSLMSize ();
944945 if (Val) {
945946 // Compute the slm size in KB and roundup to power of 2.
946947 Val = alignTo (Val, 1024 ) / 1024 ;
947948 if (!isPowerOf2_64 (Val))
948949 Val = NextPowerOf2 (Val);
949- unsigned MaxSLMSize = 64 ;
950+ unsigned MaxSLMSize = Subtarget-> getMaxSlmSize () ;
950951 if (Val > MaxSLMSize)
951952 report_fatal_error (" slm size must not exceed 64KB" );
952953 else {
@@ -959,6 +960,35 @@ void addKernelAttrsFromMetadata(VISAKernel &Kernel, const KernelMetadata &KM,
959960 }
960961 }
961962
963+ // Load thread payload from memory.
964+ if (Subtarget->hasThreadPayloadInMemory ()) {
965+ // The number of GRFs for per thread inputs (thread local IDs)
966+ unsigned NumGRFs = 0 ;
967+ bool HasImplicit = false ;
968+ for (auto Kind : KM.getArgKinds ()) {
969+ if (Kind & 0x8 )
970+ HasImplicit = true ;
971+ genx::KernelArgInfo KAI (Kind);
972+ NumGRFs += KAI.isLocalIDX () || KAI.isLocalIDY () || KAI.isLocalIDZ ();
973+ }
974+ if (Subtarget->isOCLRuntime ()) {
975+ // When CM kernel is run with OCL runtime, it is dispatched in a
976+ // special "SIMD1" mode (aka "Programmable Media Kernels").
977+ // This mode implies that we always have a "full" thread payload,
978+ // even when CM kernel does *not* have implicit arguments.
979+ // Payload format:
980+ // | 0-15 | 16 - 31 | 32 - 47 | 48 - 255 |
981+ // | localIDX | localIDY | localIDZ | unused |
982+ IGC_ASSERT (NumGRFs == 0 ); // we do not expect local_id_[x/y/z] calls
983+ NumGRFs = 1 ;
984+ } else {
985+ // One GRF for per thread input size for CM
986+ NumGRFs = std::max (HasImplicit ? 1U : 0U , NumGRFs);
987+ }
988+
989+ uint16_t Bytes = NumGRFs * Subtarget->getGRFWidth ();
990+ Kernel.AddKernelAttribute (" PerThreadInputSize" , sizeof (Bytes), &Bytes);
991+ }
962992
963993}
964994
@@ -1124,6 +1154,19 @@ bool GenXKernelBuilder::run() {
11241154
11251155static bool PatchImpArgOffset (Function *F, const GenXSubtarget *ST,
11261156 const KernelMetadata &KM) {
1157+ IGC_ASSERT (ST);
1158+ if (!ST->needsArgPatching ())
1159+ return false ;
1160+ if (F->hasFnAttribute (genx::FunctionMD::OCLRuntime))
1161+ return false ;
1162+
1163+ unsigned Idx = 0 ;
1164+ for (auto i = F->arg_begin (), e = F->arg_end (); i != e; ++i, ++Idx) {
1165+ uint8_t Kind = (KM.getArgKind (Idx));
1166+ if (Kind & 0xf8 )
1167+ return true ;
1168+ }
1169+
11271170 return false ;
11281171}
11291172
0 commit comments