@@ -168,6 +168,17 @@ void Optimizer::regAlloc()
168168
169169 fg.prepareTraversal ();
170170
171+ // realR0 and BuiltInR0 are two different dcls.
172+ // realR0 is always tied to physical r0.
173+ // If a copy of r0 isn't needed, tie BuiltInR0 to r0 as well.
174+ // If a copy of r0 is required, let RA decide the allocation of BuiltInR0.
175+ if (!R0CopyNeeded ())
176+ {
177+ // when no copy is needed, make BuiltInR0 an alias of realR0
178+ builder.getBuiltinR0 ()->setAliasDeclare (builder.getRealR0 (), 0 );
179+ builder.getBuiltinR0 ()->getRegVar ()->setPhyReg (builder.getRealR0 ()->getRegVar ()->getPhyReg (), 0 );
180+ }
181+
171182 //
172183 // assign registers
173184 //
@@ -1022,7 +1033,7 @@ void Optimizer::initOptimizations()
10221033 INITIALIZE_PASS (dumpPayload, vISA_dumpPayload, TimerID::MISC_OPTS);
10231034 INITIALIZE_PASS (normalizeRegion, vISA_EnableAlways, TimerID::MISC_OPTS);
10241035 INITIALIZE_PASS (collectStats, vISA_EnableAlways, TimerID::MISC_OPTS);
1025- INITIALIZE_PASS (createR0Copy, vISA_enablePreemption, TimerID::MISC_OPTS);
1036+ INITIALIZE_PASS (createR0Copy, vISA_EnableAlways, TimerID::MISC_OPTS);
10261037 INITIALIZE_PASS (initializePayload, vISA_InitPayload, TimerID::NUM_TIMERS);
10271038 INITIALIZE_PASS (cleanupBindless, vISA_enableCleanupBindless, TimerID::OPTIMIZER);
10281039 INITIALIZE_PASS (countGRFUsage, vISA_PrintRegUsage, TimerID::MISC_OPTS);
@@ -1416,6 +1427,23 @@ void Optimizer::accSubPostSchedule()
14161427 accSub.run ();
14171428}
14181429
1430+ bool Optimizer::R0CopyNeeded ()
1431+ {
1432+ if (kernel.getOption (vISA_enablePreemption))
1433+ {
1434+ return true ;
1435+ }
1436+
1437+ if (builder.getIsKernel () && kernel.fg .getHasStackCalls ())
1438+ {
1439+ // As per VISA ABI, last register in GRF file should
1440+ // contain copy of r0.
1441+ return true ;
1442+ }
1443+
1444+ return false ;
1445+ }
1446+
14191447int Optimizer::optimization ()
14201448{
14211449 // remove redundant message headers.
@@ -7428,6 +7456,12 @@ bool Optimizer::foldPseudoAndOr(G4_BB* bb, INST_LIST_ITER& ii)
74287456 return ;
74297457 }
74307458
7459+ // r0 copy is needed only if:
7460+ // a. pre-emption VISA option is enabled OR
7461+ // b. current object is kernel with stack calls since VISA ABI requires r0 copy to be available in a pre-defined register
7462+ if (!R0CopyNeeded ())
7463+ return ;
7464+
74317465 // Skip copying of ``copy of R0'' if it's never assigned, a case where
74327466 // ``copy of R0'' is never used. As EOT always use ``copy of R0'', that
74337467 // case only happens for synthetic tests where no practical code is
0 commit comments