@@ -453,19 +453,22 @@ struct SchedConfig
453453 MASK_SETHI_ULLMAN = 1U << 2 ,
454454 MASK_CLUSTTERING = 1U << 3 ,
455455 MASK_HOLD_LIST = 1U << 4 ,
456+ MASK_NOT_ITERATE = 1U << 5 ,
456457 };
457458 unsigned Dump : 1 ;
458459 unsigned UseLatency : 1 ;
459460 unsigned UseSethiUllman : 1 ;
460461 unsigned DoClustering : 1 ;
461462 unsigned UseHoldList : 1 ;
463+ unsigned DoNotIterate : 1 ; // default 0 i.e. iterative latency-scheduling
462464
463465 explicit SchedConfig (unsigned Config)
464466 : Dump((Config & MASK_DUMP) != 0)
465467 , UseLatency((Config & MASK_LATENCY) != 0)
466468 , UseSethiUllman((Config & MASK_SETHI_ULLMAN) != 0)
467469 , DoClustering((Config & MASK_CLUSTTERING) != 0)
468470 , UseHoldList((Config & MASK_HOLD_LIST) != 0)
471+ , DoNotIterate((Config& MASK_NOT_ITERATE) != 0)
469472 {
470473 }
471474};
@@ -1348,14 +1351,24 @@ class LatencyQueue : public QueueBase {
13481351 return pseudoKills.empty () && ReadyList.empty ();
13491352 }
13501353
1351- // moving instruction from HoldList to ReadyList
1354+ // move instruction from HoldList to ReadyList,
1355+ // also update current-cycle and current-group
13521356 void advance (unsigned &CurCycle, unsigned & CurGroup)
13531357 {
13541358 if (!config.UseHoldList ) {
13551359 assert (HoldList.empty ());
1360+ // tracking cycle and group in this mode is only useful
1361+ // for understanding the scheduling result
1362+ if (!ReadyList.empty ()) {
1363+ preNode* N = ReadyList.top ();
1364+ CurCycle = std::max (CurCycle, N->getReadyCycle ());
1365+ if (N->getInst ())
1366+ CurGroup = std::max (CurGroup, GroupInfo[N->getInst ()]);
1367+ }
13561368 return ;
13571369 }
13581370 GroupInfo[nullptr ] = CurGroup;
1371+ // move inst out of hold-list based on current group and cycle
13591372 while (!HoldList.empty ()) {
13601373 preNode* N = HoldList.top ();
13611374 if (GroupInfo[N->getInst ()] <= CurGroup &&
@@ -1366,6 +1379,9 @@ class LatencyQueue : public QueueBase {
13661379 else
13671380 break ;
13681381 }
 1382+ // if the ready-list is still empty, then we need to move forward to
1383+ // the next group or the next cycle so that some instructions
1384+ // can come out of the hold-list.
13691385 if (ReadyList.empty () && !HoldList.empty ()) {
13701386 preNode* N = HoldList.top ();
13711387 CurCycle = std::max (CurCycle, N->getReadyCycle ());
@@ -1432,8 +1448,7 @@ bool BB_Scheduler::scheduleBlockForLatency(unsigned& MaxPressure, bool ReassignI
14321448 unsigned NumGrfs = kernel.getNumRegTotal ();
14331449 float Ratio = NumGrfs / 128 .0f ;
14341450 // limit the iterative approach to certain platforms for now
1435- if (kernel.getOptions ()->getOption (vISA_preRA_ScheduleNoIterative) ||
1436- kernel.getPlatform () < Xe_DG2 )
1451+ if (config.DoNotIterate || kernel.getPlatform () < Xe_DG2 )
14371452 {
14381453 GTMax = GTMin = getLatencyHidingThreshold (kernel);
14391454 Ratio = 1 .0f ; // already adjusted inside getLatencyHidingThreshold
0 commit comments