@@ -233,7 +233,7 @@ func (sel *AISessionSelector) Select(ctx context.Context) *AISession {
233233
234234 discoveryPoolSize := int (math .Min (float64 (sel .node .OrchestratorPool .Size ()), float64 (sel .initialPoolSize )))
235235
236- if ( sel .warmPool . Size () + sel . coldPool . Size ()) == 0 {
236+ if sel .SelectorIsEmpty () {
237237 // release all orchestrators from suspension and try refresh
238238 // if there are no orchestrators in the pools
239239 clog .Infof (ctx , "refreshing sessions, no orchestrators in pools" )
@@ -275,6 +275,10 @@ func (sel *AISessionSelector) Select(ctx context.Context) *AISession {
275275 return nil
276276}
277277
278+ func (sel * AISessionSelector ) SelectorIsEmpty () bool {
279+ return sel .warmPool .Size () == 0 && sel .coldPool .Size () == 0
280+ }
281+
278282func (sel * AISessionSelector ) Complete (sess * AISession ) {
279283 if sess .Warm {
280284 sel .warmPool .Complete (sess .BroadcastSession )
@@ -311,6 +315,12 @@ func (sel *AISessionSelector) Refresh(ctx context.Context) error {
311315 continue
312316 }
313317
318+ // This check should not be needed, because GetOrchestrators already checks for suspension,
319+ // but suspended orchestrators were still observed getting back into the pool.
320+ if sel .suspender .Suspended (sess .Transcoder ()) > 0 {
321+ clog .Infof (ctx , "skipping suspended orchestrator=%s" , sess .Transcoder ())
322+ continue
323+ }
314324 // If the constraint for the modelID are missing skip this session
315325 modelConstraint , ok := constraints .Models [sel .modelID ]
316326 if ! ok {
0 commit comments