@@ -160,32 +160,6 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
160160 return reqCtx , nil
161161}
162162
163- // admitRequest performs admission control for a request: it returns nil to admit the request, or an
164- // errutil.Error with code InferencePoolResourceExhausted when a negative-priority request arrives while IsSaturated reports true.
165- func (d * Director ) admitRequest (ctx context.Context , candidatePods []backendmetrics.PodMetrics ,
166- requestPriority int , fairnessID string ) error {
167- loggerTrace := log .FromContext (ctx ).V (logutil .TRACE )
168-
169- loggerTrace .Info ("Entering Flow Control" , "priority" , requestPriority , "fairnessID" , fairnessID )
170-
171- // This priority check will be removed in favor of a more robust implementation (Flow Control) in the very near future.
172- // TODO: Make this a configurable value.
173- // Tracking issue https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1347
174- if requestPriority >= 0 { // non-negative priority: treated as non-sheddable, so saturation is never consulted
175- loggerTrace .Info ("Non-sheddable request bypassing saturation check." )
176- return nil
177- }
178-
179- if d .saturationDetector .IsSaturated (ctx , candidatePods ) { // only reached for negative (sheddable) priorities
180- return errutil.Error {
181- Code : errutil .InferencePoolResourceExhausted ,
182- Msg : "system saturated, sheddable request dropped" ,
183- }
184- }
185-
186- return nil
187- }
188-
189163// getCandidatePodsForScheduling gets the list of relevant endpoints for the scheduling cycle from the datastore.
190164// According to the EPP protocol, if "x-gateway-destination-endpoint-subset" is set on the request metadata and specifies
191165// a subset of endpoints, only these endpoints will be considered as candidates for the scheduler.
@@ -219,17 +193,42 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
219193 }
220194
221195 podTotalCount := 0
222- podFitleredList := d .datastore .PodList (func (pm backendmetrics.PodMetrics ) bool {
196+ podFilteredList := d .datastore .PodList (func (pm backendmetrics.PodMetrics ) bool {
223197 podTotalCount ++
224198 if _ , found := endpoints [pm .GetPod ().Address ]; found {
225199 return true
226200 }
227201 return false
228202 })
229203
230- loggerTrace .Info ("filtered candidate pods by subset filtering" , "podTotalCount" , podTotalCount , "filteredCount" , len (podFitleredList ))
204+ loggerTrace .Info ("filtered candidate pods by subset filtering" , "podTotalCount" , podTotalCount , "filteredCount" , len (podFilteredList ))
231205
232- return podFitleredList
206+ return podFilteredList
207+ }
208+
209+ // admitRequest handles admission control to decide whether or not to accept the request
210+ // based on the request priority and saturation state.
211+ func (d * Director ) admitRequest (ctx context.Context , candidatePods []backendmetrics.PodMetrics , requestPriority int , fairnessID string ) error {
212+ loggerTrace := log .FromContext (ctx ).V (logutil .TRACE )
213+
214+ loggerTrace .Info ("Entering Flow Control" , "priority" , requestPriority , "fairnessID" , fairnessID )
215+
216+ // This will be removed in favor of a more robust implementation (Flow Control) in the very near future.
217+ // TODO: Make this a configurable value.
218+ // Tracking issue https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1347
219+ if requestPriority >= 0 {
220+ loggerTrace .Info ("Non-sheddable request bypassing saturation check." )
221+ return nil
222+ }
223+
224+ if d .saturationDetector .IsSaturated (ctx , candidatePods ) {
225+ return errutil.Error {
226+ Code : errutil .InferencePoolResourceExhausted ,
227+ Msg : "system saturated, sheddable request dropped" ,
228+ }
229+ }
230+
231+ return nil
233232}
234233
235234// prepareRequest populates the RequestContext and calls the registered PreRequest plugins
0 commit comments