Skip to content

Commit 76412f3

Browse files
committed
addressed code review comments
Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 3615110 commit 76412f3

File tree

1 file changed

+28
-29
lines changed

1 file changed

+28
-29
lines changed

pkg/epp/requestcontrol/director.go

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -160,32 +160,6 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
160160
return reqCtx, nil
161161
}
162162

163-
// admitRequest handles admission control to decide whether or not to accept the request
164-
// based on the request priority and system saturation state.
165-
func (d *Director) admitRequest(ctx context.Context, candidatePods []backendmetrics.PodMetrics,
166-
requestPriority int, fairnessID string) error {
167-
loggerTrace := log.FromContext(ctx).V(logutil.TRACE)
168-
169-
loggerTrace.Info("Entering Flow Control", "priority", requestPriority, "fairnessID", fairnessID)
170-
171-
// This will be removed in favor of a more robust implementation (Flow Control) in the very near future.
172-
// TODO: Make this a configurable value.
173-
// Tracking issue https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1347
174-
if requestPriority >= 0 {
175-
loggerTrace.Info("Non-sheddable request bypassing saturation check.")
176-
return nil
177-
}
178-
179-
if d.saturationDetector.IsSaturated(ctx, candidatePods) {
180-
return errutil.Error{
181-
Code: errutil.InferencePoolResourceExhausted,
182-
Msg: "system saturated, sheddable request dropped",
183-
}
184-
}
185-
186-
return nil
187-
}
188-
189163
// getCandidatePodsForScheduling gets the list of relevant endpoints for the scheduling cycle from the datastore.
190164
// According to the EPP protocol, if "x-gateway-destination-endpoint-subset" is set on the request metadata and specifies
191165
// a subset of endpoints, only these endpoints will be considered as candidates for the scheduler.
@@ -219,17 +193,42 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
219193
}
220194

221195
podTotalCount := 0
222-
podFitleredList := d.datastore.PodList(func(pm backendmetrics.PodMetrics) bool {
196+
podFilteredList := d.datastore.PodList(func(pm backendmetrics.PodMetrics) bool {
223197
podTotalCount++
224198
if _, found := endpoints[pm.GetPod().Address]; found {
225199
return true
226200
}
227201
return false
228202
})
229203

230-
loggerTrace.Info("filtered candidate pods by subset filtering", "podTotalCount", podTotalCount, "filteredCount", len(podFitleredList))
204+
loggerTrace.Info("filtered candidate pods by subset filtering", "podTotalCount", podTotalCount, "filteredCount", len(podFilteredList))
231205

232-
return podFitleredList
206+
return podFilteredList
207+
}
208+
209+
// admitRequest handles admission control to decide whether or not to accept the request
210+
// based on the request priority and saturation state.
211+
func (d *Director) admitRequest(ctx context.Context, candidatePods []backendmetrics.PodMetrics, requestPriority int, fairnessID string) error {
212+
loggerTrace := log.FromContext(ctx).V(logutil.TRACE)
213+
214+
loggerTrace.Info("Entering Flow Control", "priority", requestPriority, "fairnessID", fairnessID)
215+
216+
// This will be removed in favor of a more robust implementation (Flow Control) in the very near future.
217+
// TODO: Make this a configurable value.
218+
// Tracking issue https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1347
219+
if requestPriority >= 0 {
220+
loggerTrace.Info("Non-sheddable request bypassing saturation check.")
221+
return nil
222+
}
223+
224+
if d.saturationDetector.IsSaturated(ctx, candidatePods) {
225+
return errutil.Error{
226+
Code: errutil.InferencePoolResourceExhausted,
227+
Msg: "system saturated, sheddable request dropped",
228+
}
229+
}
230+
231+
return nil
233232
}
234233

235234
// prepareRequest populates the RequestContext and calls the registered PreRequest plugins

0 commit comments

Comments
 (0)