removed datastore dependency from saturation detector #1293

Open: wants to merge 2 commits into base: main
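
For context, here is a minimal sketch of the contract this PR moves to: the Director passes the detector the same candidate pod snapshot it schedules over, and the detector only inspects those pods instead of querying the datastore. The struct fields, thresholds, and metric accessors used below (GetMetrics, WaitingQueueSize, KVCacheUsagePercent) are illustrative assumptions, not the actual saturationdetector implementation.

package example

import (
	"context"

	backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
)

// illustrativeDetector is a hypothetical implementation of the new
// SaturationDetector interface. It no longer lists pods itself; it only
// inspects the candidate pods the Director already snapshotted for the
// request. Thresholds and field names are assumptions for illustration.
type illustrativeDetector struct {
	queueDepthThreshold   int
	kvCacheUsageThreshold float64
}

func (d *illustrativeDetector) IsSaturated(_ context.Context, candidatePods []backendmetrics.PodMetrics) bool {
	if len(candidatePods) == 0 {
		// No candidates means there is no capacity to admit this request.
		return true
	}
	for _, pm := range candidatePods {
		m := pm.GetMetrics() // assumed accessor on backendmetrics.PodMetrics
		if m.WaitingQueueSize <= d.queueDepthThreshold && m.KVCacheUsagePercent < d.kvCacheUsageThreshold {
			// At least one candidate still has headroom, so the system is not saturated.
			return false
		}
	}
	return true
}

Keeping the pod listing in the Director means admission control, scheduling, and subset filtering all operate on one consistent per-request snapshot, which is what the new admitRequest(ctx, candidatePods, ...) signature in the diff below relies on.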
2 changes: 1 addition & 1 deletion cmd/epp/runner/runner.go
@@ -331,7 +331,7 @@ func (r *Runner) Run(ctx context.Context) error {

scheduler := scheduling.NewSchedulerWithConfig(r.schedulerConfig)

saturationDetector := saturationdetector.NewDetector(sdConfig, datastore, setupLog)
saturationDetector := saturationdetector.NewDetector(sdConfig, setupLog)

director := requestcontrol.NewDirectorWithConfig(datastore, scheduler, saturationDetector, r.requestControlConfig)

58 changes: 31 additions & 27 deletions pkg/epp/requestcontrol/director.go
@@ -30,10 +30,10 @@ import (
"github.com/go-logr/logr"
"sigs.k8s.io/controller-runtime/pkg/log"

v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
@@ -43,18 +43,25 @@ import (
requtil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/request"
)

// Datastore defines the interface required by the Director.
type Datastore interface {
PoolGet() (*v1.InferencePool, error)
ObjectiveGet(modelName string) *v1alpha2.InferenceObjective
PodList(predicate func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics
}

// Scheduler defines the interface required by the Director for scheduling.
type Scheduler interface {
Schedule(ctx context.Context, request *schedulingtypes.LLMRequest, candidatePods []schedulingtypes.Pod) (result *schedulingtypes.SchedulingResult, err error)
}

// SaturationDetector provides a signal indicating whether the backends are considered saturated.
type SaturationDetector interface {
IsSaturated(ctx context.Context) bool
IsSaturated(ctx context.Context, candidatePods []backendmetrics.PodMetrics) bool
}

// NewDirectorWithConfig creates a new Director instance with all dependencies.
func NewDirectorWithConfig(datastore datastore.Datastore, scheduler Scheduler, saturationDetector SaturationDetector, config *Config) *Director {
func NewDirectorWithConfig(datastore Datastore, scheduler Scheduler, saturationDetector SaturationDetector, config *Config) *Director {
return &Director{
datastore: datastore,
scheduler: scheduler,
@@ -66,24 +73,19 @@ func NewDirectorWithConfig(datastore datastore.Datastore, scheduler Scheduler, s

// Director orchestrates the request handling flow, including scheduling.
type Director struct {
datastore datastore.Datastore
datastore Datastore
scheduler Scheduler
saturationDetector SaturationDetector
preRequestPlugins []PreRequest
postResponsePlugins []PostResponse
}

// HandleRequest orchestrates the request lifecycle:
// 1. Parses request details.
// 2. Calls admitRequest for admission control.
// 3. Calls Scheduler.Schedule if request is approved.
// 4. Calls prepareRequest to populate RequestContext with result and call PreRequest plugins.
//
// HandleRequest orchestrates the request lifecycle.
// It always returns the requestContext even in the error case, as the request context is used in error handling.
func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestContext) (*handlers.RequestContext, error) {
logger := log.FromContext(ctx)

// --- 1. Parse Request, Resolve Target Models, and Determine Parameters ---
// Parse Request, Resolve Target Models, and Determine Parameters
requestBodyMap := reqCtx.Request.Body
var ok bool
reqCtx.IncomingModelName, ok = requestBodyMap["model"].(string)
@@ -134,22 +136,23 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
ctx = log.IntoContext(ctx, logger)
logger.V(logutil.DEBUG).Info("LLM request assembled")

// --- 2. Admission Control check --
if err := d.admitRequest(ctx, *infObjective.Spec.Criticality, reqCtx.FairnessID); err != nil {
return reqCtx, err
}

// --- 3. Call Scheduler (with the relevant candidate pods) ---
// Get candidate pods for scheduling
candidatePods := d.getCandidatePodsForScheduling(ctx, reqCtx.Request.Metadata)
if len(candidatePods) == 0 {
return reqCtx, errutil.Error{Code: errutil.ServiceUnavailable, Msg: "failed to find candidate pods for serving the request"}
}
result, err := d.scheduler.Schedule(ctx, reqCtx.SchedulingRequest, candidatePods)

// Admission Control check
if err := d.admitRequest(ctx, candidatePods, *infObjective.Spec.Criticality, reqCtx.FairnessID); err != nil {
return reqCtx, err
}

result, err := d.scheduler.Schedule(ctx, reqCtx.SchedulingRequest, d.toSchedulerPodMetrics(candidatePods))
if err != nil {
return reqCtx, errutil.Error{Code: errutil.InferencePoolResourceExhausted, Msg: fmt.Errorf("failed to find target pod: %w", err).Error()}
}

// --- 4. Prepare Request (Populates RequestContext and call PreRequest plugins) ---
// Prepare Request (populates RequestContext and calls PreRequest plugins)
// Insert target endpoint to instruct Envoy to route requests to the specified target pod and attach the port number.
// Invoke PreRequest registered plugins.
reqCtx, err = d.prepareRequest(ctx, reqCtx, result)
@@ -161,8 +164,9 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
}

// admitRequest handles admission control to decide whether or not to accept the request
// based on the request criticality and system saturation state.
func (d *Director) admitRequest(ctx context.Context, requestCriticality v1alpha2.Criticality, fairnessID string) error {
// based on the request criticality and the saturation state of the candidate pods.
func (d *Director) admitRequest(ctx context.Context, candidatePods []backendmetrics.PodMetrics,
requestCriticality v1alpha2.Criticality, fairnessID string) error {
logger := log.FromContext(ctx)

logger.V(logutil.TRACE).Info("Entering Flow Control", "criticality", requestCriticality, "fairnessID", fairnessID)
@@ -173,7 +177,7 @@ func (d *Director) admitRequest(ctx context.Context, requestCriticality v1alpha2
}

logger.V(logutil.DEBUG).Info("Performing saturation check for non-critical request.")
if d.saturationDetector.IsSaturated(ctx) { // Assuming non-nil Saturation Detector
if d.saturationDetector.IsSaturated(ctx, candidatePods) {
return errutil.Error{
Code: errutil.InferencePoolResourceExhausted,
Msg: "system saturated, non-critical request dropped",
@@ -189,21 +193,21 @@ func (d *Director) admitRequest(ctx context.Context, requestCriticality v1alpha2
// Snapshot pod metrics from the datastore to:
// 1. Reduce concurrent access to the datastore.
// 2. Ensure consistent data during the scheduling operation of a request between all scheduling cycles.
func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMetadata map[string]any) []schedulingtypes.Pod {
func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMetadata map[string]any) []backendmetrics.PodMetrics {
loggerTrace := log.FromContext(ctx).V(logutil.TRACE)

subsetMap, found := requestMetadata[metadata.SubsetFilterNamespace].(map[string]any)
if !found {
return d.toSchedulerPodMetrics(d.datastore.PodList(backendmetrics.AllPodPredicate))
return d.datastore.PodList(backendmetrics.AllPodPredicate)
}

// Check if endpoint key is present in the subset map and ensure there is at least one value
endpointSubsetList, found := subsetMap[metadata.SubsetFilterKey].([]any)
if !found {
return d.toSchedulerPodMetrics(d.datastore.PodList(backendmetrics.AllPodPredicate))
return d.datastore.PodList(backendmetrics.AllPodPredicate)
} else if len(endpointSubsetList) == 0 {
loggerTrace.Info("found empty subset filter in request metadata, filtering all pods")
return []schedulingtypes.Pod{}
return []backendmetrics.PodMetrics{}
}

// Create a map of endpoint addresses for easy lookup
@@ -226,7 +230,7 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet

loggerTrace.Info("filtered candidate pods by subset filtering", "podTotalCount", podTotalCount, "filteredCount", len(podFitleredList))

return d.toSchedulerPodMetrics(podFitleredList)
return podFitleredList
}

// prepareRequest populates the RequestContext and calls the registered PreRequest plugins
123 changes: 45 additions & 78 deletions pkg/epp/requestcontrol/director_test.go
@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
@@ -54,7 +55,7 @@ type mockSaturationDetector struct {
isSaturated bool
}

func (m *mockSaturationDetector) IsSaturated(_ context.Context) bool {
func (m *mockSaturationDetector) IsSaturated(_ context.Context, _ []backendmetrics.PodMetrics) bool {
return m.isSaturated
}

@@ -67,6 +68,23 @@ func (m *mockScheduler) Schedule(_ context.Context, _ *schedulingtypes.LLMReques
return m.scheduleResults, m.scheduleErr
}

type mockDatastore struct {
pods []backendmetrics.PodMetrics
}

func (ds *mockDatastore) PoolGet() (*v1.InferencePool, error) { return nil, nil }
func (ds *mockDatastore) ObjectiveGet(_ string) *v1alpha2.InferenceObjective { return nil }
func (ds *mockDatastore) PodList(predicate func(backendmetrics.PodMetrics) bool) []backendmetrics.PodMetrics {
res := []backendmetrics.PodMetrics{}
for _, pod := range ds.pods {
if predicate(pod) {
res = append(res, pod)
}
}

return res
}

func TestDirector_HandleRequest(t *testing.T) {
ctx := logutil.NewTestLoggerIntoContext(context.Background())

@@ -442,119 +460,72 @@ func TestGetCandidatePodsForScheduling(t *testing.T) {
}
}

testInput := []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod1",
},
Status: corev1.PodStatus{
PodIP: "10.0.0.1",
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
},
Status: corev1.PodStatus{
PodIP: "10.0.0.2",
},
},
}

outputPod1 := &backend.Pod{
pod1 := &backend.Pod{
NamespacedName: types.NamespacedName{Name: "pod1"},
Address: "10.0.0.1",
Labels: map[string]string{},
}

outputPod2 := &backend.Pod{
pod2 := &backend.Pod{
NamespacedName: types.NamespacedName{Name: "pod2"},
Address: "10.0.0.2",
Labels: map[string]string{},
}

testInput := []backendmetrics.PodMetrics{
&backendmetrics.FakePodMetrics{Pod: pod1},
&backendmetrics.FakePodMetrics{Pod: pod2},
}

tests := []struct {
name string
metadata map[string]any
output []schedulingtypes.Pod
output []backendmetrics.PodMetrics
}{
{
name: "SubsetFilter, filter not present — return all pods",
metadata: map[string]any{},
output: []schedulingtypes.Pod{
&schedulingtypes.PodMetrics{
Pod: outputPod1,
MetricsState: backendmetrics.NewMetricsState(),
},
&schedulingtypes.PodMetrics{
Pod: outputPod2,
MetricsState: backendmetrics.NewMetricsState(),
},
},
output: testInput,
},
{
name: "SubsetFilter, namespace present filter not present — return all pods",
metadata: map[string]any{metadata.SubsetFilterNamespace: map[string]any{}},
output: []schedulingtypes.Pod{
&schedulingtypes.PodMetrics{
Pod: outputPod1,
MetricsState: backendmetrics.NewMetricsState(),
},
&schedulingtypes.PodMetrics{
Pod: outputPod2,
MetricsState: backendmetrics.NewMetricsState(),
},
},
output: testInput,
},
{
name: "SubsetFilter, filter present with empty list — return error",
metadata: makeFilterMetadata([]any{}),
output: []schedulingtypes.Pod{},
output: []backendmetrics.PodMetrics{},
},
{
name: "SubsetFilter, subset with one matching pod",
metadata: makeFilterMetadata([]any{"10.0.0.1"}),
output: []schedulingtypes.Pod{
&schedulingtypes.PodMetrics{
Pod: outputPod1,
MetricsState: backendmetrics.NewMetricsState(),
output: []backendmetrics.PodMetrics{
&backendmetrics.FakePodMetrics{
Pod: pod1,
},
},
},
{
name: "SubsetFilter, subset with multiple matching pods",
metadata: makeFilterMetadata([]any{"10.0.0.1", "10.0.0.2", "10.0.0.3"}),
output: []schedulingtypes.Pod{
&schedulingtypes.PodMetrics{
Pod: outputPod1,
MetricsState: backendmetrics.NewMetricsState(),
},
&schedulingtypes.PodMetrics{
Pod: outputPod2,
MetricsState: backendmetrics.NewMetricsState(),
},
},
output: testInput,
},
{
name: "SubsetFilter, subset with no matching pods",
metadata: makeFilterMetadata([]any{"10.0.0.3"}),
output: []schedulingtypes.Pod{},
output: []backendmetrics.PodMetrics{},
},
}

pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second, time.Second*2)
ds := datastore.NewDatastore(t.Context(), pmf)
for _, testPod := range testInput {
ds.PodUpdateOrAddIfNotExist(testPod)
}

ds := &mockDatastore{pods: testInput}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
director := NewDirectorWithConfig(ds, &mockScheduler{}, &mockSaturationDetector{}, NewConfig())

got := director.getCandidatePodsForScheduling(context.Background(), test.metadata)

diff := cmp.Diff(test.output, got, cmpopts.SortSlices(func(a, b schedulingtypes.Pod) bool {
diff := cmp.Diff(test.output, got, cmpopts.SortSlices(func(a, b backendmetrics.PodMetrics) bool {
return a.GetPod().NamespacedName.String() < b.GetPod().NamespacedName.String()
}))
if diff != "" {
@@ -578,8 +549,8 @@ func TestRandomWeightedDraw(t *testing.T) {
model: &v1alpha2.InferenceObjective{
Spec: v1alpha2.InferenceObjectiveSpec{
TargetModels: []v1alpha2.TargetModel{
{Name: "canary", Weight: pointer(50)},
{Name: "v1", Weight: pointer(50)},
{Name: "canary", Weight: ptr.To(int32(50))},
{Name: "v1", Weight: ptr.To(int32(50))},
},
},
},
@@ -590,9 +561,9 @@ func TestRandomWeightedDraw(t *testing.T) {
model: &v1alpha2.InferenceObjective{
Spec: v1alpha2.InferenceObjectiveSpec{
TargetModels: []v1alpha2.TargetModel{
{Name: "canary", Weight: pointer(25)},
{Name: "v1.1", Weight: pointer(55)},
{Name: "v1", Weight: pointer(50)},
{Name: "canary", Weight: ptr.To(int32(25))},
{Name: "v1.1", Weight: ptr.To(int32(55))},
{Name: "v1", Weight: ptr.To(int32(50))},
},
},
},
@@ -603,9 +574,9 @@ func TestRandomWeightedDraw(t *testing.T) {
model: &v1alpha2.InferenceObjective{
Spec: v1alpha2.InferenceObjectiveSpec{
TargetModels: []v1alpha2.TargetModel{
{Name: "canary", Weight: pointer(20)},
{Name: "v1.1", Weight: pointer(20)},
{Name: "v1", Weight: pointer(10)},
{Name: "canary", Weight: ptr.To(int32(20))},
{Name: "v1.1", Weight: ptr.To(int32(20))},
{Name: "v1", Weight: ptr.To(int32(10))},
},
},
},
@@ -683,10 +654,6 @@ func TestGetRandomPod(t *testing.T) {
}
}

func pointer(v int32) *int32 {
return &v
}

func TestDirector_HandleResponse(t *testing.T) {
pr1 := newTestPostResponse("pr1")
