Skip to content

Commit c972fc7

Browse files
authored
move the conversion from pod metrics to scheduler pod representation one level up (#1104)
* move the conversion from pod metrics to scheduler pod representation one level up Signed-off-by: Nir Rozenbaum <[email protected]> * minor change in helper func Signed-off-by: Nir Rozenbaum <[email protected]> --------- Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 8d01161 commit c972fc7

File tree

5 files changed

+43
-44
lines changed

5 files changed

+43
-44
lines changed

conformance/testing-epp/scheduler_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
func TestSchedule(t *testing.T) {
3232
tests := []struct {
3333
name string
34-
input []backendmetrics.PodMetrics
34+
input []types.Pod
3535
req *types.LLMRequest
3636
wantRes *types.SchedulingResult
3737
err bool
@@ -47,7 +47,7 @@ func TestSchedule(t *testing.T) {
4747
},
4848
{
4949
name: "req header not set",
50-
input: []backendmetrics.PodMetrics{
50+
input: []types.Pod{
5151
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "random-endpoint"}},
5252
},
5353
req: &types.LLMRequest{
@@ -59,7 +59,7 @@ func TestSchedule(t *testing.T) {
5959
},
6060
{
6161
name: "no pods address from the candidate pods matches req header address",
62-
input: []backendmetrics.PodMetrics{
62+
input: []types.Pod{
6363
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "nonmatched-endpoint"}},
6464
},
6565
req: &types.LLMRequest{
@@ -71,7 +71,7 @@ func TestSchedule(t *testing.T) {
7171
},
7272
{
7373
name: "one pod address from the candidate pods matches req header address",
74-
input: []backendmetrics.PodMetrics{
74+
input: []types.Pod{
7575
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "nonmatched-endpoint"}},
7676
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "matched-endpoint"}},
7777
},
@@ -100,7 +100,7 @@ func TestSchedule(t *testing.T) {
100100
for _, test := range tests {
101101
t.Run(test.name, func(t *testing.T) {
102102
scheduler := NewReqHeaderBasedScheduler()
103-
got, err := scheduler.Schedule(context.Background(), test.req, types.ToSchedulerPodMetrics(test.input))
103+
got, err := scheduler.Schedule(context.Background(), test.req, test.input)
104104
if test.err != (err != nil) {
105105
t.Errorf("Unexpected error, got %v, want %v", err, test.err)
106106
}

pkg/epp/requestcontrol/director.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,13 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
195195

196196
subsetMap, found := requestMetadata[subsetHintNamespace].(map[string]any)
197197
if !found {
198-
return schedulingtypes.ToSchedulerPodMetrics(d.datastore.PodGetAll())
198+
return d.toSchedulerPodMetrics(d.datastore.PodGetAll())
199199
}
200200

201201
// Check if endpoint key is present in the subset map and ensure there is at least one value
202202
endpointSubsetList, found := subsetMap[subsetHintKey].([]any)
203203
if !found {
204-
return schedulingtypes.ToSchedulerPodMetrics(d.datastore.PodGetAll())
204+
return d.toSchedulerPodMetrics(d.datastore.PodGetAll())
205205
} else if len(endpointSubsetList) == 0 {
206206
loggerTrace.Info("found empty subset filter in request metadata, filtering all pods")
207207
return []schedulingtypes.Pod{}
@@ -227,7 +227,7 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
227227

228228
loggerTrace.Info("filtered candidate pods by subset filtering", "podTotalCount", podTotalCount, "filteredCount", len(podFitleredList))
229229

230-
return schedulingtypes.ToSchedulerPodMetrics(podFitleredList)
230+
return d.toSchedulerPodMetrics(podFitleredList)
231231
}
232232

233233
// prepareRequest populates the RequestContext and calls the registered PreRequest plugins
@@ -257,6 +257,15 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC
257257
return reqCtx, nil
258258
}
259259

260+
func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []schedulingtypes.Pod {
261+
pm := make([]schedulingtypes.Pod, len(pods))
262+
for i, pod := range pods {
263+
pm[i] = &schedulingtypes.PodMetrics{Pod: pod.GetPod().Clone(), MetricsState: pod.GetMetrics().Clone()}
264+
}
265+
266+
return pm
267+
}
268+
260269
func (d *Director) HandleResponse(ctx context.Context, reqCtx *handlers.RequestContext) (*handlers.RequestContext, error) {
261270
response := &Response{
262271
RequestId: reqCtx.Request.Headers[requtil.RequestIdHeaderKey],

pkg/epp/scheduling/framework/scheduler_profile_test.go

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import (
2525
k8stypes "k8s.io/apimachinery/pkg/types"
2626

2727
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
28-
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" // Import config for thresholds
2928
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
3029
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
3130
)
@@ -45,7 +44,7 @@ func TestSchedulePlugins(t *testing.T) {
4544
tests := []struct {
4645
name string
4746
profile *SchedulerProfile
48-
input []backendmetrics.PodMetrics
47+
input []types.Pod
4948
wantTargetPod k8stypes.NamespacedName
5049
targetPodScore float64
5150
// Number of expected pods to score (after filter)
@@ -59,10 +58,10 @@ func TestSchedulePlugins(t *testing.T) {
5958
WithScorers(NewWeightedScorer(tp1, 1), NewWeightedScorer(tp2, 1)).
6059
WithPicker(pickerPlugin).
6160
WithPostCyclePlugins(tp1, tp2),
62-
input: []backendmetrics.PodMetrics{
63-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
64-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
65-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
61+
input: []types.Pod{
62+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
63+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
64+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
6665
},
6766
wantTargetPod: k8stypes.NamespacedName{Name: "pod1"},
6867
targetPodScore: 1.1,
@@ -76,10 +75,10 @@ func TestSchedulePlugins(t *testing.T) {
7675
WithScorers(NewWeightedScorer(tp1, 60), NewWeightedScorer(tp2, 40)).
7776
WithPicker(pickerPlugin).
7877
WithPostCyclePlugins(tp1, tp2),
79-
input: []backendmetrics.PodMetrics{
80-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
81-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
82-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
78+
input: []types.Pod{
79+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
80+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
81+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
8382
},
8483
wantTargetPod: k8stypes.NamespacedName{Name: "pod1"},
8584
targetPodScore: 50,
@@ -93,10 +92,10 @@ func TestSchedulePlugins(t *testing.T) {
9392
WithScorers(NewWeightedScorer(tp1, 1), NewWeightedScorer(tp2, 1)).
9493
WithPicker(pickerPlugin).
9594
WithPostCyclePlugins(tp1, tp2),
96-
input: []backendmetrics.PodMetrics{
97-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
98-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
99-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
95+
input: []types.Pod{
96+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
97+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
98+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
10099
},
101100
numPodsToScore: 0,
102101
err: true, // no available pods to server after filter all
@@ -123,7 +122,7 @@ func TestSchedulePlugins(t *testing.T) {
123122
RequestId: uuid.NewString(),
124123
}
125124
// Run profile cycle
126-
got, err := test.profile.Run(context.Background(), request, types.NewCycleState(), types.ToSchedulerPodMetrics(test.input))
125+
got, err := test.profile.Run(context.Background(), request, types.NewCycleState(), test.input)
127126

128127
// Validate error state
129128
if test.err != (err != nil) {
@@ -136,7 +135,7 @@ func TestSchedulePlugins(t *testing.T) {
136135

137136
// Validate output
138137
wantPod := &types.PodMetrics{
139-
Pod: &backend.Pod{NamespacedName: test.wantTargetPod, Labels: make(map[string]string)},
138+
Pod: &backend.Pod{NamespacedName: test.wantTargetPod},
140139
}
141140
wantRes := &types.ProfileRunResult{
142141
TargetPod: wantPod,

pkg/epp/scheduling/scheduler_test.go

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func TestSchedule(t *testing.T) {
3333
tests := []struct {
3434
name string
3535
req *types.LLMRequest
36-
input []backendmetrics.PodMetrics
36+
input []types.Pod
3737
wantRes *types.SchedulingResult
3838
err bool
3939
}{
@@ -43,7 +43,7 @@ func TestSchedule(t *testing.T) {
4343
TargetModel: "any-model",
4444
RequestId: uuid.NewString(),
4545
},
46-
input: []backendmetrics.PodMetrics{},
46+
input: []types.Pod{},
4747
wantRes: nil,
4848
err: true,
4949
},
@@ -55,10 +55,10 @@ func TestSchedule(t *testing.T) {
5555
},
5656
// pod2 will be picked because it has relatively low queue size, with the requested
5757
// model being active, and has low KV cache.
58-
input: []backendmetrics.PodMetrics{
59-
&backendmetrics.FakePodMetrics{
58+
input: []types.Pod{
59+
&types.PodMetrics{
6060
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}},
61-
Metrics: &backendmetrics.MetricsState{
61+
MetricsState: &backendmetrics.MetricsState{
6262
WaitingQueueSize: 0,
6363
KVCacheUsagePercent: 0.2,
6464
MaxActiveModels: 2,
@@ -68,9 +68,9 @@ func TestSchedule(t *testing.T) {
6868
},
6969
},
7070
},
71-
&backendmetrics.FakePodMetrics{
71+
&types.PodMetrics{
7272
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
73-
Metrics: &backendmetrics.MetricsState{
73+
MetricsState: &backendmetrics.MetricsState{
7474
WaitingQueueSize: 3,
7575
KVCacheUsagePercent: 0.1,
7676
MaxActiveModels: 2,
@@ -80,9 +80,9 @@ func TestSchedule(t *testing.T) {
8080
},
8181
},
8282
},
83-
&backendmetrics.FakePodMetrics{
83+
&types.PodMetrics{
8484
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}},
85-
Metrics: &backendmetrics.MetricsState{
85+
MetricsState: &backendmetrics.MetricsState{
8686
WaitingQueueSize: 10,
8787
KVCacheUsagePercent: 0.2,
8888
MaxActiveModels: 2,
@@ -97,7 +97,7 @@ func TestSchedule(t *testing.T) {
9797
"default": {
9898
TargetPod: &types.ScoredPod{
9999
Pod: &types.PodMetrics{
100-
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}, Labels: make(map[string]string)},
100+
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
101101
MetricsState: &backendmetrics.MetricsState{
102102
WaitingQueueSize: 3,
103103
KVCacheUsagePercent: 0.1,
@@ -106,7 +106,6 @@ func TestSchedule(t *testing.T) {
106106
"foo": 1,
107107
"critical": 1,
108108
},
109-
WaitingModels: map[string]int{},
110109
},
111110
},
112111
},
@@ -120,7 +119,7 @@ func TestSchedule(t *testing.T) {
120119
for _, test := range tests {
121120
t.Run(test.name, func(t *testing.T) {
122121
scheduler := NewScheduler()
123-
got, err := scheduler.Schedule(context.Background(), test.req, types.ToSchedulerPodMetrics(test.input))
122+
got, err := scheduler.Schedule(context.Background(), test.req, test.input)
124123
if test.err != (err != nil) {
125124
t.Errorf("Unexpected error, got %v, want %v", err, test.err)
126125
}

pkg/epp/scheduling/types/types.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,6 @@ type PodMetrics struct {
7070
*backendmetrics.MetricsState
7171
}
7272

73-
func ToSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []Pod {
74-
pm := make([]Pod, 0, len(pods))
75-
for _, pod := range pods {
76-
pm = append(pm, &PodMetrics{Pod: pod.GetPod().Clone(), MetricsState: pod.GetMetrics().Clone()})
77-
}
78-
return pm
79-
}
80-
8173
// ProfileRunResult captures the profile run result.
8274
type ProfileRunResult struct {
8375
TargetPod Pod

0 commit comments

Comments
 (0)