
Commit c7fa41f

remove Critical boolean from scheduling request (#921)
* remove Critical boolean from scheduling request

Signed-off-by: Nir Rozenbaum <[email protected]>

* fixed a glitch

Signed-off-by: Nir Rozenbaum <[email protected]>

---------

Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 89d1a9d commit c7fa41f

3 files changed: +4 −9 lines changed


pkg/epp/requestcontrol/director.go

Lines changed: 1 addition & 2 deletions

@@ -115,9 +115,8 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
 
 	// Prepare LLMRequest (needed for both saturation detection and Scheduler)
 	reqCtx.SchedulingRequest = &schedulingtypes.LLMRequest{
-		TargetModel: reqCtx.ResolvedTargetModel,
 		RequestId:   reqCtx.Request.Headers[requtil.RequestIdHeaderKey],
-		Critical:    requestCriticality == v1alpha2.Critical,
+		TargetModel: reqCtx.ResolvedTargetModel,
 		Prompt:      prompt,
 		Headers:     reqCtx.Request.Headers,
 	}

pkg/epp/scheduling/scheduler_test.go

Lines changed: 0 additions & 2 deletions

@@ -42,7 +42,6 @@ func TestSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				TargetModel: "any-model",
 				RequestId:   uuid.NewString(),
-				Critical:    true,
 			},
 			input:   []*backendmetrics.FakePodMetrics{},
 			wantRes: nil,
@@ -53,7 +52,6 @@ func TestSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				TargetModel: "critical",
 				RequestId:   uuid.NewString(),
-				Critical:    true,
 			},
 			// pod2 will be picked because it has relatively low queue size, with the requested
 			// model being active, and has low KV cache.

pkg/epp/scheduling/types/types.go

Lines changed: 3 additions & 5 deletions

@@ -25,20 +25,18 @@ import (
 
 // LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
 type LLMRequest struct {
-	// TargetModel is the final target model after traffic split.
-	TargetModel string
 	// RequestId is the Envoy generated Id for the request being processed
 	RequestId string
-	// Critical is a boolean that specifies if a request is critical or not.
-	Critical bool
+	// TargetModel is the final target model after traffic split.
+	TargetModel string
 	// Prompt is the prompt that was sent in the request body.
 	Prompt string
 	// Headers is a map of the request headers.
 	Headers map[string]string
 }
 
 func (r *LLMRequest) String() string {
-	return fmt.Sprintf("TargetModel: %s, Critical: %t, PromptLength: %d, Headers: %v", r.TargetModel, r.Critical, len(r.Prompt), r.Headers)
+	return fmt.Sprintf("RequestID: %s, TargetModel: %s, PromptLength: %d, Headers: %v", r.RequestId, r.TargetModel, len(r.Prompt), r.Headers)
 }
 
 type Pod interface {
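For a quick sanity check of the new String output, here is a minimal, self-contained sketch that mirrors the struct as it stands after this commit; the stand-alone package layout and the field values are illustrative only and are not taken from the repository.

package main

import "fmt"

// LLMRequest mirrors pkg/epp/scheduling/types after this commit:
// the Critical boolean is gone and String no longer prints it.
type LLMRequest struct {
	// RequestId is the Envoy generated Id for the request being processed
	RequestId string
	// TargetModel is the final target model after traffic split.
	TargetModel string
	// Prompt is the prompt that was sent in the request body.
	Prompt string
	// Headers is a map of the request headers.
	Headers map[string]string
}

func (r *LLMRequest) String() string {
	return fmt.Sprintf("RequestID: %s, TargetModel: %s, PromptLength: %d, Headers: %v", r.RequestId, r.TargetModel, len(r.Prompt), r.Headers)
}

func main() {
	// Illustrative values; in the real code path the director fills these
	// from the incoming request (see pkg/epp/requestcontrol/director.go above).
	req := &LLMRequest{
		RequestId:   "req-123",
		TargetModel: "any-model",
		Prompt:      "hello",
		Headers:     map[string]string{"x-request-id": "req-123"},
	}
	// Prints: RequestID: req-123, TargetModel: any-model, PromptLength: 5, Headers: map[x-request-id:req-123]
	fmt.Println(req)
}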
