
Commit c7fa41f

remove Critical boolean from scheduling request (#921)
* remove Critical boolean from scheduling request

Signed-off-by: Nir Rozenbaum <[email protected]>

* fixed a glitch

Signed-off-by: Nir Rozenbaum <[email protected]>

---------

Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 89d1a9d commit c7fa41f

3 files changed: +4 −9 lines changed


pkg/epp/requestcontrol/director.go

Lines changed: 1 addition & 2 deletions

@@ -115,9 +115,8 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
 
 	// Prepare LLMRequest (needed for both saturation detection and Scheduler)
 	reqCtx.SchedulingRequest = &schedulingtypes.LLMRequest{
-		TargetModel: reqCtx.ResolvedTargetModel,
 		RequestId:   reqCtx.Request.Headers[requtil.RequestIdHeaderKey],
-		Critical:    requestCriticality == v1alpha2.Critical,
+		TargetModel: reqCtx.ResolvedTargetModel,
 		Prompt:      prompt,
 		Headers:     reqCtx.Request.Headers,
 	}

pkg/epp/scheduling/scheduler_test.go

Lines changed: 0 additions & 2 deletions

@@ -42,7 +42,6 @@ func TestSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				TargetModel: "any-model",
 				RequestId:   uuid.NewString(),
-				Critical:    true,
 			},
 			input:   []*backendmetrics.FakePodMetrics{},
 			wantRes: nil,
@@ -53,7 +52,6 @@ func TestSchedule(t *testing.T) {
 			req: &types.LLMRequest{
 				TargetModel: "critical",
 				RequestId:   uuid.NewString(),
-				Critical:    true,
 			},
 			// pod2 will be picked because it has relatively low queue size, with the requested
 			// model being active, and has low KV cache.

pkg/epp/scheduling/types/types.go

Lines changed: 3 additions & 5 deletions

@@ -25,20 +25,18 @@ import (
 
 // LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
 type LLMRequest struct {
-	// TargetModel is the final target model after traffic split.
-	TargetModel string
 	// RequestId is the Envoy generated Id for the request being processed
 	RequestId string
-	// Critical is a boolean that specifies if a request is critical or not.
-	Critical bool
+	// TargetModel is the final target model after traffic split.
+	TargetModel string
 	// Prompt is the prompt that was sent in the request body.
 	Prompt string
 	// Headers is a map of the request headers.
 	Headers map[string]string
 }
 
 func (r *LLMRequest) String() string {
-	return fmt.Sprintf("TargetModel: %s, Critical: %t, PromptLength: %d, Headers: %v", r.TargetModel, r.Critical, len(r.Prompt), r.Headers)
+	return fmt.Sprintf("RequestID: %s, TargetModel: %s, PromptLength: %d, Headers: %v", r.RequestId, r.TargetModel, len(r.Prompt), r.Headers)
 }
 
 type Pod interface {
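For a quick sanity check of the new String output, here is a minimal, self-contained sketch that mirrors the struct as it stands after this commit; the stand-alone package layout and the field values are illustrative only and are not taken from the repository.

package main

import "fmt"

// LLMRequest mirrors pkg/epp/scheduling/types after this commit:
// the Critical boolean is gone and String no longer prints it.
type LLMRequest struct {
	// RequestId is the Envoy generated Id for the request being processed
	RequestId string
	// TargetModel is the final target model after traffic split.
	TargetModel string
	// Prompt is the prompt that was sent in the request body.
	Prompt string
	// Headers is a map of the request headers.
	Headers map[string]string
}

func (r *LLMRequest) String() string {
	return fmt.Sprintf("RequestID: %s, TargetModel: %s, PromptLength: %d, Headers: %v", r.RequestId, r.TargetModel, len(r.Prompt), r.Headers)
}

func main() {
	// Illustrative values; in the real code path the director fills these
	// from the incoming request (see pkg/epp/requestcontrol/director.go above).
	req := &LLMRequest{
		RequestId:   "req-123",
		TargetModel: "any-model",
		Prompt:      "hello",
		Headers:     map[string]string{"x-request-id": "req-123"},
	}
	// Prints: RequestID: req-123, TargetModel: any-model, PromptLength: 5, Headers: map[x-request-id:req-123]
	fmt.Println(req)
}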
