@@ -55,32 +55,18 @@ type Scheduler interface {
5555 Schedule (ctx context.Context , request * schedulingtypes.LLMRequest , candidatePods []schedulingtypes.Pod ) (result * schedulingtypes.SchedulingResult , err error )
5656}
5757
58- type RequestControlPlugins struct {
59- preRequestPlugins []PreRequest
60- responseReceivedPlugins []ResponseReceived
61- responseStreamingPlugins []ResponseStreaming
62- responseCompletePlugins []ResponseComplete
63- }
64-
6558// NewDirectorWithConfig creates a new Director instance with all dependencies.
6659func NewDirectorWithConfig (
6760 datastore Datastore ,
6861 scheduler Scheduler ,
6962 admissionController AdmissionController ,
7063 config * Config ,
7164) * Director {
72- RCPlugins := RequestControlPlugins {
73- preRequestPlugins : config .preRequestPlugins ,
74- responseReceivedPlugins : config .responseReceivedPlugins ,
75- responseStreamingPlugins : config .responseStreamingPlugins ,
76- responseCompletePlugins : config .responseCompletePlugins ,
77- }
78-
7965 return & Director {
8066 datastore : datastore ,
8167 scheduler : scheduler ,
8268 admissionController : admissionController ,
83- requestControlPlugins : RCPlugins ,
69+ requestControlPlugins : * config ,
8470 defaultPriority : 0 , // define default priority explicitly
8571 }
8672}
@@ -98,7 +84,7 @@ type Director struct {
9884 datastore Datastore
9985 scheduler Scheduler
10086 admissionController AdmissionController
101- requestControlPlugins RequestControlPlugins
87+ requestControlPlugins Config
10288 // we just need a pointer to an int variable since priority is a pointer in InferenceObjective
10389 // no need to set this in the constructor, since the value we want is the default int val
10490 // and value types cannot be nil
@@ -274,7 +260,7 @@ func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []sch
274260 return pm
275261}
276262
277- // HandleResponseReceived is called when the first chunk of the response arrives .
263+ // HandleResponseReceived is called when the response headers are received .
278264func (d * Director ) HandleResponseReceived (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
279265 response := & Response {
280266 RequestId : reqCtx .Request .Headers [requtil .RequestIdHeaderKey ],
@@ -330,22 +316,22 @@ func (d *Director) runPreRequestPlugins(ctx context.Context, request *scheduling
330316 schedulingResult * schedulingtypes.SchedulingResult , targetPort int ) {
331317 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
332318 for _ , plugin := range d .requestControlPlugins .preRequestPlugins {
333- loggerDebug .Info ("Running pre-request plugin" , "plugin" , plugin .TypedName ())
319+ loggerDebug .Info ("Running PreRequest plugin" , "plugin" , plugin .TypedName ())
334320 before := time .Now ()
335321 plugin .PreRequest (ctx , request , schedulingResult , targetPort )
336322 metrics .RecordPluginProcessingLatency (PreRequestExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
337- loggerDebug .Info ("Completed running pre-request plugin successfully" , "plugin" , plugin .TypedName ())
323+ loggerDebug .Info ("Completed running PreRequest plugin successfully" , "plugin" , plugin .TypedName ())
338324 }
339325}
340326
341327func (d * Director ) runResponseReceivedPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend.Pod ) {
342328 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
343329 for _ , plugin := range d .requestControlPlugins .responseReceivedPlugins {
344- loggerDebug .Info ("Running post-response plugin" , "plugin" , plugin .TypedName ())
330+ loggerDebug .Info ("Running ResponseReceived plugin" , "plugin" , plugin .TypedName ())
345331 before := time .Now ()
346332 plugin .ResponseReceived (ctx , request , response , targetPod )
347333 metrics .RecordPluginProcessingLatency (ResponseReceivedExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
348- loggerDebug .Info ("Completed running post-response plugin successfully" , "plugin" , plugin .TypedName ())
334+ loggerDebug .Info ("Completed running ResponseReceived plugin successfully" , "plugin" , plugin .TypedName ())
349335 }
350336}
351337
@@ -356,16 +342,17 @@ func (d *Director) runResponseStreamingPlugins(ctx context.Context, request *sch
356342 before := time .Now ()
357343 plugin .ResponseStreaming (ctx , request , response , targetPod )
358344 metrics .RecordPluginProcessingLatency (ResponseStreamingExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
345+ loggerTrace .Info ("Completed running ResponseStreaming plugin successfully" , "plugin" , plugin .TypedName ())
359346 }
360347}
361348
362349func (d * Director ) runResponseCompletePlugins (ctx context.Context , request * schedulingtypes.LLMRequest , response * Response , targetPod * backend.Pod ) {
363350 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
364351 for _ , plugin := range d .requestControlPlugins .responseCompletePlugins {
365- loggerDebug .Info ("Running post-response complete plugin" , "plugin" , plugin .TypedName ())
352+ loggerDebug .Info ("Running ResponseComplete plugin" , "plugin" , plugin .TypedName ())
366353 before := time .Now ()
367354 plugin .ResponseComplete (ctx , request , response , targetPod )
368355 metrics .RecordPluginProcessingLatency (ResponseCompleteExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
369- loggerDebug .Info ("Completed running post-response complete plugin successfully" , "plugin" , plugin .TypedName ())
356+ loggerDebug .Info ("Completed running ResponseComplete plugin successfully" , "plugin" , plugin .TypedName ())
370357 }
371358}
0 commit comments