@@ -14,6 +14,7 @@ import (
1414
1515 "github.com/vllm-project/semantic-router/semantic-router/pkg/cache"
1616 "github.com/vllm-project/semantic-router/semantic-router/pkg/metrics"
17+ "github.com/vllm-project/semantic-router/semantic-router/pkg/observability"
1718 "github.com/vllm-project/semantic-router/semantic-router/pkg/utils/http"
1819 "github.com/vllm-project/semantic-router/semantic-router/pkg/utils/pii"
1920)
@@ -173,7 +174,7 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
173174 userContent , nonUserMessages := extractUserAndNonUserContent (openAIRequest )
174175
175176 // Perform security checks
176- if response , shouldReturn := r .performSecurityChecks (userContent , nonUserMessages ); shouldReturn {
177+ if response , shouldReturn := r .performSecurityChecks (ctx , userContent , nonUserMessages ); shouldReturn {
177178 return response , nil
178179 }
179180
@@ -187,7 +188,7 @@ func (r *OpenAIRouter) handleRequestBody(v *ext_proc.ProcessingRequest_RequestBo
187188}
188189
189190// performSecurityChecks performs PII and jailbreak detection
190- func (r * OpenAIRouter ) performSecurityChecks (userContent string , nonUserMessages []string ) (* ext_proc.ProcessingResponse , bool ) {
191+ func (r * OpenAIRouter ) performSecurityChecks (ctx * RequestContext , userContent string , nonUserMessages []string ) (* ext_proc.ProcessingResponse , bool ) {
191192 // Perform PII classification on all message content
192193 allContent := pii .ExtractAllContent (userContent , nonUserMessages )
193194
@@ -212,6 +213,13 @@ func (r *OpenAIRouter) performSecurityChecks(userContent string, nonUserMessages
212213 log .Printf ("JAILBREAK ATTEMPT BLOCKED: %s (confidence: %.3f)" , jailbreakType , confidence )
213214
214215 // Return immediate jailbreak violation response
216+ // Structured log for security block
217+ observability .LogEvent ("security_block" , map [string ]interface {}{
218+ "reason_code" : "jailbreak_detected" ,
219+ "jailbreak_type" : jailbreakType ,
220+ "confidence" : confidence ,
221+ "request_id" : ctx .RequestID ,
222+ })
215223 jailbreakResponse := http .CreateJailbreakViolationResponse (jailbreakType , confidence )
216224 return jailbreakResponse , true
217225 } else {
@@ -241,6 +249,13 @@ func (r *OpenAIRouter) handleCaching(ctx *RequestContext) (*ext_proc.ProcessingR
241249 if err != nil {
242250 log .Printf ("Error searching cache: %v" , err )
243251 } else if found {
252+ // Record and log cache hit
253+ metrics .RecordCacheHit ()
254+ observability .LogEvent ("cache_hit" , map [string ]interface {}{
255+ "request_id" : ctx .RequestID ,
256+ "model" : requestModel ,
257+ "query" : requestQuery ,
258+ })
244259 // Return immediate response from cache
245260 response := http .CreateCacheHitResponse (cachedResponse )
246261 return response , true
@@ -313,19 +328,33 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
313328 // Select the best allowed model from this category
314329 matchedModel = r .Classifier .SelectBestModelFromList (allowedModels , categoryName )
315330 log .Printf ("Selected alternative model %s that passes PII policy" , matchedModel )
331+ // Record reason code for selecting alternative due to PII
332+ metrics .RecordRoutingReasonCode ("pii_policy_alternative_selected" , matchedModel )
316333 } else {
317334 log .Printf ("No models in category %s pass PII policy, using default" , categoryName )
318335 matchedModel = r .Config .DefaultModel
319336 // Check if default model passes policy
320337 defaultAllowed , defaultDeniedPII , _ := r .PIIChecker .CheckPolicy (matchedModel , detectedPII )
321338 if ! defaultAllowed {
322339 log .Printf ("Default model also violates PII policy, returning error" )
340+ observability .LogEvent ("routing_block" , map [string ]interface {}{
341+ "reason_code" : "pii_policy_denied_default_model" ,
342+ "request_id" : ctx .RequestID ,
343+ "model" : matchedModel ,
344+ "denied_pii" : defaultDeniedPII ,
345+ })
323346 piiResponse := http .CreatePIIViolationResponse (matchedModel , defaultDeniedPII )
324347 return piiResponse , nil
325348 }
326349 }
327350 } else {
328351 log .Printf ("Could not determine category, returning PII violation for model %s" , matchedModel )
352+ observability .LogEvent ("routing_block" , map [string ]interface {}{
353+ "reason_code" : "pii_policy_denied" ,
354+ "request_id" : ctx .RequestID ,
355+ "model" : matchedModel ,
356+ "denied_pii" : deniedPII ,
357+ })
329358 piiResponse := http .CreatePIIViolationResponse (matchedModel , deniedPII )
330359 return piiResponse , nil
331360 }
@@ -424,6 +453,20 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
424453 }
425454
426455 log .Printf ("Use new model: %s" , matchedModel )
456+
457+ // Structured log for routing decision (auto)
458+ observability .LogEvent ("routing_decision" , map [string ]interface {}{
459+ "reason_code" : "auto_routing" ,
460+ "request_id" : ctx .RequestID ,
461+ "original_model" : originalModel ,
462+ "selected_model" : matchedModel ,
463+ "category" : categoryName ,
464+ "reasoning_enabled" : useReasoning ,
465+ "reasoning_effort" : effortForMetrics ,
466+ "selected_endpoint" : selectedEndpoint ,
467+ "routing_latency_ms" : time .Since (ctx .ProcessingStartTime ).Milliseconds (),
468+ })
469+ metrics .RecordRoutingReasonCode ("auto_routing" , matchedModel )
427470 }
428471 }
429472 } else if originalModel != "auto" {
@@ -438,6 +481,12 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
438481 // Continue with request on error
439482 } else if ! allowed {
440483 log .Printf ("Model %s violates PII policy, returning error" , originalModel )
484+ observability .LogEvent ("routing_block" , map [string ]interface {}{
485+ "reason_code" : "pii_policy_denied" ,
486+ "request_id" : ctx .RequestID ,
487+ "model" : originalModel ,
488+ "denied_pii" : deniedPII ,
489+ })
441490 piiResponse := http .CreatePIIViolationResponse (originalModel , deniedPII )
442491 return piiResponse , nil
443492 }
@@ -472,6 +521,19 @@ func (r *OpenAIRouter) handleModelRouting(openAIRequest *openai.ChatCompletionNe
472521 },
473522 },
474523 }
524+ // Structured log for routing decision (explicit model)
525+ observability .LogEvent ("routing_decision" , map [string ]interface {}{
526+ "reason_code" : "model_specified" ,
527+ "request_id" : ctx .RequestID ,
528+ "original_model" : originalModel ,
529+ "selected_model" : originalModel ,
530+ "category" : "" ,
531+ "reasoning_enabled" : false ,
532+ "reasoning_effort" : "" ,
533+ "selected_endpoint" : selectedEndpoint ,
534+ "routing_latency_ms" : time .Since (ctx .ProcessingStartTime ).Milliseconds (),
535+ })
536+ metrics .RecordRoutingReasonCode ("model_specified" , originalModel )
475537 }
476538
477539 // Save the actual model that will be used for token tracking
0 commit comments