@@ -22,6 +22,7 @@ import (
2222 "encoding/json"
2323 "fmt"
2424 "net"
25+ "os"
2526 "strings"
2627 "sync"
2728 "sync/atomic"
@@ -46,6 +47,11 @@ const (
4647 textCompletionObject = "text_completion"
4748 chatCompletionObject = "chat.completion"
4849 chatCompletionChunkObject = "chat.completion.chunk"
50+
51+ podHeader = "x-inference-pod"
52+ namespaceHeader = "x-inference-namespace"
53+ podNameEnv = "POD_NAME"
54+ podNsEnv = "POD_NAMESPACE"
4955)
5056
5157// VllmSimulator simulates vLLM server supporting OpenAI API
@@ -79,6 +85,10 @@ type VllmSimulator struct {
7985 toolsValidator * openaiserverapi.Validator
8086 // kv cache functionality
8187 kvcacheHelper * kvcache.KVCacheHelper
88+ // namespace where simulator is running
89+ namespace string
90+ // pod name of simulator
91+ pod string
8292}
8393
8494// New creates a new VllmSimulator instance with the given logger
@@ -93,6 +103,8 @@ func New(logger logr.Logger) (*VllmSimulator, error) {
93103 reqChan : make (chan * openaiserverapi.CompletionReqCtx , 1000 ),
94104 toolsValidator : toolsValidtor ,
95105 kvcacheHelper : nil , // kvcache helper will be created only if required after reading configuration
106+ namespace : os .Getenv (podNsEnv ),
107+ pod : os .Getenv (podNameEnv ),
96108 }, nil
97109}
98110
@@ -599,9 +611,15 @@ func (s *VllmSimulator) sendResponse(isChatCompletion bool, ctx *fasthttp.Reques
599611 totalMillisToWait := s .getTimeToFirstToken (doRemotePrefill ) + s .getTotalInterTokenLatency (numOfTokens )
600612 time .Sleep (time .Duration (totalMillisToWait ) * time .Millisecond )
601613
602- // TODO - maybe add pod id to response header for testing
603614 ctx .Response .Header .SetContentType ("application/json" )
604615 ctx .Response .Header .SetStatusCode (fasthttp .StatusOK )
616+ // Add pod and namespace information to response headers for testing/debugging
617+ if s .pod != "" {
618+ ctx .Response .Header .Add (podHeader , s .pod )
619+ }
620+ if s .namespace != "" {
621+ ctx .Response .Header .Add (namespaceHeader , s .namespace )
622+ }
605623 ctx .Response .SetBody (data )
606624
607625 s .responseSentCallback (modelName )
0 commit comments