diff --git a/README.md b/README.md index d113a315..c7c88c98 100644 --- a/README.md +++ b/README.md @@ -182,7 +182,7 @@ In addition, as we are using klog, the following parameters are available: - `vmodule`: comma-separated list of pattern=N settings for file-filtered logging ## Environment variables -- `POD_NAME`: the simulator pod name. If defined, the response will contain the HTTP header `x-inference-pod` with this value +- `POD_NAME`: the simulator pod name. If defined, the response will contain the HTTP header `x-inference-pod` with this value, and the HTTP header `x-inference-port` with the port that the request was received on - `POD_NAMESPACE`: the simulator pod namespace. If defined, the response will contain the HTTP header `x-inference-namespace` with this value ## Migrating from releases prior to v0.2.0 diff --git a/pkg/llm-d-inference-sim/server.go b/pkg/llm-d-inference-sim/server.go index 6fb958c1..8247ce33 100644 --- a/pkg/llm-d-inference-sim/server.go +++ b/pkg/llm-d-inference-sim/server.go @@ -22,6 +22,7 @@ import ( "encoding/json" "fmt" "net" + "strconv" "github.com/buaazp/fasthttprouter" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -256,6 +257,7 @@ func (s *VllmSimulator) sendCompletionResponse(ctx *fasthttp.RequestCtx, resp op // Add pod and namespace information to response headers for testing/debugging if s.pod != "" { ctx.Response.Header.Add(podHeader, s.pod) + ctx.Response.Header.Add(portHeader, strconv.Itoa(s.config.Port)) } if s.namespace != "" { ctx.Response.Header.Add(namespaceHeader, s.namespace) diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go index 09dcda39..d10dff80 100644 --- a/pkg/llm-d-inference-sim/simulator.go +++ b/pkg/llm-d-inference-sim/simulator.go @@ -48,6 +48,7 @@ const ( chatCompletionChunkObject = "chat.completion.chunk" podHeader = "x-inference-pod" + portHeader = "x-inference-port" namespaceHeader = "x-inference-namespace" podNameEnv = "POD_NAME" podNsEnv = "POD_NAMESPACE" diff --git a/pkg/llm-d-inference-sim/simulator_test.go b/pkg/llm-d-inference-sim/simulator_test.go index 5a5583b0..a461ff01 100644 --- a/pkg/llm-d-inference-sim/simulator_test.go +++ b/pkg/llm-d-inference-sim/simulator_test.go @@ -360,18 +360,20 @@ var _ = Describe("Simulator", func() { ) Context("namespace and pod headers", func() { - It("Should not include namespace and pod headers in chat completion response when env is not set", func() { + It("Should not include namespace, pod and port headers in chat completion response when env is not set", func() { httpResp := sendSimpleChatRequest(nil, false) - // Check for namespace and pod headers + // Check for namespace, pod and port headers namespaceHeader := httpResp.Header.Get(namespaceHeader) podHeader := httpResp.Header.Get(podHeader) + portHeader := httpResp.Header.Get(portHeader) Expect(namespaceHeader).To(BeEmpty(), "Expected namespace header not to be present") Expect(podHeader).To(BeEmpty(), "Expected pod header not to be present") + Expect(portHeader).To(BeEmpty(), "Expected port header not to be present") }) - It("Should include namespace and pod headers in chat completion response", func() { + It("Should include namespace, pod and port headers in chat completion response", func() { testNamespace := "test-namespace" testPod := "test-pod" envs := map[string]string{ @@ -380,15 +382,17 @@ var _ = Describe("Simulator", func() { } httpResp := sendSimpleChatRequest(envs, false) - // Check for namespace and pod headers + // Check for namespace, pod and port headers namespaceHeader := httpResp.Header.Get(namespaceHeader) podHeader := httpResp.Header.Get(podHeader) + portHeader := httpResp.Header.Get(portHeader) Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present") Expect(podHeader).To(Equal(testPod), "Expected pod header to be present") + Expect(portHeader).To(Equal("8000"), "Expected port header to be present") }) - It("Should include namespace and pod headers in chat completion streaming response", func() { + It("Should include namespace, pod and port headers in chat completion streaming response", func() { testNamespace := "stream-test-namespace" testPod := "stream-test-pod" envs := map[string]string{ @@ -397,26 +401,30 @@ var _ = Describe("Simulator", func() { } httpResp := sendSimpleChatRequest(envs, true) - // Check for namespace and pod headers + // Check for namespace, pod and port headers namespaceHeader := httpResp.Header.Get(namespaceHeader) podHeader := httpResp.Header.Get(podHeader) + portHeader := httpResp.Header.Get(portHeader) Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present") Expect(podHeader).To(Equal(testPod), "Expected pod header to be present") + Expect(portHeader).To(Equal("8000"), "Expected port header to be present") }) - It("Should not include namespace and pod headers in chat completion streaming response when env is not set", func() { + It("Should not include namespace, pod and port headers in chat completion streaming response when env is not set", func() { httpResp := sendSimpleChatRequest(nil, true) - // Check for namespace and pod headers + // Check for namespace, pod and port headers namespaceHeader := httpResp.Header.Get(namespaceHeader) podHeader := httpResp.Header.Get(podHeader) + portHeader := httpResp.Header.Get(portHeader) Expect(namespaceHeader).To(BeEmpty(), "Expected namespace header not to be present") Expect(podHeader).To(BeEmpty(), "Expected pod header not to be present") + Expect(portHeader).To(BeEmpty(), "Expected port header not to be present") }) - It("Should include namespace and pod headers in completion response", func() { + It("Should include namespace, pod and port headers in completion response", func() { ctx := context.TODO() testNamespace := "test-namespace" @@ -434,15 +442,17 @@ var _ = Describe("Simulator", func() { Expect(err).NotTo(HaveOccurred()) Expect(resp).NotTo(BeNil()) - // Check for namespace and pod headers + // Check for namespace, pod and port headers namespaceHeader := httpResp.Header.Get(namespaceHeader) podHeader := httpResp.Header.Get(podHeader) + portHeader := httpResp.Header.Get(portHeader) Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present") Expect(podHeader).To(Equal(testPod), "Expected pod header to be present") + Expect(portHeader).To(Equal("8000"), "Expected port header to be present") }) - It("Should include namespace and pod headers in completion streaming response", func() { + It("Should include namespace, pod and port headers in completion streaming response", func() { ctx := context.TODO() testNamespace := "stream-test-namespace" @@ -460,12 +470,14 @@ var _ = Describe("Simulator", func() { Expect(err).NotTo(HaveOccurred()) Expect(resp).NotTo(BeNil()) - // Check for namespace and pod headers + // Check for namespace, pod and port headers namespaceHeader := httpResp.Header.Get(namespaceHeader) podHeader := httpResp.Header.Get(podHeader) + portHeader := httpResp.Header.Get(portHeader) Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present") Expect(podHeader).To(Equal(testPod), "Expected pod header to be present") + Expect(portHeader).To(Equal("8000"), "Expected port header to be present") }) }) diff --git a/pkg/llm-d-inference-sim/streaming.go b/pkg/llm-d-inference-sim/streaming.go index fac93f40..8e87af96 100644 --- a/pkg/llm-d-inference-sim/streaming.go +++ b/pkg/llm-d-inference-sim/streaming.go @@ -20,6 +20,7 @@ import ( "bufio" "encoding/json" "fmt" + "strconv" "time" "github.com/llm-d/llm-d-inference-sim/pkg/common" @@ -51,6 +52,7 @@ func (s *VllmSimulator) sendStreamingResponse(context *streamingContext, respons // Add pod and namespace information to response headers for testing/debugging if s.pod != "" { context.ctx.Response.Header.Add(podHeader, s.pod) + context.ctx.Response.Header.Add(portHeader, strconv.Itoa(s.config.Port)) } if s.namespace != "" { context.ctx.Response.Header.Add(namespaceHeader, s.namespace)