Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ In addition, as we are using klog, the following parameters are available:
- `vmodule`: comma-separated list of pattern=N settings for file-filtered logging

## Environment variables
- `POD_NAME`: the simulator pod name. If defined, the response will contain the HTTP header `x-inference-pod` with this value
- `POD_NAME`: the simulator pod name. If defined, the response will contain the HTTP header `x-inference-pod` with this value, and the HTTP header `x-inference-port` with the port that the request was received on
- `POD_NAMESPACE`: the simulator pod namespace. If defined, the response will contain the HTTP header `x-inference-namespace` with this value

## Migrating from releases prior to v0.2.0
Expand Down
2 changes: 2 additions & 0 deletions pkg/llm-d-inference-sim/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"encoding/json"
"fmt"
"net"
"strconv"

"github.com/buaazp/fasthttprouter"
"github.com/prometheus/client_golang/prometheus/promhttp"
Expand Down Expand Up @@ -256,6 +257,7 @@ func (s *VllmSimulator) sendCompletionResponse(ctx *fasthttp.RequestCtx, resp op
// Add pod, port and namespace information to response headers for testing/debugging
if s.pod != "" {
ctx.Response.Header.Add(podHeader, s.pod)
ctx.Response.Header.Add(portHeader, strconv.Itoa(s.config.Port))
}
if s.namespace != "" {
ctx.Response.Header.Add(namespaceHeader, s.namespace)
Expand Down
1 change: 1 addition & 0 deletions pkg/llm-d-inference-sim/simulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
chatCompletionChunkObject = "chat.completion.chunk"

podHeader = "x-inference-pod"
portHeader = "x-inference-port"
namespaceHeader = "x-inference-namespace"
podNameEnv = "POD_NAME"
podNsEnv = "POD_NAMESPACE"
Expand Down
36 changes: 24 additions & 12 deletions pkg/llm-d-inference-sim/simulator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,18 +360,20 @@ var _ = Describe("Simulator", func() {
)

Context("namespace and pod headers", func() {
It("Should not include namespace and pod headers in chat completion response when env is not set", func() {
It("Should not include namespace, pod and port headers in chat completion response when env is not set", func() {
httpResp := sendSimpleChatRequest(nil, false)

// Check for namespace and pod headers
// Check for namespace, pod and port headers
namespaceHeader := httpResp.Header.Get(namespaceHeader)
podHeader := httpResp.Header.Get(podHeader)
portHeader := httpResp.Header.Get(portHeader)

Expect(namespaceHeader).To(BeEmpty(), "Expected namespace header not to be present")
Expect(podHeader).To(BeEmpty(), "Expected pod header not to be present")
Expect(portHeader).To(BeEmpty(), "Expected port header not to be present")
})

It("Should include namespace and pod headers in chat completion response", func() {
It("Should include namespace, pod and port headers in chat completion response", func() {
testNamespace := "test-namespace"
testPod := "test-pod"
envs := map[string]string{
Expand All @@ -380,15 +382,17 @@ var _ = Describe("Simulator", func() {
}
httpResp := sendSimpleChatRequest(envs, false)

// Check for namespace and pod headers
// Check for namespace, pod and port headers
namespaceHeader := httpResp.Header.Get(namespaceHeader)
podHeader := httpResp.Header.Get(podHeader)
portHeader := httpResp.Header.Get(portHeader)

Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present")
Expect(podHeader).To(Equal(testPod), "Expected pod header to be present")
Expect(portHeader).To(Equal("8000"), "Expected port header to be present")
})

It("Should include namespace and pod headers in chat completion streaming response", func() {
It("Should include namespace, pod and port headers in chat completion streaming response", func() {
testNamespace := "stream-test-namespace"
testPod := "stream-test-pod"
envs := map[string]string{
Expand All @@ -397,26 +401,30 @@ var _ = Describe("Simulator", func() {
}
httpResp := sendSimpleChatRequest(envs, true)

// Check for namespace and pod headers
// Check for namespace, pod and port headers
namespaceHeader := httpResp.Header.Get(namespaceHeader)
podHeader := httpResp.Header.Get(podHeader)
portHeader := httpResp.Header.Get(portHeader)

Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present")
Expect(podHeader).To(Equal(testPod), "Expected pod header to be present")
Expect(portHeader).To(Equal("8000"), "Expected port header to be present")
})

It("Should not include namespace and pod headers in chat completion streaming response when env is not set", func() {
It("Should not include namespace, pod and port headers in chat completion streaming response when env is not set", func() {
httpResp := sendSimpleChatRequest(nil, true)

// Check for namespace and pod headers
// Check for namespace, pod and port headers
namespaceHeader := httpResp.Header.Get(namespaceHeader)
podHeader := httpResp.Header.Get(podHeader)
portHeader := httpResp.Header.Get(portHeader)

Expect(namespaceHeader).To(BeEmpty(), "Expected namespace header not to be present")
Expect(podHeader).To(BeEmpty(), "Expected pod header not to be present")
Expect(portHeader).To(BeEmpty(), "Expected port header not to be present")
})

It("Should include namespace and pod headers in completion response", func() {
It("Should include namespace, pod and port headers in completion response", func() {
ctx := context.TODO()

testNamespace := "test-namespace"
Expand All @@ -434,15 +442,17 @@ var _ = Describe("Simulator", func() {
Expect(err).NotTo(HaveOccurred())
Expect(resp).NotTo(BeNil())

// Check for namespace and pod headers
// Check for namespace, pod and port headers
namespaceHeader := httpResp.Header.Get(namespaceHeader)
podHeader := httpResp.Header.Get(podHeader)
portHeader := httpResp.Header.Get(portHeader)

Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present")
Expect(podHeader).To(Equal(testPod), "Expected pod header to be present")
Expect(portHeader).To(Equal("8000"), "Expected port header to be present")
})

It("Should include namespace and pod headers in completion streaming response", func() {
It("Should include namespace, pod and port headers in completion streaming response", func() {
ctx := context.TODO()

testNamespace := "stream-test-namespace"
Expand All @@ -460,12 +470,14 @@ var _ = Describe("Simulator", func() {
Expect(err).NotTo(HaveOccurred())
Expect(resp).NotTo(BeNil())

// Check for namespace and pod headers
// Check for namespace, pod and port headers
namespaceHeader := httpResp.Header.Get(namespaceHeader)
podHeader := httpResp.Header.Get(podHeader)
portHeader := httpResp.Header.Get(portHeader)

Expect(namespaceHeader).To(Equal(testNamespace), "Expected namespace header to be present")
Expect(podHeader).To(Equal(testPod), "Expected pod header to be present")
Expect(portHeader).To(Equal("8000"), "Expected port header to be present")
})
})

Expand Down
2 changes: 2 additions & 0 deletions pkg/llm-d-inference-sim/streaming.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bufio"
"encoding/json"
"fmt"
"strconv"
"time"

"github.com/llm-d/llm-d-inference-sim/pkg/common"
Expand Down Expand Up @@ -51,6 +52,7 @@ func (s *VllmSimulator) sendStreamingResponse(context *streamingContext, respons
// Add pod, port and namespace information to response headers for testing/debugging
if s.pod != "" {
context.ctx.Response.Header.Add(podHeader, s.pod)
context.ctx.Response.Header.Add(portHeader, strconv.Itoa(s.config.Port))
}
if s.namespace != "" {
context.ctx.Response.Header.Add(namespaceHeader, s.namespace)
Expand Down
Loading