Skip to content

Commit 471fac0

Browse files
authored
Add support to echo the sim's pod name and namespace (#128)
* add pod name and ns headers Signed-off-by: npolshakova <[email protected]> * add pod name and ns env Signed-off-by: npolshakova <[email protected]> * Signed-off-by: npolshakova <[email protected]> feedback Signed-off-by: npolshakova <[email protected]> * reuse env var Signed-off-by: npolshakova <[email protected]> * feedback Signed-off-by: npolshakova <[email protected]> * add unset env tests Signed-off-by: npolshakova <[email protected]> --------- Signed-off-by: npolshakova <[email protected]>
1 parent 7bcee36 commit 471fac0

File tree

9 files changed

+280
-14
lines changed

9 files changed

+280
-14
lines changed

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ SRC = $(shell find . -type f -name '*.go')
3636
help: ## Print help
3737
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
3838

39-
LDFLAGS ?= -extldflags '-L$(shell pwd)/lib'
39+
GO_LDFLAGS := -extldflags '-L$(shell pwd)/lib $(LDFLAGS)'
4040
CGO_ENABLED=1
4141
TOKENIZER_LIB = lib/libtokenizers.a
4242
# Extract TOKENIZER_VERSION from Dockerfile
@@ -67,7 +67,7 @@ format: ## Format Go source files
6767
.PHONY: test
6868
test: check-ginkgo download-tokenizer download-zmq ## Run tests
6969
@printf "\033[33;1m==== Running tests ====\033[0m\n"
70-
CGO_ENABLED=1 ginkgo -ldflags="$(LDFLAGS)" -v -r
70+
CGO_ENABLED=1 ginkgo -ldflags="$(GO_LDFLAGS)" -v -r
7171

7272
.PHONY: post-deploy-test
7373
post-deploy-test: ## Run post deployment tests
@@ -84,7 +84,7 @@ lint: check-golangci-lint ## Run lint
8484
.PHONY: build
8585
build: check-go download-tokenizer download-zmq
8686
@printf "\033[33;1m==== Building ====\033[0m\n"
87-
go build -ldflags="$(LDFLAGS)" -o bin/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go
87+
go build -ldflags="$(GO_LDFLAGS)" -o bin/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go
8888

8989
##@ Container Build/Push
9090

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ make image-build
156156
Please note that the default image tag is `ghcr.io/llm-d/llm-d-inference-sim:dev`. <br>
157157
The following environment variables can be used to change the image tag: `REGISTRY`, `SIM_TAG`, `IMAGE_TAG_BASE` or `IMG`.
158158

159+
Note: On macOS, use `make image-build TARGETOS=linux` to pull the correct base image.
160+
159161
### Running
160162
To run the vLLM Simulator image under Docker, run:
161163
```bash
@@ -186,6 +188,13 @@ To run the vLLM simulator in a Kubernetes cluster, run:
186188
kubectl apply -f manifests/deployment.yaml
187189
```
188190

191+
When testing locally with kind, build the Docker image with `make image-build`, then load it into the cluster:
192+
```shell
193+
kind load --name kind docker-image ghcr.io/llm-d/llm-d-inference-sim:dev
194+
```
195+
196+
Update the `deployment.yaml` file to use the dev tag.
197+
189198
To verify the deployment is available, run:
190199
```bash
191200
kubectl get deployment vllm-llama3-8b-instruct

manifests/deployment.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@ spec:
2525
image: ghcr.io/llm-d/llm-d-inference-sim:latest
2626
imagePullPolicy: IfNotPresent
2727
name: vllm-sim
28+
env:
29+
- name: POD_NAME
30+
valueFrom:
31+
fieldRef:
32+
apiVersion: v1
33+
fieldPath: metadata.name
34+
- name: POD_NAMESPACE
35+
valueFrom:
36+
fieldRef:
37+
apiVersion: v1
38+
fieldPath: metadata.namespace
2839
ports:
2940
- containerPort: 8000
3041
name: http

pkg/llm-d-inference-sim/lora_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ var _ = Describe("LoRAs", func() {
3737
client, err := startServerWithArgs(ctx, "",
3838
[]string{"cmd", "--model", model, "--mode", common.ModeEcho,
3939
"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
40-
"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}"})
40+
"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}"}, nil)
4141
Expect(err).NotTo(HaveOccurred())
4242

4343
openaiclient := openai.NewClient(

pkg/llm-d-inference-sim/seed_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ var _ = Describe("Simulator with seed", func() {
3333
func() {
3434
ctx := context.TODO()
3535
client, err := startServerWithArgs(ctx, common.ModeRandom,
36-
[]string{"cmd", "--model", model, "--mode", common.ModeRandom, "--seed", "100"})
36+
[]string{"cmd", "--model", model, "--mode", common.ModeRandom, "--seed", "100"}, nil)
3737
Expect(err).NotTo(HaveOccurred())
3838

3939
openaiclient := openai.NewClient(

pkg/llm-d-inference-sim/simulator.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"encoding/json"
2323
"fmt"
2424
"net"
25+
"os"
2526
"strings"
2627
"sync"
2728
"sync/atomic"
@@ -46,6 +47,11 @@ const (
4647
textCompletionObject = "text_completion"
4748
chatCompletionObject = "chat.completion"
4849
chatCompletionChunkObject = "chat.completion.chunk"
50+
51+
podHeader = "x-inference-pod"
52+
namespaceHeader = "x-inference-namespace"
53+
podNameEnv = "POD_NAME"
54+
podNsEnv = "POD_NAMESPACE"
4955
)
5056

5157
// VllmSimulator simulates vLLM server supporting OpenAI API
@@ -79,6 +85,10 @@ type VllmSimulator struct {
7985
toolsValidator *openaiserverapi.Validator
8086
// kv cache functionality
8187
kvcacheHelper *kvcache.KVCacheHelper
88+
// namespace where simulator is running
89+
namespace string
90+
// pod name of simulator
91+
pod string
8292
}
8393

8494
// New creates a new VllmSimulator instance with the given logger
@@ -93,6 +103,8 @@ func New(logger logr.Logger) (*VllmSimulator, error) {
93103
reqChan: make(chan *openaiserverapi.CompletionReqCtx, 1000),
94104
toolsValidator: toolsValidtor,
95105
kvcacheHelper: nil, // kvcache helper will be created only if required after reading configuration
106+
namespace: os.Getenv(podNsEnv),
107+
pod: os.Getenv(podNameEnv),
96108
}, nil
97109
}
98110

@@ -599,9 +611,15 @@ func (s *VllmSimulator) sendResponse(isChatCompletion bool, ctx *fasthttp.Reques
599611
totalMillisToWait := s.getTimeToFirstToken(doRemotePrefill) + s.getTotalInterTokenLatency(numOfTokens)
600612
time.Sleep(time.Duration(totalMillisToWait) * time.Millisecond)
601613

602-
// TODO - maybe add pod id to response header for testing
603614
ctx.Response.Header.SetContentType("application/json")
604615
ctx.Response.Header.SetStatusCode(fasthttp.StatusOK)
616+
// Add pod and namespace information to response headers for testing/debugging
617+
if s.pod != "" {
618+
ctx.Response.Header.Add(podHeader, s.pod)
619+
}
620+
if s.namespace != "" {
621+
ctx.Response.Header.Add(namespaceHeader, s.namespace)
622+
}
605623
ctx.Response.SetBody(data)
606624

607625
s.responseSentCallback(modelName)

0 commit comments

Comments
 (0)