Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,20 @@ install-docker: check-container-tool ## Install app using $(CONTAINER_TOOL)
# Stop and remove the app container. The '-' prefix on the stop/rm line tells
# make to ignore a failure of that command, so the final message still prints.
uninstall-docker: check-container-tool ## Uninstall app from $(CONTAINER_TOOL)
	@echo "Stopping and removing container in $(CONTAINER_TOOL)..."
	-$(CONTAINER_TOOL) stop $(PROJECT_NAME)-container && $(CONTAINER_TOOL) rm $(PROJECT_NAME)-container
	@echo "$(CONTAINER_TOOL) uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)"

### Helm Targets

# Namespace the Helm release is installed into / removed from.
# Defaults to "default"; override with e.g. `make install-helm HELM_NAMESPACE=sim`.
HELM_NAMESPACE ?= default

# Install the chart in helm/$(PROJECT_NAME) as release $(PROJECT_NAME), or
# upgrade it in place when the release already exists (`upgrade --install`).
.PHONY: install-helm
install-helm: check-helm ## Install app using Helm
	@echo "Installing chart with Helm..."
	helm upgrade --install $(PROJECT_NAME) helm/$(PROJECT_NAME) --namespace $(HELM_NAMESPACE)
	@echo "Helm installation complete."

# Remove release $(PROJECT_NAME) from the same namespace used by install-helm.
.PHONY: uninstall-helm
uninstall-helm: check-helm ## Uninstall app using Helm
	@echo "Uninstalling chart with Helm..."
	helm uninstall $(PROJECT_NAME) --namespace $(HELM_NAMESPACE)
	@echo "Helm uninstallation complete."

.PHONY: env
env: ## Print environment variables
Expand All @@ -152,13 +165,13 @@ env: ## Print environment variables


##@ Tools

# Aggregate check: every check-* prerequisite must succeed before the success
# message is printed. The trailing backslashes join the prerequisite list onto
# the rule header; without the one after "check-tools:" the list is no longer
# part of the rule.
.PHONY: check-tools
check-tools: \
  check-go \
  check-ginkgo \
  check-golangci-lint \
  check-container-tool \
  check-helm
	@echo "✅ All required tools are installed."

.PHONY: check-go
Expand All @@ -182,6 +195,12 @@ check-container-tool:
echo "❌ $(CONTAINER_TOOL) is not installed."; \
echo "🔧 Try: sudo apt install $(CONTAINER_TOOL) OR brew install $(CONTAINER_TOOL)"; exit 1; }

# Verify that the helm CLI can be found; otherwise print an install hint and
# fail the target with exit status 1.
.PHONY: check-helm
check-helm:
	@if ! command -v helm >/dev/null 2>&1; then \
	  echo "❌ helm is not installed. Install it from https://helm.sh/docs/intro/install/"; \
	  exit 1; \
	fi


.PHONY: check-builder
check-builder:
@if [ -z "$(BUILDER)" ]; then \
Expand Down
23 changes: 23 additions & 0 deletions helm/llm-d-inference-sim/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
# macOS Finder metadata
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# IDE and editor project files
.project
.idea/
*.tmproj
.vscode/
24 changes: 24 additions & 0 deletions helm/llm-d-inference-sim/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Chart metadata for the vLLM Simulator.
apiVersion: v2

# 'application' charts contain templates and can be deployed; 'library' charts
# only provide helpers to other charts and cannot be deployed on their own.
type: application

name: llm-d-inference-sim
description: A Helm chart for the vLLM Simulator

# Chart version (Semantic Versioning, https://semver.org/). Increment it on
# every change to the chart or its templates, including appVersion bumps.
version: 0.1.0

# Version of the application being deployed. It does not have to follow
# Semantic Versioning; keep it quoted. The deployment template uses it as the
# image tag when image.tag is not set.
appVersion: "v0.4.0"
42 changes: 42 additions & 0 deletions helm/llm-d-inference-sim/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{{- /*
Post-install notes. Exactly one branch is rendered:
  - ingress enabled: one URL per configured host/path (https when ingress.tls is set)
  - service.type NodePort / LoadBalancer: shell commands that discover the address
  - service.type ClusterIP: port-forward instructions plus a sample chat-completions request
NOTE(review): the sample request hard-codes the model name "model1"; confirm it
matches the model configured for the simulator.
*/ -}}
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "llm-d-inference-sim.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "llm-d-inference-sim.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "llm-d-inference-sim.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0)}}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "llm-d-inference-sim.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  export LOCAL_PORT=8001
  export HOST_NAME=localhost

  echo "Run 'kubectl port-forward --namespace {{ .Release.Namespace }} $POD_NAME $LOCAL_PORT:$CONTAINER_PORT &' to forward the port."
  echo "Then, you can test the service with:"

  curl --request POST \
    --url http://$HOST_NAME:$LOCAL_PORT/v1/chat/completions \
    --header 'content-type: application/json' \
    --data '{
      "model": "model1",
      "messages": [
        {
          "role": "system",
          "content": "You are a helpful assistant."
        },
        {
          "role": "user",
          "content": "Hello!"
        }
      ]
    }'
{{- end }}
53 changes: 53 additions & 0 deletions helm/llm-d-inference-sim/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{{/*
Short chart name: .Values.nameOverride when set, otherwise the chart's own
name, cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "llm-d-inference-sim.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified app name, limited to 63 characters because some Kubernetes
name fields are limited to this (by the DNS naming spec).
Resolution order:
  1. .Values.fullnameOverride, when set
  2. the release name alone, when it already contains the chart name
  3. "<release name>-<chart name>"
In every case a trailing "-" left by the truncation is removed.
*/}}
{{- define "llm-d-inference-sim.fullname" -}}
{{- $base := .Values.nameOverride | default .Chart.Name -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else if contains $base .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $base | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end }}

{{/*
Value for the helm.sh/chart label: "<chart name>-<chart version>" with "+"
replaced by "_", cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "llm-d-inference-sim.chart" -}}
{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels: the chart label, the selector labels, the app version (only
when the chart declares one) and the managing service.
*/}}
{{- define "llm-d-inference-sim.labels" -}}
helm.sh/chart: {{ include "llm-d-inference-sim.chart" . }}
{{ include "llm-d-inference-sim.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ . | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels: chart name plus release name. The Deployment's
selector.matchLabels and the Service's selector both render this template.
*/}}
{{- define "llm-d-inference-sim.selectorLabels" -}}
{{- $appName := include "llm-d-inference-sim.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}


23 changes: 23 additions & 0 deletions helm/llm-d-inference-sim/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{- /*
ConfigMap holding the config.yaml that the Deployment mounts under /config.
All settings come from .Values.config; $cfg is only a local alias. These
comments are template comments so the rendered manifest is unchanged.
*/ -}}
{{- $cfg := .Values.config -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "llm-d-inference-sim.fullname" . }}-config
  labels:
    {{- include "llm-d-inference-sim.labels" . | nindent 4 }}
data:
  config.yaml: |
    port: {{ $cfg.port }}
    model: "{{ $cfg.model }}"
    served-model-name:
    {{- toYaml $cfg.servedModelName | nindent 4 }}
    max-loras: {{ $cfg.maxLoras }}
    max-cpu-loras: {{ $cfg.maxCpuLoras }}
    max-num-seqs: {{ $cfg.maxNumSeqs }}
    max-model-len: {{ $cfg.maxModelLen }}
    lora-modules:
    {{- toYaml $cfg.loraModules | nindent 4 }}
    mode: "{{ $cfg.mode }}"
    time-to-first-token: {{ $cfg.timeToFirstToken }}
    inter-token-latency: {{ $cfg.interTokenLatency }}
    kv-cache-transfer-latency: {{ $cfg.kvCacheTransferLatency }}
    seed: {{ $cfg.seed }}
89 changes: 89 additions & 0 deletions helm/llm-d-inference-sim/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{{- /*
Deployment for the simulator.
  - The container is started with "--config /config/config.yaml"; that file
    comes from the "<fullname>-config" ConfigMap mounted at /config.
  - The checksum/config annotation hashes the rendered ConfigMap template, so
    the pod template changes whenever the rendered configuration changes.
  - containerPort is taken from config.port, the same value written as "port"
    into config.yaml (presumably the port the process listens on; confirm
    against the application), and falls back to service.port when config.port
    is unset. The Service reaches the pod through the named port "http", so
    service.port may differ from the container port.
  - The liveness and readiness probes call /health and /ready on that port.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "llm-d-inference-sim.fullname" . }}
  labels:
    {{- include "llm-d-inference-sim.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "llm-d-inference-sim.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      annotations:
        checksum/config: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum }}
        {{- with .Values.podAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      labels:
        {{- include "llm-d-inference-sim.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
            - --config
            - /config/config.yaml
          env:
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.namespace
          ports:
            - name: http
              containerPort: {{ .Values.config.port | default .Values.service.port }}
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /health
              port: http
          readinessProbe:
            httpGet:
              path: /ready
              port: http
          {{- with .Values.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumeMounts:
            - name: config
              mountPath: /config
      volumes:
        - name: config
          configMap:
            name: {{ include "llm-d-inference-sim.fullname" . }}-config
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
43 changes: 43 additions & 0 deletions helm/llm-d-inference-sim/templates/ingress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{{- /*
Ingress, rendered only when ingress.enabled is true.
  - One rule per entry in ingress.hosts; every path routes to the chart's
    Service on service.port.
  - A tls section is emitted only when ingress.tls is set.
  - pathType is required by networking.k8s.io/v1, so a path entry without an
    explicit pathType falls back to "ImplementationSpecific" instead of
    rendering a manifest that omits the field.
*/ -}}
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "llm-d-inference-sim.fullname" . }}
  labels:
    {{- include "llm-d-inference-sim.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            pathType: {{ .pathType | default "ImplementationSpecific" }}
            backend:
              service:
                name: {{ include "llm-d-inference-sim.fullname" $ }}
                port:
                  number: {{ $.Values.service.port }}
          {{- end }}
    {{- end }}
{{- end }}
15 changes: 15 additions & 0 deletions helm/llm-d-inference-sim/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{{- /*
Service in front of the simulator pods. It selects pods by the chart's
selector labels and forwards service.port to the container port named "http".
*/ -}}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "llm-d-inference-sim.fullname" . }}
  labels:
    {{- include "llm-d-inference-sim.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  selector:
    {{- include "llm-d-inference-sim.selectorLabels" . | nindent 4 }}
  ports:
    - name: http
      protocol: TCP
      port: {{ .Values.service.port }}
      targetPort: http
Loading
Loading