From 68bcfd3dc16a12cee665d26731753c8ba942e2b2 Mon Sep 17 00:00:00 2001 From: Blackoutta Date: Tue, 2 Sep 2025 11:12:17 +0800 Subject: [PATCH] feat: add helm charts Signed-off-by: Blackoutta --- Makefile | 33 ++++- helm/llm-d-inference-sim/.helmignore | 23 ++++ helm/llm-d-inference-sim/Chart.yaml | 24 ++++ helm/llm-d-inference-sim/templates/NOTES.txt | 42 ++++++ .../templates/_helpers.tpl | 53 ++++++++ .../templates/configmap.yaml | 23 ++++ .../templates/deployment.yaml | 89 +++++++++++++ .../templates/ingress.yaml | 43 ++++++ .../templates/service.yaml | 15 +++ helm/llm-d-inference-sim/values.yaml | 122 ++++++++++++++++++ 10 files changed, 460 insertions(+), 7 deletions(-) create mode 100644 helm/llm-d-inference-sim/.helmignore create mode 100644 helm/llm-d-inference-sim/Chart.yaml create mode 100644 helm/llm-d-inference-sim/templates/NOTES.txt create mode 100644 helm/llm-d-inference-sim/templates/_helpers.tpl create mode 100644 helm/llm-d-inference-sim/templates/configmap.yaml create mode 100644 helm/llm-d-inference-sim/templates/deployment.yaml create mode 100644 helm/llm-d-inference-sim/templates/ingress.yaml create mode 100644 helm/llm-d-inference-sim/templates/service.yaml create mode 100644 helm/llm-d-inference-sim/values.yaml diff --git a/Makefile b/Makefile index c4fddb1d..45678c5c 100644 --- a/Makefile +++ b/Makefile @@ -142,7 +142,20 @@ install-docker: check-container-tool ## Install app using $(CONTAINER_TOOL) uninstall-docker: check-container-tool ## Uninstall app from $(CONTAINER_TOOL) @echo "Stopping and removing container in $(CONTAINER_TOOL)..." -$(CONTAINER_TOOL) stop $(PROJECT_NAME)-container && $(CONTAINER_TOOL) rm $(PROJECT_NAME)-container -@echo "$(CONTAINER_TOOL) uninstallation complete. Remove alias if set: unalias $(PROJECT_NAME)" + @echo "$(CONTAINER_TOOL) uninstallation complete. 
Remove alias if set: unalias $(PROJECT_NAME)"
+
+### Helm Targets
+.PHONY: install-helm
+install-helm: check-helm ## Install app using Helm
+	@echo "Installing chart with Helm..."
+	helm upgrade --install $(PROJECT_NAME) helm/$(PROJECT_NAME) --namespace default
+	@echo "Helm installation complete."
+
+.PHONY: uninstall-helm
+uninstall-helm: check-helm ## Uninstall app using Helm
+	@echo "Uninstalling chart with Helm..."
+	helm uninstall $(PROJECT_NAME) --namespace default
+	@echo "Helm uninstallation complete."
 
 .PHONY: env
 env: ## Print environment variables
@@ -152,13 +165,13 @@ env: ## Print environment variables
 
 ##@ Tools
 
-
 .PHONY: check-tools
-check-tools: \
-  check-go \
-  check-ginkgo \
-  check-golangci-lint \
-  check-container-tool
+check-tools: \
+  check-go \
+  check-ginkgo \
+  check-golangci-lint \
+  check-container-tool \
+  check-helm
 	@echo "✅ All required tools are installed."
 
 .PHONY: check-go
@@ -182,6 +195,12 @@ check-container-tool:
 		echo "❌ $(CONTAINER_TOOL) is not installed."; \
 		echo "🔧 Try: sudo apt install $(CONTAINER_TOOL) OR brew install $(CONTAINER_TOOL)"; exit 1; }
 
+.PHONY: check-helm
+check-helm:
+	@command -v helm >/dev/null 2>&1 || { \
+		echo "❌ helm is not installed. Install it from https://helm.sh/docs/intro/install/"; exit 1; }
+
+
 .PHONY: check-builder
 check-builder:
 	@if [ -z "$(BUILDER)" ]; then \
diff --git a/helm/llm-d-inference-sim/.helmignore b/helm/llm-d-inference-sim/.helmignore
new file mode 100644
index 00000000..0e8a0eb3
--- /dev/null
+++ b/helm/llm-d-inference-sim/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm/llm-d-inference-sim/Chart.yaml b/helm/llm-d-inference-sim/Chart.yaml new file mode 100644 index 00000000..017b33dd --- /dev/null +++ b/helm/llm-d-inference-sim/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +name: llm-d-inference-sim +description: A Helm chart for the vLLM Simulator + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "v0.4.0" diff --git a/helm/llm-d-inference-sim/templates/NOTES.txt b/helm/llm-d-inference-sim/templates/NOTES.txt new file mode 100644 index 00000000..7cbb7451 --- /dev/null +++ b/helm/llm-d-inference-sim/templates/NOTES.txt @@ -0,0 +1,42 @@ +1. 
Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "llm-d-inference-sim.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "llm-d-inference-sim.fullname" . }}"' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "llm-d-inference-sim.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0)}}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "llm-d-inference-sim.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + export LOCAL_PORT=8001 + export HOST_NAME=localhost + + echo "Run 'kubectl port-forward --namespace {{ .Release.Namespace }} $POD_NAME $LOCAL_PORT:$CONTAINER_PORT &' to forward the port." 
+ echo "Then, you can test the service with:" + +curl --request POST \ + --url http://$HOST_NAME:$LOCAL_PORT/v1/chat/completions \ + --header 'content-type: application/json' \ + --data '{ + "model": "model1", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello!" + } + ] +}' +{{- end }} diff --git a/helm/llm-d-inference-sim/templates/_helpers.tpl b/helm/llm-d-inference-sim/templates/_helpers.tpl new file mode 100644 index 00000000..aa01480e --- /dev/null +++ b/helm/llm-d-inference-sim/templates/_helpers.tpl @@ -0,0 +1,53 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "llm-d-inference-sim.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "llm-d-inference-sim.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "llm-d-inference-sim.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "llm-d-inference-sim.labels" -}} +helm.sh/chart: {{ include "llm-d-inference-sim.chart" . }} +{{ include "llm-d-inference-sim.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "llm-d-inference-sim.selectorLabels" -}} +app.kubernetes.io/name: {{ include "llm-d-inference-sim.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + diff --git a/helm/llm-d-inference-sim/templates/configmap.yaml b/helm/llm-d-inference-sim/templates/configmap.yaml new file mode 100644 index 00000000..98066b8b --- /dev/null +++ b/helm/llm-d-inference-sim/templates/configmap.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "llm-d-inference-sim.fullname" . }}-config + labels: + {{- include "llm-d-inference-sim.labels" . | nindent 4 }} +data: + config.yaml: | + port: {{ .Values.config.port }} + model: "{{ .Values.config.model }}" + served-model-name: + {{- toYaml .Values.config.servedModelName | nindent 4 }} + max-loras: {{ .Values.config.maxLoras }} + max-cpu-loras: {{ .Values.config.maxCpuLoras }} + max-num-seqs: {{ .Values.config.maxNumSeqs }} + max-model-len: {{ .Values.config.maxModelLen }} + lora-modules: + {{- toYaml .Values.config.loraModules | nindent 4 }} + mode: "{{ .Values.config.mode }}" + time-to-first-token: {{ .Values.config.timeToFirstToken }} + inter-token-latency: {{ .Values.config.interTokenLatency }} + kv-cache-transfer-latency: {{ .Values.config.kvCacheTransferLatency }} + seed: {{ .Values.config.seed }} diff --git a/helm/llm-d-inference-sim/templates/deployment.yaml b/helm/llm-d-inference-sim/templates/deployment.yaml new file mode 100644 index 00000000..21f755f2 --- /dev/null +++ b/helm/llm-d-inference-sim/templates/deployment.yaml @@ -0,0 +1,89 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "llm-d-inference-sim.fullname" . }} + labels: + {{- include "llm-d-inference-sim.labels" . 
| nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "llm-d-inference-sim.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "llm-d-inference-sim.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - --config + - /config/config.yaml + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: http + readinessProbe: + httpGet: + path: /ready + port: http + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: /config + volumes: + - name: config + configMap: + name: {{ include "llm-d-inference-sim.fullname" . }}-config + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} \ No newline at end of file diff --git a/helm/llm-d-inference-sim/templates/ingress.yaml b/helm/llm-d-inference-sim/templates/ingress.yaml new file mode 100644 index 00000000..a43687ff --- /dev/null +++ b/helm/llm-d-inference-sim/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "llm-d-inference-sim.fullname" . }} + labels: + {{- include "llm-d-inference-sim.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . }} + {{- end }} + backend: + service: + name: {{ include "llm-d-inference-sim.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/llm-d-inference-sim/templates/service.yaml b/helm/llm-d-inference-sim/templates/service.yaml new file mode 100644 index 00000000..b8c72e8f --- /dev/null +++ b/helm/llm-d-inference-sim/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "llm-d-inference-sim.fullname" . }} + labels: + {{- include "llm-d-inference-sim.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "llm-d-inference-sim.selectorLabels" . 
| nindent 4 }} diff --git a/helm/llm-d-inference-sim/values.yaml b/helm/llm-d-inference-sim/values.yaml new file mode 100644 index 00000000..0bcfb65a --- /dev/null +++ b/helm/llm-d-inference-sim/values.yaml @@ -0,0 +1,122 @@ +# Default values for llm-d-inference-sim. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 + +# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: ghcr.io/llm-d/llm-d-inference-sim + # This sets the pull policy for images. + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "v0.4.0" + +# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +# This is for setting Kubernetes Annotations to a Pod. +# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# This is for setting Kubernetes Labels to a Pod. 
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ +service: + # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 8001 + +# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: llm-d-inference-sim.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +livenessProbe: + httpGet: + path: / + port: http +readinessProbe: + httpGet: + path: / + port: http + +# Additional volumes on the output Deployment definition. +volumes: [] +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts on the output Deployment definition. +volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +config: + port: 8001 + model: "Qwen/Qwen2-0.5B" + servedModelName: + - "model1" + - "model2" + maxLoras: 2 + maxCpuLoras: 5 + maxNumSeqs: 1000 + maxModelLen: 32768 + loraModules: + - '{"name":"lora1","path":"/path/to/lora1"}' + - '{"name":"lora2","path":"/path/to/lora2"}' + mode: "echo" + timeToFirstToken: 500 + interTokenLatency: 50 + kvCacheTransferLatency: 50 + seed: 100100100 \ No newline at end of file