Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/llm-d-modelservice/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: "v0.2.9"
version: "v0.2.10"

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
36 changes: 21 additions & 15 deletions charts/llm-d-modelservice/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -43,25 +43,31 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Create a sanitized model name from .Release.Name that is a valid
DNS-1123 label: lowercase alphanumerics and "-", starting and ending
with an alphanumeric, at most 63 characters.
*/}}
{{- define "llm-d-modelservice.sanitizedModelName" -}}
{{- $name := .Release.Name | lower | trim -}}
{{- /* Sprig regexReplaceAll arg order is (pattern, input, replacement) */ -}}
{{- $name = regexReplaceAll "[^a-z0-9_.-]" $name "-" -}}
{{- /* must not start or end with a separator */ -}}
{{- $name = regexReplaceAll "^[\\-._]+" $name "" -}}
{{- $name = regexReplaceAll "[\\-._]+$" $name "" -}}
{{- /* DNS-1123 labels allow neither "." nor "_": fold both to "-" */ -}}
{{- $name = regexReplaceAll "[._]" $name "-" -}}
{{- if gt (len $name) 63 -}}
{{- $name = substr 0 63 $name -}}
{{- /* truncation can expose a trailing "-", which is not DNS-valid */ -}}
{{- $name = regexReplaceAll "-+$" $name "" -}}
{{- end -}}
{{- $name -}}
{{- end }}
{{/*
Defines a k8s-safe label value with the model repo name.
Input: the model artifact URI/name (e.g. "hf://facebook/opt-125m");
output: the final path segment, sanitized to a valid label value
(alphanumerics plus [._-], alphanumeric at both ends, <= 63 chars),
falling back to "model" when nothing usable remains.

NOTE: Sprig's regexReplaceAll argument order is (pattern, input,
replacement). The previous revision passed (pattern, replacement,
input) below, so the regexes ran against the literal replacement
string, $repo always collapsed to "", and the fallback rendered
every model-repo label as "model" regardless of the actual model.
*/}}
{{- define "llm-d-modelservice.modelRepoLabel" -}}
{{- $in := default "" . -}}
{{- /* strip everything up to and including the last "/" */ -}}
{{- $repo := regexReplaceAll "^.*/" $in "" -}}
{{- /* keep only [A-Za-z0-9._-], replace others with "-" */ -}}
{{- $repo = regexReplaceAll "[^A-Za-z0-9._-]" $repo "-" -}}
{{- /* must start/end with alphanumeric */ -}}
{{- $repo = regexReplaceAll "^[^A-Za-z0-9]+" $repo "" -}}
{{- $repo = regexReplaceAll "[^A-Za-z0-9]+$" $repo "" -}}
{{- if eq $repo "" }}{{- $repo = "model" -}}{{- end -}}
{{- if gt (len $repo) 63 -}}
{{- $repo = trunc 63 $repo -}}
{{- /* truncation can expose a trailing non-alphanumeric */ -}}
{{- $repo = regexReplaceAll "[^A-Za-z0-9]+$" $repo "" -}}
{{- if eq $repo "" }}{{- $repo = "model" -}}{{- end -}}
{{- end -}}
{{- $repo -}}
{{- end -}}

{{/* Create common labels shared by prefill and decode deployment/LWS */}}
{{- define "llm-d-modelservice.pdlabels" -}}
llm-d.ai/inferenceServing: "true"
{{- /* no trailing "-}}" here: trimming the newline would fuse this
       label with the model-repo line in the rendered manifest */}}
llm-d.ai/model: {{ include "llm-d-modelservice.fullname" . }}
llm-d.ai/model-repo: {{ (include "llm-d-modelservice.modelRepoLabel" .Values.modelArtifacts.name ) -}}
{{- end }}

{{/* Create labels for the prefill deployment/LWS */}}
Expand Down
22 changes: 11 additions & 11 deletions examples/output-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: cpu-sim-llm-d-modelservice-epp
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: ServiceAccount
metadata:
name: cpu-sim-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
---
Expand Down Expand Up @@ -188,7 +188,7 @@ kind: Service
metadata:
name: cpu-sim-llm-d-modelservice-epp
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand All @@ -208,21 +208,21 @@ kind: Deployment
metadata:
name: cpu-sim-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: cpu-sim-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: decode
template:
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: cpu-sim-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: decode
spec:
initContainers:
Expand Down Expand Up @@ -348,21 +348,21 @@ kind: Deployment
metadata:
name: cpu-sim-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: cpu-sim-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: prefill
template:
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: cpu-sim-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: prefill
spec:

Expand Down Expand Up @@ -422,7 +422,7 @@ spec:
name: cpu-sim-llm-d-modelservice-epp
selector:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: cpu-sim-llm-d-modelservice
llm-d.ai/model-repo: model
targetPortNumber: 8000
---
# Source: llm-d-modelservice/templates/httproute.yaml
Expand All @@ -432,7 +432,7 @@ metadata:
name: cpu-sim-llm-d-modelservice
namespace: default
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
annotations:
Expand Down
24 changes: 12 additions & 12 deletions examples/output-pd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: pd-llm-d-modelservice-epp
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: ServiceAccount
metadata:
name: pd-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
---
Expand Down Expand Up @@ -188,7 +188,7 @@ kind: Service
metadata:
name: pd-llm-d-modelservice-epp
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand All @@ -208,21 +208,21 @@ kind: Deployment
metadata:
name: pd-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pd-llm-d-modelservice
llm-d.ai/model-repo: model
Copy link
Collaborator

@jgchn jgchn Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking values-pd.yaml, shouldn't the output of this example be facebook-opt-125m?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is a bug. What happens is the contexts get nested, so it actually loses the global values applied from the overlay values file and somehow just gets the default values. It's pulling from random/model and parsing it to grab the repo name "model". Weird behaviour — will convert to draft until I figure it out.

llm-d.ai/role: decode
template:
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pd-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: decode
spec:
initContainers:
Expand Down Expand Up @@ -388,21 +388,21 @@ kind: Deployment
metadata:
name: pd-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pd-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: prefill
template:
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pd-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: prefill
spec:

Expand Down Expand Up @@ -476,7 +476,7 @@ kind: InferenceModel
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pd-llm-d-modelservice
llm-d.ai/model-repo: model
name: pd-llm-d-modelservice
spec:
criticality: Critical
Expand All @@ -498,7 +498,7 @@ spec:
name: pd-llm-d-modelservice-epp
selector:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pd-llm-d-modelservice
llm-d.ai/model-repo: model
targetPortNumber: 8000
---
# Source: llm-d-modelservice/templates/httproute.yaml
Expand All @@ -508,7 +508,7 @@ metadata:
name: pd-llm-d-modelservice
namespace: default
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
annotations:
Expand Down
24 changes: 12 additions & 12 deletions examples/output-pvc-hf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: pvc-hf-llm-d-modelservice-epp
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: ServiceAccount
metadata:
name: pvc-hf-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
---
Expand Down Expand Up @@ -188,7 +188,7 @@ kind: Service
metadata:
name: pvc-hf-llm-d-modelservice-epp
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand All @@ -208,21 +208,21 @@ kind: Deployment
metadata:
name: pvc-hf-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pvc-hf-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: decode
template:
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pvc-hf-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: decode
spec:
initContainers:
Expand Down Expand Up @@ -388,21 +388,21 @@ kind: Deployment
metadata:
name: pvc-hf-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pvc-hf-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: prefill
template:
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pvc-hf-llm-d-modelservice
llm-d.ai/model-repo: model
llm-d.ai/role: prefill
spec:

Expand Down Expand Up @@ -476,7 +476,7 @@ kind: InferenceModel
metadata:
labels:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pvc-hf-llm-d-modelservice
llm-d.ai/model-repo: model
name: pvc-hf-llm-d-modelservice
spec:
criticality: Critical
Expand All @@ -498,7 +498,7 @@ spec:
name: pvc-hf-llm-d-modelservice-epp
selector:
llm-d.ai/inferenceServing: "true"
llm-d.ai/model: pvc-hf-llm-d-modelservice
llm-d.ai/model-repo: model
targetPortNumber: 8000
---
# Source: llm-d-modelservice/templates/httproute.yaml
Expand All @@ -508,7 +508,7 @@ metadata:
name: pvc-hf-llm-d-modelservice
namespace: default
labels:
helm.sh/chart: llm-d-modelservice-v0.2.9
helm.sh/chart: llm-d-modelservice-v0.2.10
app.kubernetes.io/version: "v0.2.0"
app.kubernetes.io/managed-by: Helm
annotations:
Expand Down
Loading