diff --git a/charts/llm-d-modelservice/Chart.yaml b/charts/llm-d-modelservice/Chart.yaml index ba24b0c..123c006 100644 --- a/charts/llm-d-modelservice/Chart.yaml +++ b/charts/llm-d-modelservice/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: "v0.2.9" +version: "v0.2.10" # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/charts/llm-d-modelservice/templates/_helpers.tpl b/charts/llm-d-modelservice/templates/_helpers.tpl index 45767a5..e9e15ac 100644 --- a/charts/llm-d-modelservice/templates/_helpers.tpl +++ b/charts/llm-d-modelservice/templates/_helpers.tpl @@ -43,25 +43,31 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} app.kubernetes.io/managed-by: {{ .Release.Service }} {{- end }} -{{/* Create sanitized model name (DNS compliant) */}} -{{- define "llm-d-modelservice.sanitizedModelName" -}} - {{- $name := .Release.Name | lower | trim -}} - {{- $name = regexReplaceAll "[^a-z0-9_.-]" $name "-" -}} - {{- $name = regexReplaceAll "^[\\-._]+" $name "" -}} - {{- $name = regexReplaceAll "[\\-._]+$" $name "" -}} - {{- $name = regexReplaceAll "\\." $name "-" -}} - - {{- if gt (len $name) 63 -}} - {{- $name = substr 0 63 $name -}} - {{- end -}} - -{{- $name -}} -{{- end }} +{{/* + Defines a k8s safe label with the model repo name +*/}} +{{- define "llm-d-modelservice.modelRepoLabel" -}} +{{- $in := default "" . 
-}} +{{- /* strip everything up to and including the last "/" */ -}} +{{- $repo := regexReplaceAll "^.*/" $in "" -}} +{{- /* keep only [A-Za-z0-9._-], replace others with "-" */ -}} +{{- $repo = regexReplaceAll "[^A-Za-z0-9._-]" $repo "-" -}} +{{- /* must start/end with alphanumeric */ -}} +{{- $repo = regexReplaceAll "^[^A-Za-z0-9]+" $repo "" -}} +{{- $repo = regexReplaceAll "[^A-Za-z0-9]+$" $repo "" -}} +{{- if eq $repo "" }}{{- $repo = "model" -}}{{- end -}} +{{- if gt (len $repo) 63 -}} + {{- $repo = trunc 63 $repo -}} + {{- $repo = regexReplaceAll "[^A-Za-z0-9]+$" $repo "" -}} + {{- if eq $repo "" }}{{- $repo = "model" -}}{{- end -}} +{{- end -}} +{{- $repo -}} +{{- end -}} {{/* Create common shared by prefill and decode deployment/LWS */}} {{- define "llm-d-modelservice.pdlabels" -}} llm-d.ai/inferenceServing: "true" -llm-d.ai/model: {{ (include "llm-d-modelservice.fullname" .) -}} +llm-d.ai/model-repo: {{ (include "llm-d-modelservice.modelRepoLabel" .Values.modelArtifacts.name ) -}} {{- end }} {{/* Create labels for the prefill deployment/LWS */}} diff --git a/examples/output-cpu.yaml b/examples/output-cpu.yaml index 77125b7..da61f29 100644 --- a/examples/output-cpu.yaml +++ b/examples/output-cpu.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: cpu-sim-llm-d-modelservice-epp labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: ServiceAccount metadata: name: cpu-sim-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -188,7 +188,7 @@ kind: Service metadata: name: cpu-sim-llm-d-modelservice-epp labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -208,7 
+208,7 @@ kind: Deployment metadata: name: cpu-sim-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -216,13 +216,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: cpu-sim-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode template: metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: cpu-sim-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode spec: initContainers: @@ -348,7 +348,7 @@ kind: Deployment metadata: name: cpu-sim-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -356,13 +356,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: cpu-sim-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill template: metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: cpu-sim-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill spec: @@ -422,7 +422,7 @@ spec: name: cpu-sim-llm-d-modelservice-epp selector: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: cpu-sim-llm-d-modelservice + llm-d.ai/model-repo: model targetPortNumber: 8000 --- # Source: llm-d-modelservice/templates/httproute.yaml @@ -432,7 +432,7 @@ metadata: name: cpu-sim-llm-d-modelservice namespace: default labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm annotations: diff --git a/examples/output-pd.yaml b/examples/output-pd.yaml index 4261c91..91d7a22 100644 --- a/examples/output-pd.yaml +++ b/examples/output-pd.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pd-llm-d-modelservice-epp labels: - helm.sh/chart: 
llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: ServiceAccount metadata: name: pd-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -188,7 +188,7 @@ kind: Service metadata: name: pd-llm-d-modelservice-epp labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -208,7 +208,7 @@ kind: Deployment metadata: name: pd-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -216,13 +216,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pd-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode template: metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pd-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode spec: initContainers: @@ -388,7 +388,7 @@ kind: Deployment metadata: name: pd-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -396,13 +396,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pd-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill template: metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pd-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill spec: @@ -476,7 +476,7 @@ kind: InferenceModel metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pd-llm-d-modelservice + llm-d.ai/model-repo: model 
name: pd-llm-d-modelservice spec: criticality: Critical @@ -498,7 +498,7 @@ spec: name: pd-llm-d-modelservice-epp selector: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pd-llm-d-modelservice + llm-d.ai/model-repo: model targetPortNumber: 8000 --- # Source: llm-d-modelservice/templates/httproute.yaml @@ -508,7 +508,7 @@ metadata: name: pd-llm-d-modelservice namespace: default labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm annotations: diff --git a/examples/output-pvc-hf.yaml b/examples/output-pvc-hf.yaml index 52464ae..68f4c0c 100644 --- a/examples/output-pvc-hf.yaml +++ b/examples/output-pvc-hf.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pvc-hf-llm-d-modelservice-epp labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: ServiceAccount metadata: name: pvc-hf-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -188,7 +188,7 @@ kind: Service metadata: name: pvc-hf-llm-d-modelservice-epp labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -208,7 +208,7 @@ kind: Deployment metadata: name: pvc-hf-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -216,13 +216,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-hf-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode template: metadata: labels: llm-d.ai/inferenceServing: "true" - 
llm-d.ai/model: pvc-hf-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode spec: initContainers: @@ -388,7 +388,7 @@ kind: Deployment metadata: name: pvc-hf-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -396,13 +396,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-hf-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill template: metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-hf-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill spec: @@ -476,7 +476,7 @@ kind: InferenceModel metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-hf-llm-d-modelservice + llm-d.ai/model-repo: model name: pvc-hf-llm-d-modelservice spec: criticality: Critical @@ -498,7 +498,7 @@ spec: name: pvc-hf-llm-d-modelservice-epp selector: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-hf-llm-d-modelservice + llm-d.ai/model-repo: model targetPortNumber: 8000 --- # Source: llm-d-modelservice/templates/httproute.yaml @@ -508,7 +508,7 @@ metadata: name: pvc-hf-llm-d-modelservice namespace: default labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm annotations: diff --git a/examples/output-pvc.yaml b/examples/output-pvc.yaml index ee76b15..fbfa8cc 100644 --- a/examples/output-pvc.yaml +++ b/examples/output-pvc.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pvc-llm-d-modelservice-epp labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: ServiceAccount metadata: name: pvc-llm-d-modelservice labels: - helm.sh/chart: 
llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -188,7 +188,7 @@ kind: Service metadata: name: pvc-llm-d-modelservice-epp labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -208,7 +208,7 @@ kind: Deployment metadata: name: pvc-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -216,13 +216,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode template: metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: decode spec: initContainers: @@ -386,7 +386,7 @@ kind: Deployment metadata: name: pvc-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -394,13 +394,13 @@ spec: selector: matchLabels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill template: metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-llm-d-modelservice + llm-d.ai/model-repo: model llm-d.ai/role: prefill spec: @@ -472,7 +472,7 @@ kind: InferenceModel metadata: labels: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-llm-d-modelservice + llm-d.ai/model-repo: model name: pvc-llm-d-modelservice spec: criticality: Critical @@ -494,7 +494,7 @@ spec: name: pvc-llm-d-modelservice-epp selector: llm-d.ai/inferenceServing: "true" - llm-d.ai/model: pvc-llm-d-modelservice + llm-d.ai/model-repo: model 
targetPortNumber: 8000 --- # Source: llm-d-modelservice/templates/httproute.yaml @@ -504,7 +504,7 @@ metadata: name: pvc-llm-d-modelservice namespace: default labels: - helm.sh/chart: llm-d-modelservice-v0.2.9 + helm.sh/chart: llm-d-modelservice-v0.2.10 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm annotations: