Skip to content

Commit 9b2b9cd

Browse files
authored
Merge pull request llm-d#451 from llm-d-incubation/runner-ci-policy
Add ok-to-test gate, use cluster HF token for e2e tests, fix istio issues, run wva against 2 different stacks simultaneously
2 parents b055550 + bf65d99 commit 9b2b9cd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1454
-458
lines changed

.github/workflows/ci-e2e-openshift.yaml

Lines changed: 507 additions & 53 deletions
Large diffs are not rendered by default.

.github/workflows/ci-pr-checks.yaml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,10 @@ jobs:
3636

3737
# - name: Run precommit checks
3838
# run: make precommit
39-
40-
- name: Run make test-e2e
41-
shell: bash
42-
run: |
43-
make test-e2e
44-
39+
40+
# Note: test-e2e requires Kind and a full cluster setup, so it is not run here.
41+
# E2E tests run in the separate ci-e2e-openshift workflow on a self-hosted runner.
42+
4543
- name: Run make build
4644
shell: bash
4745
run: |

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,36 @@ spec:
142142
143143
More examples in [config/samples/](config/samples/).
144144
145+
## Upgrading
146+
147+
### CRD Updates
148+
149+
**Important:** Helm does not automatically update CRDs during `helm upgrade`. When upgrading WVA to a new version with CRD changes, you must manually apply the updated CRDs first:
150+
151+
```bash
152+
# Apply the latest CRDs before upgrading
153+
kubectl apply -f charts/workload-variant-autoscaler/crds/
154+
155+
# Then upgrade the Helm release
156+
helm upgrade workload-variant-autoscaler ./charts/workload-variant-autoscaler \
157+
--namespace workload-variant-autoscaler-system \
158+
[your-values...]
159+
```
160+
161+
### Breaking Changes
162+
163+
#### v0.5.0 (upcoming)
164+
- **VariantAutoscaling CRD**: Added `scaleTargetRef` field to explicitly specify the target deployment. If not set, the controller infers the target from the `modelID` field.
165+
166+
### Verifying CRD Version
167+
168+
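To check whether your cluster has the latest CRD schema, list the spec fields of the installed CRD and confirm that the fields introduced by the new version (for example, `scaleTargetRef`) are present: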
169+
170+
```bash
171+
# Check the CRD fields
172+
kubectl get crd variantautoscalings.llmd.ai -o jsonpath='{.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties}' | jq 'keys'
173+
```
174+
145175
## Contributing
146176

147177
We welcome contributions! See the llm-d Contributing Guide for guidelines.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{{/*
2+
Expand the name of the chart.
3+
*/}}
4+
{{- define "workload-variant-autoscaler.name" -}}
5+
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6+
{{- end }}
7+
8+
{{/*
9+
Create a default fully qualified app name.
10+
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11+
If the release name already contains the chart name (or nameOverride), the release name alone is used as the full name.
12+
*/}}
13+
{{- define "workload-variant-autoscaler.fullname" -}}
14+
{{- if .Values.fullnameOverride }}
15+
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16+
{{- else }}
17+
{{- $name := default .Chart.Name .Values.nameOverride }}
18+
{{- if contains $name .Release.Name }}
19+
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
20+
{{- else }}
21+
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22+
{{- end }}
23+
{{- end }}
24+
{{- end }}
25+
26+
{{/*
27+
Create chart name and version as used by the chart label.
28+
*/}}
29+
{{- define "workload-variant-autoscaler.chart" -}}
30+
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31+
{{- end }}
32+
33+
{{/*
34+
Common labels
35+
*/}}
36+
{{- define "workload-variant-autoscaler.labels" -}}
37+
helm.sh/chart: {{ include "workload-variant-autoscaler.chart" . }}
38+
{{ include "workload-variant-autoscaler.selectorLabels" . }}
39+
{{- if .Chart.AppVersion }}
40+
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41+
{{- end }}
42+
app.kubernetes.io/managed-by: {{ .Release.Service }}
43+
{{- end }}
44+
45+
{{/*
46+
Selector labels
47+
*/}}
48+
{{- define "workload-variant-autoscaler.selectorLabels" -}}
49+
app.kubernetes.io/name: {{ include "workload-variant-autoscaler.name" . }}
50+
app.kubernetes.io/instance: {{ .Release.Name }}
51+
{{- end }}
52+
53+
{{/*
54+
Create the name of the service account to use
55+
*/}}
56+
{{- define "workload-variant-autoscaler.serviceAccountName" -}}
57+
{{- default (include "workload-variant-autoscaler.fullname" .) .Values.serviceAccount.name }}
58+
{{- end }}

charts/workload-variant-autoscaler/templates/hpa.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
apiVersion: autoscaling/v2
33
kind: HorizontalPodAutoscaler
44
metadata:
5-
name: vllm-deployment-hpa
5+
name: {{ include "workload-variant-autoscaler.fullname" . }}-hpa
66
namespace: {{ .Values.llmd.namespace }}
7+
labels:
8+
{{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
79
spec:
810
scaleTargetRef:
911
apiVersion: apps/v1
@@ -34,6 +36,7 @@ spec:
3436
selector:
3537
matchLabels:
3638
variant_name: {{ printf "%s-decode" .Values.llmd.modelName }}
39+
exported_namespace: {{ .Values.llmd.namespace }}
3740
target:
3841
type: AverageValue
3942
averageValue: {{ .Values.hpa.targetAverageValue | quote }}
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1+
{{- if .Values.controller.enabled }}
12
apiVersion: rbac.authorization.k8s.io/v1
23
kind: ClusterRoleBinding
34
metadata:
4-
name: prometheus-adapter-monitoring
5+
name: {{ include "workload-variant-autoscaler.fullname" . }}-prometheus-adapter-monitoring
56
labels:
6-
app.kubernetes.io/name: workload-variant-autoscaler
7-
app.kubernetes.io/instance: {{ .Release.Name }}
7+
{{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
88
roleRef:
99
apiGroup: rbac.authorization.k8s.io
1010
kind: ClusterRole
@@ -13,3 +13,4 @@ subjects:
1313
- kind: ServiceAccount
1414
name: {{ .Values.wva.prometheus.serviceAccountName }}
1515
namespace: {{ .Values.wva.prometheus.monitoringNamespace }}
16+
{{- end }}
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1+
{{- if .Values.controller.enabled }}
12
apiVersion: rbac.authorization.k8s.io/v1
23
kind: ClusterRoleBinding
34
metadata:
4-
name: workload-variant-autoscaler-monitoring
5+
name: {{ include "workload-variant-autoscaler.fullname" . }}-monitoring
56
labels:
6-
app.kubernetes.io/name: workload-variant-autoscaler
7-
app.kubernetes.io/instance: {{ .Release.Name }}
7+
{{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
88
roleRef:
99
apiGroup: rbac.authorization.k8s.io
1010
kind: ClusterRole
1111
name: cluster-monitoring-view
1212
subjects:
1313
- kind: ServiceAccount
14-
name: workload-variant-autoscaler-controller-manager
14+
name: {{ include "workload-variant-autoscaler.fullname" . }}-controller-manager
1515
namespace: {{ .Release.Namespace }}
16+
{{- end }}

charts/workload-variant-autoscaler/templates/manager/wva-configmap-accelerator-costs.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
{{- if .Values.controller.enabled }}
12
apiVersion: v1
23
kind: ConfigMap
34
metadata:
4-
name: accelerator-unit-costs
5+
name: {{ include "workload-variant-autoscaler.fullname" . }}-accelerator-unit-costs
56
namespace: {{ .Release.Namespace }}
7+
labels:
8+
{{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
69
data:
710
A100: |
811
{
@@ -34,3 +37,4 @@ data:
3437
"device": "NVIDIA-L40S",
3538
"cost": "32.00"
3639
}
40+
{{- end }}

charts/workload-variant-autoscaler/templates/manager/wva-configmap-service-class.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{{- if .Values.controller.enabled }}
12
apiVersion: v1
23
kind: ConfigMap
34
# This configMap defines the set of accelerators available
@@ -9,8 +10,10 @@ kind: ConfigMap
910
# - cost is the cents/hour cost of this accelerator
1011
#
1112
metadata:
12-
name: service-classes-config
13+
name: {{ include "workload-variant-autoscaler.fullname" . }}-service-classes-config
1314
namespace: {{ .Release.Namespace }}
15+
labels:
16+
{{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
1417
data:
1518
premium.yaml: |
1619
name: Premium
@@ -35,3 +38,4 @@ data:
3538
- model: meta/llama0-7b
3639
slo-tpot: 150
3740
slo-ttft: 1500
41+
{{- end }}

charts/workload-variant-autoscaler/templates/manager/wva-configmap.yaml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1+
{{- if .Values.controller.enabled }}
12
apiVersion: v1
23
kind: ConfigMap
34
metadata:
4-
name: workload-variant-autoscaler-variantautoscaling-config
5+
name: {{ include "workload-variant-autoscaler.fullname" . }}-variantautoscaling-config
56
namespace: {{ .Release.Namespace }}
67
labels:
7-
app.kubernetes.io/name: workload-variant-autoscaler
8+
{{- include "workload-variant-autoscaler.labels" . | nindent 4 }}
89
data:
910
# Prometheus configuration - REQUIRED: Set your Prometheus server URL
1011
# Examples:
@@ -21,11 +22,11 @@ data:
2122
# PROMETHEUS_CLIENT_KEY_PATH: "/path/to/client.key" # Client private key for mutual TLS
2223
# PROMETHEUS_SERVER_NAME: "prometheus.example.com" # Expected server name for SNI
2324
PROMETHEUS_TLS_INSECURE_SKIP_VERIFY: {{ if and .Values.wva.prometheus.tls (hasKey .Values.wva.prometheus.tls "insecureSkipVerify") }}{{ .Values.wva.prometheus.tls.insecureSkipVerify | quote }}{{ else }}"true"{{ end }}
24-
25+
2526
# Authentication Configuration (BearerToken takes precedence over TokenPath)
2627
# PROMETHEUS_BEARER_TOKEN: "your-token-here" # Direct bearer token (development/testing)
2728
# PROMETHEUS_TOKEN_PATH: "/path/to/token/file" # Path to bearer token file (production with mounted secrets)
28-
29+
2930
# Optimization configuration
3031
GLOBAL_OPT_INTERVAL: {{ .Values.wva.reconcileInterval | quote }}
3132

@@ -56,3 +57,4 @@ data:
5657
# EPP_METRICS_CACHE_TTL: "15s"
5758
# EPP_METRICS_CACHE_MAX_SIZE: "500"
5859
# EPP_METRICS_CACHE_CLEANUP_INTERVAL: "30s"
60+
{{- end }}

0 commit comments

Comments
 (0)