Skip to content

Commit d8076b9

Browse files
[BugFix] fix lora controller reconcile logic (#565)
* [CI] Add prefix aware routing test
  Signed-off-by: Rui Zhang <[email protected]>
* [ci] refactor k8s discovery e2e test
  Signed-off-by: Rui Zhang <[email protected]>
* [CI] Refactor static discovery testing so that it can support multiple logic
  Signed-off-by: Rui Zhang <[email protected]>
* [CI] Add static e2e test for prefixaware
  Signed-off-by: Rui Zhang <[email protected]>
* refactor the code
  Signed-off-by: Rui Zhang <[email protected]>
* [CI] refactor
  Signed-off-by: Rui Zhang <[email protected]>
* [CI] Add multiple routing logic test
  Signed-off-by: Rui Zhang <[email protected]>
* [CI] fix bug
  Signed-off-by: Rui Zhang <[email protected]>
* modify
  Signed-off-by: Rui Zhang <[email protected]>
* modify docker file
  Signed-off-by: Rui Zhang <[email protected]>

---------

Signed-off-by: Rui Zhang <[email protected]>
1 parent 4e92dde commit d8076b9

File tree

6 files changed

+302
-77
lines changed

6 files changed

+302
-77
lines changed

operator/Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@ COPY internal/ internal/
2323
# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
2424
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go
2525

26-
# Use distroless as minimal base image to package the manager binary
27-
# Refer to https://github.com/GoogleContainerTools/distroless for more details
28-
FROM gcr.io/distroless/static:nonroot
26+
# Use python as minimal base image to package the manager binary
27+
FROM python:3.11-slim
2928
WORKDIR /
3029
COPY --from=builder /workspace/manager .
30+
RUN pip install --no-cache-dir huggingface_hub[cli]
31+
3132
USER 65532:65532
3233

3334
ENTRYPOINT ["/manager"]

operator/api/v1alpha1/loraadapter_types.go

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ type LoraAdapterSpec struct {
3434
// DeploymentConfig defines how the adapter should be deployed
3535
LoraAdapterDeploymentConfig LoraAdapterDeploymentConfig `json:"loraAdapterDeploymentConfig,omitempty"`
3636
// VLLMApiKey defines the configuration for vLLM API key authentication
37-
VLLMApiKey *VLLMApiKeyConfig `json:"vllmApiKey,omitempty"`
37+
VLLMApiKey *VLLMApiKeySecretRef `json:"vllmApiKey,omitempty"`
3838
}
3939

4040
type AdapterSource struct {
@@ -59,8 +59,12 @@ type AdapterSource struct {
5959

6060
// +mapType=atomic
6161
type SecretRef struct {
62-
// Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?
63-
Name string `json:"name,omitempty"`
62+
// Name of the secret
63+
// +kubebuilder:validation:Required
64+
Name string `json:"name"`
65+
// Key in the secret containing the value
66+
// +kubebuilder:validation:Required
67+
Key string `json:"key"`
6468
}
6569

6670
type LoraAdapterDeploymentConfig struct {
@@ -74,16 +78,6 @@ type LoraAdapterDeploymentConfig struct {
7478
Replicas *int32 `json:"replicas,omitempty"`
7579
}
7680

77-
// VLLMApiKeyConfig defines how to obtain the vLLM API key
78-
type VLLMApiKeyConfig struct {
79-
// Direct API key value
80-
// +optional
81-
Value string `json:"value,omitempty"`
82-
// Reference to a secret containing the API key
83-
// +optional
84-
SecretRef *VLLMApiKeySecretRef `json:"secretRef,omitempty"`
85-
}
86-
8781
// VLLMApiKeySecretRef defines the reference to a secret containing the API key
8882
type VLLMApiKeySecretRef struct {
8983
// Name of the secret
@@ -122,7 +116,6 @@ type Condition struct {
122116
// Reason is a brief reason for the condition's current status.
123117
// +kubebuilder:validation:MaxLength=1024
124118
// +kubebuilder:validation:MinLength=1
125-
// +kubebuilder:validation:Pattern=`^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$`
126119
// +kubebuilder:validation:Required
127120
Reason string `json:"reason"`
128121
// Status is the status of the condition.

operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 2 additions & 22 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

operator/config/crd/bases/production-stack.vllm.ai_loraadapters.yaml

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,15 @@ spec:
6262
description: CredentialsSecretRef references a secret containing
6363
storage credentials.
6464
properties:
65+
key:
66+
description: Key in the secret containing the value
67+
type: string
6568
name:
66-
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
67-
TODO: Add other useful fields. apiVersion, kind, uid?'
69+
description: Name of the secret
6870
type: string
71+
required:
72+
- key
73+
- name
6974
type: object
7075
x-kubernetes-map-type: atomic
7176
maxAdapters:
@@ -121,22 +126,15 @@ spec:
121126
description: VLLMApiKey defines the configuration for vLLM API key
122127
authentication
123128
properties:
124-
secretRef:
125-
description: Reference to a secret containing the API key
126-
properties:
127-
secretKey:
128-
description: Key in the secret containing the API key
129-
type: string
130-
secretName:
131-
description: Name of the secret
132-
type: string
133-
required:
134-
- secretKey
135-
- secretName
136-
type: object
137-
value:
138-
description: Direct API key value
129+
secretKey:
130+
description: Key in the secret containing the API key
139131
type: string
132+
secretName:
133+
description: Name of the secret
134+
type: string
135+
required:
136+
- secretKey
137+
- secretName
140138
type: object
141139
required:
142140
- adapterSource
@@ -167,7 +165,6 @@ spec:
167165
status.
168166
maxLength: 1024
169167
minLength: 1
170-
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
171168
type: string
172169
status:
173170
description: Status is the status of the condition.

0 commit comments

Comments
 (0)