Skip to content

Commit 7195d25

Browse files
authored
improvements to the chart (#716)
1 parent 095e892 commit 7195d25

File tree

3 files changed

+55
-65
lines changed

3 files changed

+55
-65
lines changed

charts/lorax/Chart.yaml

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
apiVersion: v2
22
name: lorax
3-
description: LoRAX is the open-source framework for serving
3+
description: LoRAX is the open-source framework for serving
44
hundreds of fine-tuned LLMs in production for the price of one.
5-
version: 0.3.0
5+
version: 0.4.0
66
appVersion: 0.3.0
77

88
home: https://github.com/predibase/lorax
@@ -11,15 +11,14 @@ annotations:
1111
artifacthub.io/category: ai-machine-learning
1212

1313
keywords:
14-
- lorax
15-
- llama
16-
- llm
17-
- predibase
14+
- lorax
15+
- llama
16+
- llm
17+
- predibase
1818

1919
maintainers:
20-
- email: maintainers@predibase.com
21-
name: Predibase
20+
- email: maintainers@predibase.com
21+
name: Predibase
2222

2323
sources:
24-
- https://github.com/predibase/lorax
25-
24+
- https://github.com/predibase/lorax

charts/lorax/templates/deployment.yaml

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,25 +33,16 @@ spec:
3333
{{- end }}
3434
containers:
3535
- args:
36-
- --model-id
37-
- {{ .Values.deployment.args.modelId }}
38-
- --max-input-length
39-
- {{ .Values.deployment.args.maxInputLength | quote }}
40-
- --max-total-tokens
41-
- {{ .Values.deployment.args.maxTotalTokens | quote }}
42-
- --max-batch-total-tokens
43-
- {{ .Values.deployment.args.maxBatchTotalTokens | quote }}
44-
- --max-batch-prefill-tokens
45-
- {{ .Values.deployment.args.maxBatchPrefillTokens | quote }}
46-
- --sharded
47-
- {{ .Values.deployment.args.sharded | quote }}
48-
- --eager-prefill
49-
- {{ .Values.deployment.args.eagerPrefill | quote }}
36+
{{- range .Values.deployment.args }}
37+
- {{ .name }}
38+
{{- if .value }}
39+
- {{ .value | quote }}
40+
{{- end }}
41+
{{- end }}
5042
env:
5143
- name: PORT
5244
value: "8000"
53-
- name: HUGGING_FACE_HUB_TOKEN
54-
value: {{ .Values.deployment.env.huggingFaceHubToken | quote }}
45+
{{- toYaml .Values.deployment.env | nindent 8 }}
5546
image: {{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}
5647
imagePullPolicy: IfNotPresent
5748
livenessProbe: {{ toYaml .Values.deployment.livenessProbe | nindent 10 }}
@@ -68,7 +59,7 @@ spec:
6859
- mountPath: /dev/shm
6960
name: shm
7061
{{- if .Values.deployment.tolerations }}
71-
tolerations:
62+
tolerations:
7263
{{- toYaml .Values.deployment.tolerations | nindent 6 }}
7364
{{- end }}
7465
nodeSelector: {{ toYaml .Values.deployment.nodeSelector | nindent 8 }}

charts/lorax/values.yaml

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,54 +7,55 @@ deployment:
77
tag: "latest"
88

99
args:
10-
modelId: "mistralai/Mistral-7B-Instruct-v0.1"
11-
maxInputLength: 512
12-
maxTotalTokens: 1024
13-
maxBatchTotalTokens: 4096
14-
maxBatchPrefillTokens: 2048
15-
sharded: false
16-
eagerPrefill: false
10+
- name: "--model-id"
11+
value: "mistralai/Mistral-7B-Instruct-v0.1"
12+
- name: "--max-input-length"
13+
value: "512"
14+
- name: "--max-total-tokens"
15+
value: "1024"
16+
- name: "--max-batch-total-tokens"
17+
value: "4096"
18+
- name: "--max-batch-prefill-tokens"
19+
value: "2048"
20+
- name: "--eager-prefill"
21+
value: "false"
22+
- name: "--compile"
23+
value: "" # --compile does not take a second argument
1724

1825
env:
1926
# Your huggingface hub token. Required for some models such as the llama-2 family.
20-
huggingFaceHubToken: ""
21-
22-
# Model types that support dynamic adapter loading
23-
loraxEnabledModelTypes: "llama,mistral"
27+
- name: "HUGGING_FACE_HUB_TOKEN"
28+
value: ""
2429

2530
resources:
2631
limits:
27-
cpu: "8"
28-
ephemeral-storage: 100Gi
29-
memory: 27041Mi
3032
nvidia.com/gpu: "1"
3133
requests:
32-
cpu: "8"
33-
ephemeral-storage: 100Gi
34-
memory: 27041Mi
3534
nvidia.com/gpu: "1"
3635

37-
livenessProbe:
38-
failureThreshold: 240
39-
httpGet:
40-
path: /health
41-
port: http
42-
scheme: HTTP
43-
initialDelaySeconds: 5
44-
periodSeconds: 5
45-
successThreshold: 1
46-
timeoutSeconds: 1
36+
livenessProbe:
37+
{}
38+
# failureThreshold: 240
39+
# httpGet:
40+
# path: /health
41+
# port: http
42+
# scheme: HTTP
43+
# initialDelaySeconds: 5
44+
# periodSeconds: 5
45+
# successThreshold: 1
46+
# timeoutSeconds: 1
4747

48-
readinessProbe:
49-
failureThreshold: 600
50-
httpGet:
51-
path: /health
52-
port: http
53-
scheme: HTTP
54-
initialDelaySeconds: 5
55-
periodSeconds: 5
56-
successThreshold: 1
57-
timeoutSeconds: 1
48+
readinessProbe:
49+
{}
50+
# failureThreshold: 600
51+
# httpGet:
52+
# path: /health
53+
# port: http
54+
# scheme: HTTP
55+
# initialDelaySeconds: 5
56+
# periodSeconds: 5
57+
# successThreshold: 1
58+
# timeoutSeconds: 1
5859

5960
nodeSelector: {}
6061
tolerations: []
@@ -72,4 +73,3 @@ service:
7273
serviceType: ClusterIP
7374
port: 80
7475
additionalLabels: {}
75-

0 commit comments

Comments
 (0)