File tree Expand file tree Collapse file tree 3 files changed +55
-65
lines changed
Expand file tree Collapse file tree 3 files changed +55
-65
lines changed Original file line number Diff line number Diff line change 11apiVersion : v2
22name : lorax
3- description : LoRAX is the open-source framework for serving
3+ description : LoRAX is the open-source framework for serving
44 hundreds of fine-tuned LLMs in production for the price of one.
5- version : 0.3.0
5+ version : 0.4.0
66appVersion : 0.3.0
77
88home : https://github.com/predibase/lorax
@@ -11,15 +11,14 @@ annotations:
1111 artifacthub.io/category : ai-machine-learning
1212
1313keywords :
14- - lorax
15- - llama
16- - llm
17- - predibase
14+ - lorax
15+ - llama
16+ - llm
17+ - predibase
1818
1919maintainers :
20- - email : maintainers@predibase.com
21- name : Predibase
20+ - email : maintainers@predibase.com
21+ name : Predibase
2222
2323sources :
24- - https://github.com/predibase/lorax
25-
24+ - https://github.com/predibase/lorax
Original file line number Diff line number Diff line change @@ -33,25 +33,16 @@ spec:
3333 {{- end }}
3434 containers :
3535 - args :
36- - --model-id
37- - {{ .Values.deployment.args.modelId }}
38- - --max-input-length
39- - {{ .Values.deployment.args.maxInputLength | quote }}
40- - --max-total-tokens
41- - {{ .Values.deployment.args.maxTotalTokens | quote }}
42- - --max-batch-total-tokens
43- - {{ .Values.deployment.args.maxBatchTotalTokens | quote }}
44- - --max-batch-prefill-tokens
45- - {{ .Values.deployment.args.maxBatchPrefillTokens | quote }}
46- - --sharded
47- - {{ .Values.deployment.args.sharded | quote }}
48- - --eager-prefill
49- - {{ .Values.deployment.args.eagerPrefill | quote }}
36+ {{- range .Values.deployment.args }}
37+ - {{ .name }}
38+ {{- if .value }}
39+ - {{ .value | quote }}
40+ {{- end }}
41+ {{- end }}
5042 env :
5143 - name : PORT
5244 value : " 8000"
53- - name : HUGGING_FACE_HUB_TOKEN
54- value : {{ .Values.deployment.env.huggingFaceHubToken | quote }}
45+ {{- toYaml .Values.deployment.env | nindent 8 }}
5546 image : {{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}
5647 imagePullPolicy : IfNotPresent
5748 livenessProbe : {{ toYaml .Values.deployment.livenessProbe | nindent 10 }}
6859 - mountPath : /dev/shm
6960 name : shm
7061 {{- if .Values.deployment.tolerations }}
71- tolerations :
62+ tolerations :
7263 {{- toYaml .Values.deployment.tolerations | nindent 6 }}
7364 {{- end }}
7465 nodeSelector : {{ toYaml .Values.deployment.nodeSelector | nindent 8 }}
Original file line number Diff line number Diff line change @@ -7,54 +7,55 @@ deployment:
77 tag : " latest"
88
99 args :
10- modelId : " mistralai/Mistral-7B-Instruct-v0.1"
11- maxInputLength : 512
12- maxTotalTokens : 1024
13- maxBatchTotalTokens : 4096
14- maxBatchPrefillTokens : 2048
15- sharded : false
16- eagerPrefill : false
10+ - name : " --model-id"
11+ value : " mistralai/Mistral-7B-Instruct-v0.1"
12+ - name : " --max-input-length"
13+ value : " 512"
14+ - name : " --max-total-tokens"
15+ value : " 1024"
16+ - name : " --max-batch-total-tokens"
17+ value : " 4096"
18+ - name : " --max-batch-prefill-tokens"
19+ value : " 2048"
20+ - name : " --eager-prefill"
21+ value : " false"
22+ - name : " --compile"
23+ value : " " # --compile does not take a second argument
1724
1825 env :
1926 # Your huggingface hub token. Required for some models such as the llama-2 family.
20- huggingFaceHubToken : " "
21-
22- # Model types that support dynamic adapter loading
23- loraxEnabledModelTypes : " llama,mistral"
27+ - name : " HUGGING_FACE_HUB_TOKEN"
28+ value : " "
2429
2530 resources :
2631 limits :
27- cpu : " 8"
28- ephemeral-storage : 100Gi
29- memory : 27041Mi
3032 nvidia.com/gpu : " 1"
3133 requests :
32- cpu : " 8"
33- ephemeral-storage : 100Gi
34- memory : 27041Mi
3534 nvidia.com/gpu : " 1"
3635
37- livenessProbe :
38- failureThreshold : 240
39- httpGet :
40- path : /health
41- port : http
42- scheme : HTTP
43- initialDelaySeconds : 5
44- periodSeconds : 5
45- successThreshold : 1
46- timeoutSeconds : 1
36+ livenessProbe :
37+ {}
38+ # failureThreshold: 240
39+ # httpGet:
40+ # path: /health
41+ # port: http
42+ # scheme: HTTP
43+ # initialDelaySeconds: 5
44+ # periodSeconds: 5
45+ # successThreshold: 1
46+ # timeoutSeconds: 1
4747
48- readinessProbe :
49- failureThreshold : 600
50- httpGet :
51- path : /health
52- port : http
53- scheme : HTTP
54- initialDelaySeconds : 5
55- periodSeconds : 5
56- successThreshold : 1
57- timeoutSeconds : 1
48+ readinessProbe :
49+ {}
50+ # failureThreshold: 600
51+ # httpGet:
52+ # path: /health
53+ # port: http
54+ # scheme: HTTP
55+ # initialDelaySeconds: 5
56+ # periodSeconds: 5
57+ # successThreshold: 1
58+ # timeoutSeconds: 1
5859
5960 nodeSelector : {}
6061 tolerations : []
@@ -72,4 +73,3 @@ service:
7273 serviceType : ClusterIP
7374 port : 80
7475 additionalLabels : {}
75-
You can’t perform that action at this time.
0 commit comments