Skip to content

Commit 7195d25

Browse files
authored
improvements to the chart (#716)
1 parent 095e892 commit 7195d25

File tree

3 files changed

+55
-65
lines changed

3 files changed

+55
-65
lines changed

charts/lorax/Chart.yaml

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
apiVersion: v2
22
name: lorax
3-
description: LoRAX is the open-source framework for serving
3+
description: LoRAX is the open-source framework for serving
44
hundreds of fine-tuned LLMs in production for the price of one.
5-
version: 0.3.0
5+
version: 0.4.0
66
appVersion: 0.3.0
77

88
home: https://github.com/predibase/lorax
@@ -11,15 +11,14 @@ annotations:
1111
artifacthub.io/category: ai-machine-learning
1212

1313
keywords:
14-
- lorax
15-
- llama
16-
- llm
17-
- predibase
14+
- lorax
15+
- llama
16+
- llm
17+
- predibase
1818

1919
maintainers:
20-
- email: maintainers@predibase.com
21-
name: Predibase
20+
- email: maintainers@predibase.com
21+
name: Predibase
2222

2323
sources:
24-
- https://github.com/predibase/lorax
25-
24+
- https://github.com/predibase/lorax

charts/lorax/templates/deployment.yaml

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -33,25 +33,16 @@ spec:
3333
{{- end }}
3434
containers:
3535
- args:
36-
- --model-id
37-
- {{ .Values.deployment.args.modelId }}
38-
- --max-input-length
39-
- {{ .Values.deployment.args.maxInputLength | quote }}
40-
- --max-total-tokens
41-
- {{ .Values.deployment.args.maxTotalTokens | quote }}
42-
- --max-batch-total-tokens
43-
- {{ .Values.deployment.args.maxBatchTotalTokens | quote }}
44-
- --max-batch-prefill-tokens
45-
- {{ .Values.deployment.args.maxBatchPrefillTokens | quote }}
46-
- --sharded
47-
- {{ .Values.deployment.args.sharded | quote }}
48-
- --eager-prefill
49-
- {{ .Values.deployment.args.eagerPrefill | quote }}
36+
{{- range .Values.deployment.args }}
37+
- {{ .name }}
38+
{{- if .value }}
39+
- {{ .value | quote }}
40+
{{- end }}
41+
{{- end }}
5042
env:
5143
- name: PORT
5244
value: "8000"
53-
- name: HUGGING_FACE_HUB_TOKEN
54-
value: {{ .Values.deployment.env.huggingFaceHubToken | quote }}
45+
{{- toYaml .Values.deployment.env | nindent 8 }}
5546
image: {{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}
5647
imagePullPolicy: IfNotPresent
5748
livenessProbe: {{ toYaml .Values.deployment.livenessProbe | nindent 10 }}
@@ -68,7 +59,7 @@ spec:
6859
- mountPath: /dev/shm
6960
name: shm
7061
{{- if .Values.deployment.tolerations }}
71-
tolerations:
62+
tolerations:
7263
{{- toYaml .Values.deployment.tolerations | nindent 6 }}
7364
{{- end }}
7465
nodeSelector: {{ toYaml .Values.deployment.nodeSelector | nindent 8 }}

charts/lorax/values.yaml

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,54 +7,55 @@ deployment:
77
tag: "latest"
88

99
args:
10-
modelId: "mistralai/Mistral-7B-Instruct-v0.1"
11-
maxInputLength: 512
12-
maxTotalTokens: 1024
13-
maxBatchTotalTokens: 4096
14-
maxBatchPrefillTokens: 2048
15-
sharded: false
16-
eagerPrefill: false
10+
- name: "--model-id"
11+
value: "mistralai/Mistral-7B-Instruct-v0.1"
12+
- name: "--max-input-length"
13+
value: "512"
14+
- name: "--max-total-tokens"
15+
value: "1024"
16+
- name: "--max-batch-total-tokens"
17+
value: "4096"
18+
- name: "--max-batch-prefill-tokens"
19+
value: "2048"
20+
- name: "--eager-prefill"
21+
value: "false"
22+
- name: "--compile"
23+
value: "" # --compile does not take a second argument
1724

1825
env:
1926
# Your huggingface hub token. Required for some models such as the llama-2 family.
20-
huggingFaceHubToken: ""
21-
22-
# Model types that support dynamic adapter loading
23-
loraxEnabledModelTypes: "llama,mistral"
27+
- name: "HUGGING_FACE_HUB_TOKEN"
28+
value: ""
2429

2530
resources:
2631
limits:
27-
cpu: "8"
28-
ephemeral-storage: 100Gi
29-
memory: 27041Mi
3032
nvidia.com/gpu: "1"
3133
requests:
32-
cpu: "8"
33-
ephemeral-storage: 100Gi
34-
memory: 27041Mi
3534
nvidia.com/gpu: "1"
3635

37-
livenessProbe:
38-
failureThreshold: 240
39-
httpGet:
40-
path: /health
41-
port: http
42-
scheme: HTTP
43-
initialDelaySeconds: 5
44-
periodSeconds: 5
45-
successThreshold: 1
46-
timeoutSeconds: 1
36+
livenessProbe:
37+
{}
38+
# failureThreshold: 240
39+
# httpGet:
40+
# path: /health
41+
# port: http
42+
# scheme: HTTP
43+
# initialDelaySeconds: 5
44+
# periodSeconds: 5
45+
# successThreshold: 1
46+
# timeoutSeconds: 1
4747

48-
readinessProbe:
49-
failureThreshold: 600
50-
httpGet:
51-
path: /health
52-
port: http
53-
scheme: HTTP
54-
initialDelaySeconds: 5
55-
periodSeconds: 5
56-
successThreshold: 1
57-
timeoutSeconds: 1
48+
readinessProbe:
49+
{}
50+
# failureThreshold: 600
51+
# httpGet:
52+
# path: /health
53+
# port: http
54+
# scheme: HTTP
55+
# initialDelaySeconds: 5
56+
# periodSeconds: 5
57+
# successThreshold: 1
58+
# timeoutSeconds: 1
5859

5960
nodeSelector: {}
6061
tolerations: []
@@ -72,4 +73,3 @@ service:
7273
serviceType: ClusterIP
7374
port: 80
7475
additionalLabels: {}
75-

0 commit comments

Comments
 (0)