16 changes: 10 additions & 6 deletions .github/workflows/update-dependencies.yml
@@ -24,22 +24,26 @@ jobs:
 set -xe

 # Install dependency
-apt update && apt install -y jq yq
+sudo apt update
+sudo apt install -y jq
+sudo snap install yq

 # Tell git who we are for commits
 git config user.email "${{ github.actor }}"
 git config user.name "${{ github.actor }}"

 # Get latest vLLM release tag and replace it in various places
-OLD_VLLM_TAG=$(yq '.api.image.version' chart/values.yml)
-NEW_VLLM_TAG=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq .tag_name | sed s/\"//g)
+CHART_VALUES=chart/values.yaml
+# Export vars so that they can be used by yq's strenv function
+export OLD_VLLM_TAG=$(yq '.api.image.version' $CHART_VALUES)
+export NEW_VLLM_TAG=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq .tag_name | sed s/\"//g)
 if [[ $OLD_VLLM_TAG != $NEW_VLLM_TAG ]]; then
 # Set new release tag output
 echo new_vllm_tag=$NEW_VLLM_TAG >> $GITHUB_OUTPUT
 # Update yaml in-place with yq
-yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' chart/values.yaml
+yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' $CHART_VALUES
 # Can't use in-place editing with jq
-jq --arg tag $NEW_VLLM_TAG '.properties.api.properties.image.properties.version.default = $tag' chart/values.schema.json.new
+jq --indent 4 --arg tag $NEW_VLLM_TAG '.properties.api.properties.image.properties.version.default = $tag' chart/values.schema.json > chart/values.schema.json.new
 mv chart/values.schema.json{.new,}
 fi

@@ -49,6 +53,6 @@ jobs:
 with:
 base: main
 branch: update/vllm-${{ steps.dependency_updates.outputs.new_vllm_tag }}
-title: "Update dependencies"
+title: "Update vLLM to ${{ steps.dependency_updates.outputs.new_vllm_tag }}"
 body: This PR was automatically generated by GitHub Actions.
 delete-branch: true
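
Taken together, the new workflow step installs jq from apt and yq from snap (the snap package provides the Go-based yq, whose "e -i" and strenv() syntax the script relies on), then rewrites the vLLM tag in both chart files. A minimal standalone sketch of that update pattern, with NEW_VLLM_TAG hard-coded purely for illustration:

    # Sketch only: NEW_VLLM_TAG is hard-coded here; the workflow derives it from the GitHub API.
    export NEW_VLLM_TAG="v0.5.5"

    # yq (Go version from snap) edits YAML in place; strenv() reads the exported variable.
    yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' chart/values.yaml

    # jq has no in-place flag, so write to a temporary file and move it over the original.
    jq --indent 4 --arg tag "$NEW_VLLM_TAG" \
      '.properties.api.properties.image.properties.version.default = $tag' \
      chart/values.schema.json > chart/values.schema.json.new
    mv chart/values.schema.json{.new,}
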
17 changes: 12 additions & 5 deletions chart/values.schema.json
@@ -12,12 +12,17 @@
 "default": "microsoft/Phi-3.5-mini-instruct"
 },
 "token": {
-"type": ["string", "null"],
+"type": [
+"string",
+"null"
+],
 "title": "Access Token",
 "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)."
 }
 },
-"required": ["model"]
+"required": [
+"model"
+]
 },
 "ui": {
 "type": "object",
@@ -87,9 +92,11 @@
 "minimum": -2,
 "maximum": 2
 }
-
 },
-"required": ["hf_model_name", "hf_model_instruction"]
+"required": [
+"hf_model_name",
+"hf_model_instruction"
+]
 }
 }
 },
@@ -107,7 +114,7 @@
 "type": "string",
 "title": "Backend vLLM version",
 "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
-"default": "v0.5.4"
+"default": "v0.5.5"
 }
 }
 }
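
Because the vLLM tag now lives in two files (chart/values.yaml and chart/values.schema.json), a small consistency check can catch the two drifting apart. A hedged sketch, assuming the chart layout shown in this PR:

    # Compare the tag recorded in values.yaml with the schema default (paths as in this repo).
    yaml_tag=$(yq '.api.image.version' chart/values.yaml)
    schema_tag=$(jq -r '.properties.api.properties.image.properties.version.default' chart/values.schema.json)
    if [ "$yaml_tag" = "$schema_tag" ]; then
      echo "vLLM tag in sync: $yaml_tag"
    else
      echo "Mismatch: values.yaml has $yaml_tag, values.schema.json has $schema_tag" >&2
      exit 1
    fi
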
9 changes: 0 additions & 9 deletions chart/values.yaml
@@ -15,7 +15,6 @@ huggingface:
 # repo files yet. This chart value provides a hook to manually apply the
 # correct chat template for such models.
 chatTemplate:
-
 # For private/gated huggingface models (e.g. Meta's Llama models)
 # you must provide your own huggingface token, for details see:
 # https://huggingface.co/docs/hub/security-tokens
@@ -29,7 +28,6 @@ huggingface:
 # OR FOR TESTING PURPOSES ONLY, you can instead provide the secret directly
 # as a chart value here (if secretName is set above then it will take priority)
 token:
-
 # Configuration for the backend model serving API
 api:
 # Container image config
@@ -51,13 +49,11 @@ api:
 iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
 description: |
 The raw inference API endpoints for the deployed LLM.
-
 # Config for huggingface model cache volume
 # This is mounted at /root/.cache/huggingface in the api deployment
 cacheVolume:
 hostPath:
 path: /tmp/llm/huggingface-cache
-
 # Number of gpus to requests for each api pod instance
 # NOTE: This must be in the range 1 <= value <= N, where
 # 'N' is the number of GPUs available in a single
@@ -73,15 +69,12 @@ api:
 # to preform a rolling zero-downtime update
 updateStrategy:
 type: Recreate
-
 # The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
 # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
 modelMaxContextLength:
-
 # Extra args to supply to the vLLM backend, see
 # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
 extraArgs: []
-
 # Configuration for the frontend web interface
 ui:
 # Toggles installation of the gradio web UI
@@ -124,7 +117,6 @@ ui:
 rollingUpdate:
 maxSurge: 25%
 maxUnavailable: 25%
-
 # Settings for configuring ingress resources
 # to make the UI and/or backend API accessible
 # outside the cluster.
@@ -155,6 +147,5 @@ ingress:
 # Annotations to apply to the ingress resource
 # e.g. for cert-manager integration
 annotations:
-
 reloader:
 watchGlobally: false
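
Since these are ordinary Helm chart values, anything above can be overridden at install or upgrade time, and Helm validates user-supplied values against chart/values.schema.json. A hypothetical invocation (the release name and chart path are assumptions; only the api.image.version key is taken from the files above):

    # Hypothetical example: the release name "llm" and chart path ./chart are assumptions.
    helm upgrade --install llm ./chart \
      --set api.image.version=v0.5.5
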