Commit 053fe6f

Merge pull request #38 from stackhpc/feat/dependency-updates
Add workflow for automated vLLM updates
2 parents 5611ae3 + 97f03a0 commit 053fe6f

3 files changed: +22 −20 lines changed

.github/workflows/update-dependencies.yml

Lines changed: 10 additions & 6 deletions

```diff
@@ -24,22 +24,26 @@ jobs:
           set -xe
 
           # Install dependency
-          apt update && apt install -y jq yq
+          sudo apt update
+          sudo apt install -y jq
+          sudo snap install yq
 
           # Tell git who we are for commits
           git config user.email "${{ github.actor }}"
           git config user.name "${{ github.actor }}"
 
           # Get latest vLLM release tag and replace it in various places
-          OLD_VLLM_TAG=$(yq '.api.image.version' chart/values.yml)
-          NEW_VLLM_TAG=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq .tag_name | sed s/\"//g)
+          CHART_VALUES=chart/values.yaml
+          # Export vars so that they can be used by yq's strenv function
+          export OLD_VLLM_TAG=$(yq '.api.image.version' $CHART_VALUES)
+          export NEW_VLLM_TAG=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq .tag_name | sed s/\"//g)
           if [[ $OLD_VLLM_TAG != $NEW_VLLM_TAG ]]; then
             # Set new release tag output
             echo new_vllm_tag=$NEW_VLLM_TAG >> $GITHUB_OUTPUT
             # Update yaml in-place with yq
-            yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' chart/values.yaml
+            yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' $CHART_VALUES
             # Can't use in-place editing with jq
-            jq --arg tag $NEW_VLLM_TAG '.properties.api.properties.image.properties.version.default = $tag' chart/values.schema.json.new
+            jq --indent 4 --arg tag $NEW_VLLM_TAG '.properties.api.properties.image.properties.version.default = $tag' chart/values.schema.json > chart/values.schema.json.new
             mv chart/values.schema.json{.new,}
           fi
 
@@ -49,6 +53,6 @@ jobs:
         with:
           base: main
           branch: update/vllm-${{ steps.dependency_updates.outputs.new_vllm_tag }}
-          title: "Update dependencies"
+          title: "Update vLLM to ${{ steps.dependency_updates.outputs.new_vllm_tag }}"
           body: This PR was automatically generated by GitHub Actions.
           delete-branch: true
```
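
For reviewers who want to exercise the updated step outside of Actions, the sketch below replays the same logic locally. It is illustrative only: it assumes the Go-based yq (v4, as installed from snap above) and jq are on PATH and that it runs from the repository root; `jq -r` stands in for the workflow's `sed` quote-stripping, and the PR-creation step is omitted.

```bash
#!/usr/bin/env bash
# Local dry-run of the dependency_updates step (illustrative sketch only).
set -xe

CHART_VALUES=chart/values.yaml

# Export so yq's strenv() can read the values from the environment
export OLD_VLLM_TAG=$(yq '.api.image.version' $CHART_VALUES)
export NEW_VLLM_TAG=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq -r .tag_name)

if [[ $OLD_VLLM_TAG != $NEW_VLLM_TAG ]]; then
    # yq can edit the YAML in place
    yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' $CHART_VALUES
    # jq has no in-place mode, so write to a temp file and swap it in
    jq --indent 4 --arg tag $NEW_VLLM_TAG \
        '.properties.api.properties.image.properties.version.default = $tag' \
        chart/values.schema.json > chart/values.schema.json.new
    mv chart/values.schema.json{.new,}
fi

echo "vLLM: $OLD_VLLM_TAG -> $NEW_VLLM_TAG"
```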

chart/values.schema.json

Lines changed: 12 additions & 5 deletions

```diff
@@ -12,12 +12,17 @@
                     "default": "microsoft/Phi-3.5-mini-instruct"
                 },
                 "token": {
-                    "type": ["string", "null"],
+                    "type": [
+                        "string",
+                        "null"
+                    ],
                     "title": "Access Token",
                     "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)."
                 }
             },
-            "required": ["model"]
+            "required": [
+                "model"
+            ]
         },
         "ui": {
             "type": "object",
@@ -87,9 +92,11 @@
                     "minimum": -2,
                     "maximum": 2
                 }
-
             },
-            "required": ["hf_model_name", "hf_model_instruction"]
+            "required": [
+                "hf_model_name",
+                "hf_model_instruction"
+            ]
         }
     }
 },
@@ -107,7 +114,7 @@
                 "type": "string",
                 "title": "Backend vLLM version",
                 "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
-                "default": "v0.5.4"
+                "default": "v0.5.5"
             }
         }
     }
```
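
A note on the churn above: only the `default` for the backend vLLM version actually changes. The expanded `type` and `required` arrays and the dropped blank line match what jq emits when it rewrites the file with `--indent 4`, since jq's pretty-printer places each array element on its own line. If that reading is right, normalising the schema once means future automated bumps should touch only the version line. A quick, illustrative local check:

```bash
# Run the schema through jq with an identity filter ('.') so only
# formatting can change, then compare against the committed file.
jq --indent 4 '.' chart/values.schema.json > /tmp/values.schema.formatted.json
diff chart/values.schema.json /tmp/values.schema.formatted.json && echo "already in jq's format"
```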

chart/values.yaml

Lines changed: 0 additions & 9 deletions

```diff
@@ -15,7 +15,6 @@ huggingface:
   # repo files yet. This chart value provides a hook to manually apply the
   # correct chat template for such models.
   chatTemplate:
-
   # For private/gated huggingface models (e.g. Meta's Llama models)
   # you must provide your own huggingface token, for details see:
   # https://huggingface.co/docs/hub/security-tokens
@@ -29,7 +28,6 @@ huggingface:
   # OR FOR TESTING PURPOSES ONLY, you can instead provide the secret directly
   # as a chart value here (if secretName is set above then it will take priority)
   token:
-
 # Configuration for the backend model serving API
 api:
   # Container image config
@@ -51,13 +49,11 @@ api:
     iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
     description: |
       The raw inference API endpoints for the deployed LLM.
-
   # Config for huggingface model cache volume
   # This is mounted at /root/.cache/huggingface in the api deployment
   cacheVolume:
     hostPath:
       path: /tmp/llm/huggingface-cache
-
   # Number of gpus to requests for each api pod instance
   # NOTE: This must be in the range 1 <= value <= N, where
   # 'N' is the number of GPUs available in a single
@@ -73,15 +69,12 @@ api:
   # to preform a rolling zero-downtime update
   updateStrategy:
     type: Recreate
-
   # The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
   # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
   modelMaxContextLength:
-
   # Extra args to supply to the vLLM backend, see
   # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
   extraArgs: []
-
 # Configuration for the frontend web interface
 ui:
   # Toggles installation of the gradio web UI
@@ -124,7 +117,6 @@ ui:
     rollingUpdate:
       maxSurge: 25%
       maxUnavailable: 25%
-
 # Settings for configuring ingress resources
 # to make the UI and/or backend API accessible
 # outside the cluster.
@@ -155,6 +147,5 @@
   # Annotations to apply to the ingress resource
   # e.g. for cert-manager integration
   annotations:
-
 reloader:
   watchGlobally: false
```
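
The values.yaml changes are whitespace only: the blank lines after empty keys such as `chatTemplate:`, `token:` and `annotations:` are dropped. That is consistent with the file being rewritten by `yq e -i` in the workflow, since the Go-based yq re-serialises the document and does not keep standalone blank lines (comments are preserved). A hypothetical local check, illustration only:

```bash
# Identity in-place edit: yq rewrites the file without changing any values,
# so the resulting diff shows only the formatting yq imposes (blank lines dropped).
cp chart/values.yaml /tmp/values.yaml.before
yq e -i '.' chart/values.yaml
diff /tmp/values.yaml.before chart/values.yaml
```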
