16 changes: 10 additions & 6 deletions .github/workflows/update-dependencies.yml
@@ -24,22 +24,26 @@ jobs:
 set -xe

 # Install dependency
-apt update && apt install -y jq yq
+sudo apt update
+sudo apt install -y jq
+sudo snap install yq

 # Tell git who we are for commits
 git config user.email "${{ github.actor }}"
 git config user.name "${{ github.actor }}"

 # Get latest vLLM release tag and replace it in various places
-OLD_VLLM_TAG=$(yq '.api.image.version' chart/values.yml)
-NEW_VLLM_TAG=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq .tag_name | sed s/\"//g)
+CHART_VALUES=chart/values.yaml
+# Export vars so that they can be used by yq's strenv function
+export OLD_VLLM_TAG=$(yq '.api.image.version' $CHART_VALUES)
+export NEW_VLLM_TAG=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq .tag_name | sed s/\"//g)
 if [[ $OLD_VLLM_TAG != $NEW_VLLM_TAG ]]; then
 # Set new release tag output
 echo new_vllm_tag=$NEW_VLLM_TAG >> $GITHUB_OUTPUT
 # Update yaml in-place with yq
-yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' chart/values.yaml
+yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' $CHART_VALUES
 # Can't use in-place editing with jq
-jq --arg tag $NEW_VLLM_TAG '.properties.api.properties.image.properties.version.default = $tag' chart/values.schema.json.new
+jq --indent 4 --arg tag $NEW_VLLM_TAG '.properties.api.properties.image.properties.version.default = $tag' chart/values.schema.json > chart/values.schema.json.new
 mv chart/values.schema.json{.new,}
 fi

@@ -49,6 +53,6 @@ jobs:
 with:
 base: main
 branch: update/vllm-${{ steps.dependency_updates.outputs.new_vllm_tag }}
-title: "Update dependencies"
+title: "Update vLLM to ${{ steps.dependency_updates.outputs.new_vllm_tag }}"
 body: This PR was automatically generated by GitHub Actions.
 delete-branch: true
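
Taken together, the new workflow step installs jq from apt and yq from snap (the snap package provides the Go-based yq, whose "e -i" and strenv() syntax the script relies on), then rewrites the vLLM tag in both chart files. A minimal standalone sketch of that update pattern, with NEW_VLLM_TAG hard-coded purely for illustration:

    # Sketch only: NEW_VLLM_TAG is hard-coded here; the workflow derives it from the GitHub API.
    export NEW_VLLM_TAG="v0.5.5"

    # yq (Go version from snap) edits YAML in place; strenv() reads the exported variable.
    yq e -i '.api.image.version = strenv(NEW_VLLM_TAG)' chart/values.yaml

    # jq has no in-place flag, so write to a temporary file and move it over the original.
    jq --indent 4 --arg tag "$NEW_VLLM_TAG" \
      '.properties.api.properties.image.properties.version.default = $tag' \
      chart/values.schema.json > chart/values.schema.json.new
    mv chart/values.schema.json{.new,}
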
17 changes: 12 additions & 5 deletions chart/values.schema.json
@@ -12,12 +12,17 @@
 "default": "microsoft/Phi-3.5-mini-instruct"
 },
 "token": {
-"type": ["string", "null"],
+"type": [
+"string",
+"null"
+],
 "title": "Access Token",
 "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated) (e.g. Llama 3)."
 }
 },
-"required": ["model"]
+"required": [
+"model"
+]
 },
 "ui": {
 "type": "object",
@@ -87,9 +92,11 @@
 "minimum": -2,
 "maximum": 2
 }
-
 },
-"required": ["hf_model_name", "hf_model_instruction"]
+"required": [
+"hf_model_name",
+"hf_model_instruction"
+]
 }
 }
 },
@@ -107,7 +114,7 @@
 "type": "string",
 "title": "Backend vLLM version",
 "description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
-"default": "v0.5.4"
+"default": "v0.5.5"
 }
 }
 }
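
Because the vLLM tag now lives in two files (chart/values.yaml and chart/values.schema.json), a small consistency check can catch the two drifting apart. A hedged sketch, assuming the chart layout shown in this PR:

    # Compare the tag recorded in values.yaml with the schema default (paths as in this repo).
    yaml_tag=$(yq '.api.image.version' chart/values.yaml)
    schema_tag=$(jq -r '.properties.api.properties.image.properties.version.default' chart/values.schema.json)
    if [ "$yaml_tag" = "$schema_tag" ]; then
      echo "vLLM tag in sync: $yaml_tag"
    else
      echo "Mismatch: values.yaml has $yaml_tag, values.schema.json has $schema_tag" >&2
      exit 1
    fi
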
9 changes: 0 additions & 9 deletions chart/values.yaml
@@ -15,7 +15,6 @@ huggingface:
 # repo files yet. This chart value provides a hook to manually apply the
 # correct chat template for such models.
 chatTemplate:
-
 # For private/gated huggingface models (e.g. Meta's Llama models)
 # you must provide your own huggingface token, for details see:
 # https://huggingface.co/docs/hub/security-tokens
@@ -29,7 +28,6 @@ huggingface:
 # OR FOR TESTING PURPOSES ONLY, you can instead provide the secret directly
 # as a chart value here (if secretName is set above then it will take priority)
 token:
-
 # Configuration for the backend model serving API
 api:
 # Container image config
@@ -51,13 +49,11 @@ api:
 iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
 description: |
 The raw inference API endpoints for the deployed LLM.
-
 # Config for huggingface model cache volume
 # This is mounted at /root/.cache/huggingface in the api deployment
 cacheVolume:
 hostPath:
 path: /tmp/llm/huggingface-cache
-
 # Number of gpus to requests for each api pod instance
 # NOTE: This must be in the range 1 <= value <= N, where
 # 'N' is the number of GPUs available in a single
@@ -73,15 +69,12 @@ api:
 # to preform a rolling zero-downtime update
 updateStrategy:
 type: Recreate
-
 # The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
 # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
 modelMaxContextLength:
-
 # Extra args to supply to the vLLM backend, see
 # https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
 extraArgs: []
-
 # Configuration for the frontend web interface
 ui:
 # Toggles installation of the gradio web UI
@@ -124,7 +117,6 @@ ui:
 rollingUpdate:
 maxSurge: 25%
 maxUnavailable: 25%
-
 # Settings for configuring ingress resources
 # to make the UI and/or backend API accessible
 # outside the cluster.
@@ -155,6 +147,5 @@ ingress:
 # Annotations to apply to the ingress resource
 # e.g. for cert-manager integration
 annotations:
-
 reloader:
 watchGlobally: false
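
Since these are ordinary Helm chart values, anything above can be overridden at install or upgrade time, and Helm validates user-supplied values against chart/values.schema.json. A hypothetical invocation (the release name and chart path are assumptions; only the api.image.version key is taken from the files above):

    # Hypothetical example: the release name "llm" and chart path ./chart are assumptions.
    helm upgrade --install llm ./chart \
      --set api.image.version=v0.5.5
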