Skip to content

Commit 2352183

Browse files
committed
Merge branch 'Shaoting-Feng-qps-update' into main
2 parents 59cf526 + 27e54a1 commit 2352183

File tree

256 files changed

+16174
-1770
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

256 files changed

+16174
-1770
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ FIX #xxxx (*link existing issues this PR will resolve*)
66

77
---
88

9-
- [ ] Make sure the code changes pass the [pre-commit](CONTRIBUTING.md) checks.
9+
- [ ] Make sure the code changes pass the [pre-commit](../CONTRIBUTING.md) checks.
1010
- [ ] Sign-off your commit by using <code>-s</code> when doing <code>git commit</code>
1111
- [ ] Try to classify PRs for easy understanding of the type of changes, such as `[Bugfix]`, `[Feat]`, and `[CI]`.
1212

.github/curl-01-minimal-example.sh

Lines changed: 0 additions & 9 deletions
This file was deleted.

.github/curl-02-two-pods.sh

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,24 @@
11
#!/bin/bash
# Smoke-test a two-pod vLLM deployment through the router.
#
# Usage: curl-02-two-pods.sh <host> <port>
#   $1 - router host/IP
#   $2 - router port
#
# Saves the raw JSON responses under output-02-two-pods/ and exits
# non-zero if either response comes back empty.

# Ensure the output directory exists so tee can write into it even when
# the script is run standalone (CI may pre-create it elsewhere).
mkdir -p output-02-two-pods

# Send a request to fetch the available models and save the response to a file
result_model=$(curl -s http://"$1":"$2"/v1/models | tee output-02-two-pods/models-02-two-pods.json)

# Check if the response is empty
if [[ -z "$result_model" ]]; then
  echo "Error: Failed to retrieve model list. Response is empty."
  exit 1
fi

# Send a request to generate a text completion and save the response to a file
result_query=$(curl -s -X POST http://"$1":"$2"/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' \
  | tee output-02-two-pods/query-02-two-pods.json)

# Check if the response is empty
if [[ -z "$result_query" ]]; then
  echo "Error: Failed to retrieve query response. Response is empty."
  exit 1
fi

echo "Requests were successful."

.github/curl-04-multiple-models.sh

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
#!/bin/bash
# Smoke-test a multi-model vLLM deployment via the OpenAI-compatible API.
#
# Usage: curl-04-multiple-models.sh <host> <port>
#   $1 - router host/IP
#   $2 - router port
#
# Saves the raw JSON responses under output-04-multiple-models/ and exits
# non-zero if either response comes back empty.

# Ensure the output directory exists so tee can write into it even when
# the script is run standalone (CI may pre-create it elsewhere).
mkdir -p output-04-multiple-models

# Send a request to fetch the available models and save the response to a file
result_model=$(curl -s http://"$1":"$2"/v1/models | tee output-04-multiple-models/models-04-multiple-models.json)

# Check if model response is empty — fail fast BEFORE spending time on
# conda initialization and the python query.
if [[ -z "$result_model" ]]; then
  echo "Error: Failed to retrieve model list. Response is empty."
  exit 1
fi

# Initialize Conda environment
# shellcheck disable=SC1091
source /usr/local/bin/conda-init
conda activate llmstack

# Run the Python script to query the model and save the response to a file
result_query=$(python3 tutorials/assets/example-04-openai.py --openai_api_base http://"$1":"$2"/v1/ | tee output-04-multiple-models/query-04-multiple-models.json)

# Check if query response is empty
if [[ -z "$result_query" ]]; then
  echo "Error: Failed to retrieve query response. Response is empty."
  exit 1
fi

echo "Requests were successful."

.github/curl-05-secure-vllm.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Smoke-test an API-key-protected vLLM deployment.
#
# Usage: curl-05-secure-vllm.sh <host> <port>
#   $1 - router host/IP
#   $2 - router port
#
# Saves the raw JSON responses under output-05-secure-vllm/ and exits
# non-zero if either response file ends up missing/empty.

# Retrieve host and port from script arguments
HOST=$1
PORT=$2
# API key for authentication — test-only value, must match the
# vllmApiKey in .github/values-05-secure-vllm.yaml (not a real secret).
VLLM_API_KEY=abc123XYZ987

# Directory to store output
OUTPUT_DIR="output-05-secure-vllm"
mkdir -p "$OUTPUT_DIR"      # idempotent create (replaces non-idempotent test-then-mkdir)
chmod -R 777 "$OUTPUT_DIR"  # ensure full read/write permissions for CI artifacts

# Fetch the model list with authentication and save the response to a file
curl -s -H "Authorization: Bearer $VLLM_API_KEY" \
  "http://$HOST:$PORT/v1/models" | tee "$OUTPUT_DIR/models-05-secure-vllm.json"

# Validate model response immediately — fail fast before attempting the
# completion request against a server that is clearly not answering.
if [[ ! -s "$OUTPUT_DIR/models-05-secure-vllm.json" ]]; then
  echo "Error: Model list request failed or returned an empty response."
  exit 1
fi

# Run the text completion query with authentication and save the response to a file
curl -s -X POST -H "Authorization: Bearer $VLLM_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' \
  "http://$HOST:$PORT/v1/completions" | tee "$OUTPUT_DIR/query-05-secure-vllm.json"

# Validate query response
if [[ ! -s "$OUTPUT_DIR/query-05-secure-vllm.json" ]]; then
  echo "Error: Completion request failed or returned an empty response."
  exit 1
fi

echo "Requests completed successfully."

.github/port-forward.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ fi
88

99
echo "Waiting for all llmstack pods to be in Running state..."
1010

11+
# Save output
12+
VAR="${1#curl-}"
13+
[ ! -d "output-$VAR" ] && mkdir "output-$VAR"
14+
chmod -R 777 "output-$VAR"
15+
16+
# Print router logs
17+
POD_NAME=$(kubectl get pods --no-headers -o custom-columns=":metadata.name" | grep '^vllm-deployment-router')
18+
kubectl wait --for=condition=ready pod/"$POD_NAME" --timeout=120s
19+
sudo kubectl logs -f "$POD_NAME" 2>&1 | sudo tee "output-$VAR/router.log" &
20+
21+
1122
# Loop to check if all llmstack-related pods are in the Running state
1223
while true; do
1324
# Get all pods containing "vllm" in their name and extract their STATUS column
@@ -29,4 +40,6 @@ sudo kubectl patch service vllm-router-service -p '{"spec":{"type":"NodePort"}}'
2940
ip=$(sudo minikube ip)
3041
port=$(sudo kubectl get svc vllm-router-service -o=jsonpath='{.spec.ports[0].nodePort}')
3142

43+
sleep 5
44+
3245
bash ".github/$1.sh" "$ip" "$port"
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
# Helm values: two-replica opt-125m deployment sharing a single GPU.
# NOTE(review): indentation reconstructed from the chart schema — confirm
# against the committed file.
servingEngineSpec:
  # Recreate (not RollingUpdate) — presumably so the old pod releases its
  # GPU share before the replacement starts; confirm intent.
  strategy:
    type: Recreate
  runtimeClassName: ""
  modelSpec:
  - name: "opt125m"
    repository: "vllm/vllm-openai"
    tag: "latest"
    modelURL: "facebook/opt-125m"

    # Two replicas, each requesting half a GPU (0.5) — requires a
    # fractional-GPU-capable device plugin.
    replicaCount: 2

    requestCPU: 6
    requestMemory: "16Gi"
    requestGPU: 0.5

    # Shared storage: ReadWriteMany so both replicas can mount the PVC.
    pvcStorage: "10Gi"
    pvcAccessMode:
    - ReadWriteMany

    vllmConfig:
      maxModelLen: 1024
      # Cap GPU memory at 0.4 so two engines fit on one device.
      extraArgs: ["--disable-log-requests", "--gpu-memory-utilization", "0.4"]

routerSpec:
  # Locally built router image pushed to the in-cluster registry by CI.
  repository: "localhost:5000/git-act-router"
  imagePullPolicy: "IfNotPresent"
  enableRouter: true
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,8 @@ servingEngineSpec:
2424
pvcStorage: "10Gi"
2525
pvcAccessMode:
2626
- ReadWriteOnce
27+
28+
routerSpec:
29+
repository: "localhost:5000/git-act-router"
30+
imagePullPolicy: "IfNotPresent"
31+
enableRouter: true

.github/values-05-secure-vllm.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
# Helm values: single opt-125m replica behind an API-key-protected endpoint.
# NOTE(review): indentation reconstructed from the chart schema — confirm
# against the committed file.
servingEngineSpec:
  runtimeClassName: ""
  # Test-only API key; must match VLLM_API_KEY in
  # .github/curl-05-secure-vllm.sh (not a real secret).
  vllmApiKey: "abc123XYZ987"
  modelSpec:
  - name: "opt125m"
    repository: "vllm/vllm-openai"
    tag: "latest"
    modelURL: "facebook/opt-125m"

    replicaCount: 1

    requestCPU: 6
    requestMemory: "16Gi"
    requestGPU: 1

routerSpec:
  # Locally built router image pushed to the in-cluster registry by CI.
  repository: "localhost:5000/git-act-router"
  imagePullPolicy: "IfNotPresent"
  enableRouter: true

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
- name: Set up Python
3131
uses: actions/setup-python@v5
3232
with:
33-
python-version: '3.8'
33+
python-version: '3.12.8'
3434

3535
- name: Install Dependencies
3636
run: |

0 commit comments

Comments
 (0)