Skip to content

Commit 2352183

Browse files
committed
Merge branch 'Shaoting-Feng-qps-update' into main
2 parents 59cf526 + 27e54a1 commit 2352183

File tree

256 files changed

+16174
-1770
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

256 files changed

+16174
-1770
lines changed

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ FIX #xxxx (*link existing issues this PR will resolve*)
66

77
---
88

9-
- [ ] Make sure the code changes pass the [pre-commit](CONTRIBUTING.md) checks.
9+
- [ ] Make sure the code changes pass the [pre-commit](../CONTRIBUTING.md) checks.
1010
- [ ] Sign-off your commit by using <code>-s</code> when doing <code>git commit</code>
1111
- [ ] Try to classify PRs for easy understanding of the type of changes, such as `[Bugfix]`, `[Feat]`, and `[CI]`.
1212

.github/curl-01-minimal-example.sh

Lines changed: 0 additions & 9 deletions
This file was deleted.

.github/curl-02-two-pods.sh

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,24 @@
11
#!/bin/bash
# Smoke-test a two-pod vLLM deployment through the router.
#
# Usage: curl-02-two-pods.sh <host> <port>
#   $1 - router host/IP
#   $2 - router port
#
# Saves the raw JSON responses under output-02-two-pods/ and exits
# non-zero if either response comes back empty.

# Ensure the output directory exists so tee can write into it even when
# the script is run standalone (CI may pre-create it elsewhere).
mkdir -p output-02-two-pods

# Send a request to fetch the available models and save the response to a file
result_model=$(curl -s http://"$1":"$2"/v1/models | tee output-02-two-pods/models-02-two-pods.json)

# Check if the response is empty
if [[ -z "$result_model" ]]; then
  echo "Error: Failed to retrieve model list. Response is empty."
  exit 1
fi

# Send a request to generate a text completion and save the response to a file
result_query=$(curl -s -X POST http://"$1":"$2"/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' \
  | tee output-02-two-pods/query-02-two-pods.json)

# Check if the response is empty
if [[ -z "$result_query" ]]; then
  echo "Error: Failed to retrieve query response. Response is empty."
  exit 1
fi

echo "Requests were successful."

.github/curl-04-multiple-models.sh

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,26 @@
#!/bin/bash
# Smoke-test a multi-model vLLM deployment via the OpenAI-compatible API.
#
# Usage: curl-04-multiple-models.sh <host> <port>
#   $1 - router host/IP
#   $2 - router port
#
# Saves the raw JSON responses under output-04-multiple-models/ and exits
# non-zero if either response comes back empty.

# Ensure the output directory exists so tee can write into it even when
# the script is run standalone (CI may pre-create it elsewhere).
mkdir -p output-04-multiple-models

# Send a request to fetch the available models and save the response to a file
result_model=$(curl -s http://"$1":"$2"/v1/models | tee output-04-multiple-models/models-04-multiple-models.json)

# Check if model response is empty — fail fast BEFORE spending time on
# conda initialization and the python query.
if [[ -z "$result_model" ]]; then
  echo "Error: Failed to retrieve model list. Response is empty."
  exit 1
fi

# Initialize Conda environment
# shellcheck disable=SC1091
source /usr/local/bin/conda-init
conda activate llmstack

# Run the Python script to query the model and save the response to a file
result_query=$(python3 tutorials/assets/example-04-openai.py --openai_api_base http://"$1":"$2"/v1/ | tee output-04-multiple-models/query-04-multiple-models.json)

# Check if query response is empty
if [[ -z "$result_query" ]]; then
  echo "Error: Failed to retrieve query response. Response is empty."
  exit 1
fi

echo "Requests were successful."

.github/curl-05-secure-vllm.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Smoke-test an API-key-protected vLLM deployment.
#
# Usage: curl-05-secure-vllm.sh <host> <port>
#   $1 - router host/IP
#   $2 - router port
#
# Saves the raw JSON responses under output-05-secure-vllm/ and exits
# non-zero if either response file ends up missing/empty.

# Retrieve host and port from script arguments
HOST=$1
PORT=$2
# API key for authentication — test-only value, must match the
# vllmApiKey in .github/values-05-secure-vllm.yaml (not a real secret).
VLLM_API_KEY=abc123XYZ987

# Directory to store output
OUTPUT_DIR="output-05-secure-vllm"
mkdir -p "$OUTPUT_DIR"      # idempotent create (replaces non-idempotent test-then-mkdir)
chmod -R 777 "$OUTPUT_DIR"  # ensure full read/write permissions for CI artifacts

# Fetch the model list with authentication and save the response to a file
curl -s -H "Authorization: Bearer $VLLM_API_KEY" \
  "http://$HOST:$PORT/v1/models" | tee "$OUTPUT_DIR/models-05-secure-vllm.json"

# Validate model response immediately — fail fast before attempting the
# completion request against a server that is clearly not answering.
if [[ ! -s "$OUTPUT_DIR/models-05-secure-vllm.json" ]]; then
  echo "Error: Model list request failed or returned an empty response."
  exit 1
fi

# Run the text completion query with authentication and save the response to a file
curl -s -X POST -H "Authorization: Bearer $VLLM_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' \
  "http://$HOST:$PORT/v1/completions" | tee "$OUTPUT_DIR/query-05-secure-vllm.json"

# Validate query response
if [[ ! -s "$OUTPUT_DIR/query-05-secure-vllm.json" ]]; then
  echo "Error: Completion request failed or returned an empty response."
  exit 1
fi

echo "Requests completed successfully."

.github/port-forward.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ fi
88

99
echo "Waiting for all llmstack pods to be in Running state..."
1010

11+
# Save output
12+
VAR="${1#curl-}"
13+
[ ! -d "output-$VAR" ] && mkdir "output-$VAR"
14+
chmod -R 777 "output-$VAR"
15+
16+
# Print router logs
17+
POD_NAME=$(kubectl get pods --no-headers -o custom-columns=":metadata.name" | grep '^vllm-deployment-router')
18+
kubectl wait --for=condition=ready pod/"$POD_NAME" --timeout=120s
19+
sudo kubectl logs -f "$POD_NAME" 2>&1 | sudo tee "output-$VAR/router.log" &
20+
21+
1122
# Loop to check if all llmstack-related pods are in the Running state
1223
while true; do
1324
# Get all pods containing "vllm" in their name and extract their STATUS column
@@ -29,4 +40,6 @@ sudo kubectl patch service vllm-router-service -p '{"spec":{"type":"NodePort"}}'
2940
ip=$(sudo minikube ip)
3041
port=$(sudo kubectl get svc vllm-router-service -o=jsonpath='{.spec.ports[0].nodePort}')
3142

43+
sleep 5
44+
3245
bash ".github/$1.sh" "$ip" "$port"
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
# Helm values: two-replica opt-125m deployment sharing a single GPU.
# NOTE(review): indentation reconstructed from the chart schema — confirm
# against the committed file.
servingEngineSpec:
  # Recreate (not RollingUpdate) — presumably so the old pod releases its
  # GPU share before the replacement starts; confirm intent.
  strategy:
    type: Recreate
  runtimeClassName: ""
  modelSpec:
  - name: "opt125m"
    repository: "vllm/vllm-openai"
    tag: "latest"
    modelURL: "facebook/opt-125m"

    # Two replicas, each requesting half a GPU (0.5) — requires a
    # fractional-GPU-capable device plugin.
    replicaCount: 2

    requestCPU: 6
    requestMemory: "16Gi"
    requestGPU: 0.5

    # Shared storage: ReadWriteMany so both replicas can mount the PVC.
    pvcStorage: "10Gi"
    pvcAccessMode:
    - ReadWriteMany

    vllmConfig:
      maxModelLen: 1024
      # Cap GPU memory at 0.4 so two engines fit on one device.
      extraArgs: ["--disable-log-requests", "--gpu-memory-utilization", "0.4"]

routerSpec:
  # Locally built router image pushed to the in-cluster registry by CI.
  repository: "localhost:5000/git-act-router"
  imagePullPolicy: "IfNotPresent"
  enableRouter: true
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,8 @@ servingEngineSpec:
2424
pvcStorage: "10Gi"
2525
pvcAccessMode:
2626
- ReadWriteOnce
27+
28+
routerSpec:
29+
repository: "localhost:5000/git-act-router"
30+
imagePullPolicy: "IfNotPresent"
31+
enableRouter: true

.github/values-05-secure-vllm.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
# Helm values: single opt-125m replica behind an API-key-protected endpoint.
# NOTE(review): indentation reconstructed from the chart schema — confirm
# against the committed file.
servingEngineSpec:
  runtimeClassName: ""
  # Test-only API key; must match VLLM_API_KEY in
  # .github/curl-05-secure-vllm.sh (not a real secret).
  vllmApiKey: "abc123XYZ987"
  modelSpec:
  - name: "opt125m"
    repository: "vllm/vllm-openai"
    tag: "latest"
    modelURL: "facebook/opt-125m"

    replicaCount: 1

    requestCPU: 6
    requestMemory: "16Gi"
    requestGPU: 1

routerSpec:
  # Locally built router image pushed to the in-cluster registry by CI.
  repository: "localhost:5000/git-act-router"
  imagePullPolicy: "IfNotPresent"
  enableRouter: true

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
- name: Set up Python
3131
uses: actions/setup-python@v5
3232
with:
33-
python-version: '3.8'
33+
python-version: '3.12.8'
3434

3535
- name: Install Dependencies
3636
run: |

0 commit comments

Comments
 (0)