
Commit 2dc0aad

[CI/Build] Upgrade functionality test (#53)
Signed-off-by: Shaoting Feng <[email protected]>
1 parent bc1d292

File tree: 6 files changed (+86, -11 lines)


.github/curl-01-minimal-example.sh

Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Curl and save output
+[ ! -d "output-01-minimal-example" ] && mkdir output-01-minimal-example
+chmod -R 777 output-01-minimal-example
+result_model=$(curl -s http://$1:$2/models | tee output-01-minimal-example/models-01-minimal-example.json)
+result_query=$(curl -X POST http://$1:$2/completions -H "Content-Type: application/json" -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' | tee output-01-minimal-example/query-01-minimal-example.json)
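The script takes the router's host and port as positional arguments ($1 and $2), queries the /models and /completions endpoints, and tees each response into the output directory. A hypothetical invocation (the minikube IP and NodePort below are illustrative, not part of this commit):

$ bash .github/curl-01-minimal-example.sh 192.168.49.2 30080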

.github/curl-04-multiple-models.sh

Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Curl and save output
+[ ! -d "output-04-multiple-models" ] && mkdir output-04-multiple-models
+chmod -R 777 output-04-multiple-models
+result_model=$(curl -s http://$1:$2/models | tee output-04-multiple-models/models-04-multiple-models.json)
+
+source /usr/local/bin/conda-init
+conda activate llmstack
+result_query=$(python3 tutorials/assets/example-04-openai.py --openai_api_base "http://$1:$2/" | tee output-04-multiple-models/query-04-multiple-models.json)
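Unlike the minimal-example script, the query step here goes through the Python OpenAI client rather than raw curl, so the script assumes the self-hosted runner provides /usr/local/bin/conda-init and a conda environment named llmstack with the openai package installed. A hypothetical invocation mirrors the first script:

$ bash .github/curl-04-multiple-models.sh 192.168.49.2 30080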

.github/multiple-models.yaml

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
+servingEngineSpec:
+  modelSpec:
+  - name: "opt125m"
+    repository: "vllm/vllm-openai"
+    tag: "latest"
+    modelURL: "facebook/opt-125m"
+    replicaCount: 1
+    requestCPU: 6
+    requestMemory: "16Gi"
+    requestGPU: 1
+    pvcStorage: "10Gi"
+
+  - name: "smol135m"
+    repository: "vllm/vllm-openai"
+    tag: "latest"
+    modelURL: "HuggingFaceTB/SmolLM2-135M-Instruct"
+    replicaCount: 1
+    requestCPU: 6
+    requestMemory: "16Gi"
+    requestGPU: 1
+    pvcStorage: "10Gi"
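The values file declares two modelSpec entries, so the chart deploys one serving engine per model (facebook/opt-125m and HuggingFaceTB/SmolLM2-135M-Instruct). The Multiple-Models job in the workflow below installs it with:

$ sudo helm install vllm vllm/vllm-stack -f .github/multiple-models.yaml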

.github/port-forward.sh

Lines changed: 1 addition & 5 deletions

@@ -29,8 +29,4 @@ sudo kubectl patch service vllm-router-service -p '{"spec":{"type":"NodePort"}}'
 ip=$(sudo minikube ip)
 port=$(sudo kubectl get svc vllm-router-service -o=jsonpath='{.spec.ports[0].nodePort}')
 
-# Curl and save output
-[ ! -d "output" ] && mkdir output
-chmod -R 777 output
-result_model=$(curl -s http://$ip:$port/models | tee output/models.json)
-result_query=$(curl -X POST http://$ip:$port/completions -H "Content-Type: application/json" -d '{"model": "facebook/opt-125m", "prompt": "Once upon a time,", "max_tokens": 10}' | tee output/query.json)
+bash .github/$1.sh $ip $port
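The hard-coded curl calls are replaced by a dispatch: port-forward.sh now only resolves the minikube IP and NodePort, then hands them to whichever curl script its first argument names, as the workflow does:

$ sudo bash .github/port-forward.sh curl-01-minimal-example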

.github/workflows/functionality-helm-chart.yml

Lines changed: 28 additions & 4 deletions

@@ -17,7 +17,7 @@ on:
       - 'helm/**'
   merge_group:
 jobs:
-  Check-Health-of-Cluster:
+  Minimal-Example:
     runs-on: self-hosted
     steps:
       - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
@@ -34,13 +34,37 @@ jobs:
           sudo helm install vllm vllm/vllm-stack -f tutorials/assets/values-01-minimal-example.yaml
       - name: Validate the installation and send query to the stack
         run: |
-          sudo bash .github/port-forward.sh
+          sudo bash .github/port-forward.sh curl-01-minimal-example
+        timeout-minutes: 2
       - name: Archive functionality results
         uses: actions/upload-artifact@v4
         with:
-          name: curl-models-query
+          name: output-01-minimal-example
           path: |
-            output/
+            output-01-minimal-example/
+      - name: Helm uninstall
+        run: |
+          sudo helm uninstall vllm
+        if: always()
+      - run: echo "🍏 This job's status is ${{ job.status }}."
+
+  Multiple-Models:
+    runs-on: self-hosted
+    needs: Minimal-Example
+    steps:
+      - name: Deploy via helm charts
+        run: |
+          sudo helm install vllm vllm/vllm-stack -f .github/multiple-models.yaml
+      - name: Validate the installation and send query to the stack
+        run: |
+          sudo bash .github/port-forward.sh curl-04-multiple-models
+        timeout-minutes: 2
+      - name: Archive functionality results
+        uses: actions/upload-artifact@v4
+        with:
+          name: output-04-multiple-models
+          path: |
+            output-04-multiple-models/
       - name: Helm uninstall
         run: |
           sudo helm uninstall vllm
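Because of needs: Minimal-Example, the two jobs run sequentially on the self-hosted runner, so the two helm releases (both named vllm) never coexist. After a run, each uploaded artifact can be sanity-checked locally; for instance, assuming jq is installed and the /completions response follows the OpenAI schema (an assumption, not verified in this commit):

$ jq -r '.choices[0].text' output-01-minimal-example/query-01-minimal-example.json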

tutorials/assets/example-04-openai.py

Lines changed: 19 additions & 2 deletions

@@ -1,8 +1,25 @@
+import argparse
+
 from openai import OpenAI
 
+# Set up argument parsing
+parser = argparse.ArgumentParser(description="Use OpenAI API with custom base URL")
+parser.add_argument(
+    "--openai_api_base",
+    type=str,
+    default="http://localhost:30080/",
+    help="The base URL for the OpenAI API",
+)
+parser.add_argument(
+    "--openai_api_key", type=str, default="EMPTY", help="The API key for OpenAI"
+)
+
+# Parse the arguments
+args = parser.parse_args()
+
 # Modify OpenAI's API key and API base to use vLLM's API server.
-openai_api_key = "EMPTY"
-openai_api_base = "http://localhost:30080/"
+openai_api_key = args.openai_api_key
+openai_api_base = args.openai_api_base
 
 client = OpenAI(
     api_key=openai_api_key,
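With the endpoint and key taken from CLI flags instead of hard-coded constants, the same script serves both local runs (the defaults are unchanged) and CI, where curl-04-multiple-models.sh points it at the deployed router. A hypothetical direct invocation (the address shown is illustrative):

$ python3 tutorials/assets/example-04-openai.py --openai_api_base "http://192.168.49.2:30080/"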
