Skip to content

Commit 8001bda

Browse files
committed
Merge remote-tracking branch 'origin/pr/transcription-whisper' into pr/transcription-whisper
2 parents 1d37980 + a8bb67d commit 8001bda

File tree

158 files changed

+9356
-4953
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

158 files changed

+9356
-4953
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
servingEngineSpec:
2+
strategy:
3+
type: Recreate
4+
runtimeClassName: ""
5+
modelSpec:
6+
- name: "opt125m"
7+
repository: "vllm/vllm-openai"
8+
tag: "latest"
9+
modelURL: "facebook/opt-125m"
10+
11+
replicaCount: 2
12+
13+
requestCPU: 6
14+
requestMemory: "16Gi"
15+
requestGPU: 0.5
16+
17+
pvcStorage: "10Gi"
18+
pvcAccessMode:
19+
- ReadWriteMany
20+
21+
vllmConfig:
22+
maxModelLen: 1024
23+
extraArgs: ["--disable-log-requests", "--gpu-memory-utilization", "0.4"]
24+
25+
routerSpec:
26+
repository: "localhost:5000/git-act-router"
27+
imagePullPolicy: "IfNotPresent"
28+
enableRouter: true
29+
routingLogic: "session"
30+
sessionKey: "x-user-id"
31+
extraArgs:
32+
- "--log-level"
33+
- "info"

.github/wait-for-pods.sh

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#!/bin/bash
2+
3+
set -euo pipefail
4+
5+
# Colors for output
6+
RED='\033[0;31m'
7+
GREEN='\033[0;32m'
8+
YELLOW='\033[1;33m'
9+
NC='\033[0m' # No Color
10+
11+
# Function to print status messages
12+
#######################################
# Print an informational message with a green "[INFO]" tag.
# Globals:   GREEN, NC (read) - ANSI colour escape sequences
# Arguments: $1 - message text (optional; empty when omitted)
# Outputs:   one line on stdout
#######################################
print_status() {
  # Only the colour variables go through %b (escape expansion). The message
  # itself is printed with %s so that backslashes in caller-supplied text
  # (e.g. kubectl output) are shown literally instead of being interpreted.
  # ${1-} keeps a bare call from aborting the script under `set -u`.
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "${1-}"
}
15+
16+
#######################################
# Print an error message with a red "[ERROR]" tag.
# Globals:   RED, NC (read) - ANSI colour escape sequences
# Arguments: $1 - message text (optional; empty when omitted)
# Outputs:   one line on stderr, so diagnostics never mix with data on stdout
#######################################
print_error() {
  # %b expands the escapes stored in the colour variables; %s prints the
  # message verbatim so backslashes in it are not interpreted.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}" >&2
}
19+
20+
#######################################
# Print a warning message with a yellow "[WARNING]" tag.
# Globals:   YELLOW, NC (read) - ANSI colour escape sequences
# Arguments: $1 - message text (optional; empty when omitted)
# Outputs:   one line on stderr
#######################################
print_warning() {
  # %b expands the escapes stored in the colour variables; %s prints the
  # message verbatim so backslashes in it are not interpreted.
  printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "${1-}" >&2
}
23+
24+
# Default values
25+
POD_PREFIX="vllm"
26+
TIMEOUT_SECONDS=300
27+
VERBOSE=false
28+
29+
# Function to show usage
30+
#######################################
# Print the command line help text.
# Arguments: none ($0 is used as the program name)
# Outputs:   the help text on stdout
#######################################
usage() {
  # Unquoted delimiter: $0 is expanded, everything else is literal.
  cat <<HELP_TEXT
Usage: $0 [OPTIONS]
Options:
 --pod-prefix PREFIX Pod name prefix to search for (default: vllm)
 --timeout SECONDS Timeout in seconds (default: 300)
 --verbose Enable verbose output
 --help Show this help message

Examples:
 $0 # Wait for pods with 'vllm' prefix
 $0 --pod-prefix myapp --timeout 600 # Wait for 'myapp' pods with 10min timeout
 $0 --verbose # Wait with verbose output
HELP_TEXT
}
43+
44+
# Parse command line arguments
45+
#######################################
# Parse the command line into the script's option globals.
# Globals:   POD_PREFIX, TIMEOUT_SECONDS, VERBOSE (written)
# Arguments: the script's positional parameters ("$@")
# Outputs:   error and usage text for bad input (via print_error / usage)
# Returns:   0 on success; exits 0 for --help; exits 1 for an unknown option
#            or an option that is missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --pod-prefix | --timeout)
        # Without this guard a trailing "--timeout" would reference an unset
        # $2 and die with "unbound variable" under `set -u` instead of
        # telling the user what is wrong.
        if [[ $# -lt 2 ]]; then
          print_error "Option $1 requires a value"
          usage
          exit 1
        fi
        if [[ "$1" == "--pod-prefix" ]]; then
          POD_PREFIX="$2"
        else
          TIMEOUT_SECONDS="$2"
        fi
        shift 2
        ;;
      --verbose)
        VERBOSE=true
        shift
        ;;
      --help)
        usage
        exit 0
        ;;
      *)
        print_error "Unknown option: $1"
        usage
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
70+
71+
# Validate inputs
72+
# The timeout must be a whole number of seconds greater than zero. The digits
# check runs first, so the arithmetic test only ever sees digits; the 10#
# prefix forces base 10 so values with leading zeros (e.g. "030") are not
# parsed as octal. Zero is rejected to match the "positive integer" message.
if ! [[ "$TIMEOUT_SECONDS" =~ ^[0-9]+$ ]] || (( 10#$TIMEOUT_SECONDS == 0 )); then
  print_error "Timeout must be a positive integer"
  exit 1
fi
76+
77+
print_status "⏳ Waiting for all $POD_PREFIX pods to be in Running state"
78+
print_status "Timeout: ${TIMEOUT_SECONDS} seconds"
79+
80+
# Track start time for timeout
81+
START_TIME=$SECONDS
82+
83+
# Use the same approach as existing functionality tests
84+
# Poll the cluster until every pod matching $POD_PREFIX is Running with all of
# its containers ready, or fail once $TIMEOUT_SECONDS have elapsed.
while true; do
  # Take ONE snapshot per iteration so the STATUS and READY columns always
  # describe the same instant. A failing kubectl call (for example an API
  # server that is briefly unreachable) is retried instead of aborting the
  # whole script through `set -e`.
  if ! pods_snapshot=$(kubectl get pods --no-headers); then
    print_warning "kubectl get pods failed; treating it as 'no pods yet'"
    pods_snapshot=""
  fi

  # Same line-based match as before, but on the captured snapshot: there is
  # no kubectl | grep -q pipeline left that could report a spurious failure
  # under `pipefail`. grep exits 1 on "no match", hence the `|| true`.
  matching_pods=$(grep -- "$POD_PREFIX" <<<"$pods_snapshot" || true)

  elapsed=$((SECONDS - START_TIME))

  if [[ -z "$matching_pods" ]]; then
    # Check the deadline BEFORE sleeping so the script never waits longer
    # than the requested timeout.
    if [ "$elapsed" -ge "$TIMEOUT_SECONDS" ]; then
      print_error "❌ Timeout waiting for $POD_PREFIX pods to appear"
      print_error "No pods with prefix '$POD_PREFIX' found after ${TIMEOUT_SECONDS} seconds"
      kubectl get pods || true
      exit 1
    fi
    print_warning "No $POD_PREFIX pods found yet. Checking again in 5 seconds..."
  else
    # Distinct STATUS / READY values, used only for the progress messages.
    pod_status=$(awk '{print $3}' <<<"$matching_pods" | sort -u)
    pod_ready=$(awk '{print $2}' <<<"$matching_pods" | sort -u)

    # Names of pods that are NOT ready. A pod is ready when STATUS is Running
    # and READY reads n/n, which also covers pods with more than one
    # container (2/2, 3/3, ...), not only 1/1.
    not_ready=$(awk '{
      split($2, counts, "/")
      if ($3 != "Running" || counts[1] != counts[2]) print $1
    }' <<<"$matching_pods")

    if [ "$VERBOSE" = true ]; then
      print_status "Current pod status: $pod_status, Ready: $pod_ready"
      printf '%s\n' "$matching_pods"
    fi

    if [[ -z "$not_ready" ]]; then
      print_status "All $POD_PREFIX pods are now Ready and in Running state."
      break
    fi

    if [ "$elapsed" -ge "$TIMEOUT_SECONDS" ]; then
      print_error "❌ Timeout waiting for $POD_PREFIX pods to be ready"
      print_error "Pods were not ready after ${TIMEOUT_SECONDS} seconds"
      print_error "Current pod status:"
      printf '%s\n' "$matching_pods"
      print_error "Pod descriptions:"
      kubectl describe pods | grep -A 20 -B 5 -- "$POD_PREFIX" || true
      exit 1
    fi

    print_status "Not all $POD_PREFIX pods are ready yet. Current status: $pod_status, Ready: $pod_ready"
    print_status "Checking again in 5 seconds..."
  fi

  sleep 5
done

# Show final pod status. This listing is informational only, so a failure
# here (e.g. a pod restarted in the meantime) must not turn a successful wait
# into a failed script.
print_status "✅ All $POD_PREFIX pods are ready:"
kubectl get pods | grep -- "$POD_PREFIX" || true

print_status "🎉 Pod readiness check completed successfully in $((SECONDS - START_TIME)) seconds"

.github/workflows/functionality-helm-chart.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,26 @@ jobs:
3232
uses: actions/checkout@v4
3333
- run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
3434
- run: echo "🖥️ The workflow is now ready to test your code on the runner."
35+
- name: Helm uninstall
36+
run: |
37+
# Remove every Helm release left behind by a previous job on this runner.
releases=$(helm list -q)

if [ -n "$releases" ]; then
  echo "Found releases: $releases"
  # Word splitting is intentional: helm prints one release name per line.
  for r in $releases; do
    echo "Uninstalling $r..."
    helm uninstall "$r"
  done
else
  echo "No Helm releases found; skipping uninstall."
fi

# Wait until no pods remain, but give up after a deadline: a pod stuck in
# Terminating (or one that is not managed by Helm) would otherwise keep this
# step spinning until the job-level timeout. Override the default of 300
# seconds with POD_TERMINATION_TIMEOUT.
termination_timeout="${POD_TERMINATION_TIMEOUT:-300}"
deadline=$((SECONDS + termination_timeout))
echo "Waiting for all pods to terminate..."
while kubectl get pods --no-headers 2>/dev/null | grep -q .; do
  if [ "$SECONDS" -ge "$deadline" ]; then
    echo "Timed out after ${termination_timeout}s waiting for pods to terminate:" >&2
    kubectl get pods >&2 || true
    exit 1
  fi
  sleep 5
done
echo "All pods have terminated."
3555
- name: Deploy via helm charts
3656
env:
3757
DOCKER_BUILDKIT: 1
@@ -65,6 +85,26 @@ jobs:
6585
runs-on: self-hosted
6686
needs: Secure-Minimal-Example
6787
steps:
88+
- name: Helm uninstall
89+
run: |
90+
# Remove every Helm release left behind by a previous job on this runner.
releases=$(helm list -q)

if [ -n "$releases" ]; then
  echo "Found releases: $releases"
  # Word splitting is intentional: helm prints one release name per line.
  for r in $releases; do
    echo "Uninstalling $r..."
    helm uninstall "$r"
  done
else
  echo "No Helm releases found; skipping uninstall."
fi

# Wait until no pods remain, but give up after a deadline: a pod stuck in
# Terminating (or one that is not managed by Helm) would otherwise keep this
# step spinning until the job-level timeout. Override the default of 300
# seconds with POD_TERMINATION_TIMEOUT.
termination_timeout="${POD_TERMINATION_TIMEOUT:-300}"
deadline=$((SECONDS + termination_timeout))
echo "Waiting for all pods to terminate..."
while kubectl get pods --no-headers 2>/dev/null | grep -q .; do
  if [ "$SECONDS" -ge "$deadline" ]; then
    echo "Timed out after ${termination_timeout}s waiting for pods to terminate:" >&2
    kubectl get pods >&2 || true
    exit 1
  fi
  sleep 5
done
echo "All pods have terminated."
68108
- name: Deploy via helm charts
69109
run: |
70110
cd ${{ github.workspace }}
@@ -90,6 +130,26 @@ jobs:
90130
runs-on: self-hosted
91131
needs: Two-Pods-Minimal-Example
92132
steps:
133+
- name: Helm uninstall
134+
run: |
135+
# Remove every Helm release left behind by a previous job on this runner.
releases=$(helm list -q)

if [ -n "$releases" ]; then
  echo "Found releases: $releases"
  # Word splitting is intentional: helm prints one release name per line.
  for r in $releases; do
    echo "Uninstalling $r..."
    helm uninstall "$r"
  done
else
  echo "No Helm releases found; skipping uninstall."
fi

# Wait until no pods remain, but give up after a deadline: a pod stuck in
# Terminating (or one that is not managed by Helm) would otherwise keep this
# step spinning until the job-level timeout. Override the default of 300
# seconds with POD_TERMINATION_TIMEOUT.
termination_timeout="${POD_TERMINATION_TIMEOUT:-300}"
deadline=$((SECONDS + termination_timeout))
echo "Waiting for all pods to terminate..."
while kubectl get pods --no-headers 2>/dev/null | grep -q .; do
  if [ "$SECONDS" -ge "$deadline" ]; then
    echo "Timed out after ${termination_timeout}s waiting for pods to terminate:" >&2
    kubectl get pods >&2 || true
    exit 1
  fi
  sleep 5
done
echo "All pods have terminated."
93153
- name: Deploy via helm charts
94154
run: |
95155
helm install vllm ./helm -f .github/values-04-multiple-models.yaml

.github/workflows/router-e2e-test.yml

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,106 @@ jobs:
100100
path: |
101101
~/.kube/config
102102
src/tests/perftest/logs
103+
104+
sticky-routing-e2e-test:
105+
runs-on: self-hosted
106+
needs: e2e-test
107+
if: github.event.pull_request.draft == false
108+
steps:
109+
- name: Check out repository code
110+
uses: actions/checkout@v4
111+
112+
- name: Setup Python
113+
uses: actions/setup-python@v5
114+
with:
115+
python-version: "3.12"
116+
117+
- name: Install Python dependencies
118+
run: |
119+
python -m pip install --upgrade pip
120+
pip install -r benchmarks/multi-round-qa/requirements.txt
121+
pip install -e .
122+
123+
- name: Setup minikube environment
124+
env:
125+
DOCKER_BUILDKIT: 1
126+
run: |
127+
echo "🔧 Setting up minikube environment"
128+
sudo sysctl fs.protected_regular=0
129+
# Verify minikube is running
130+
minikube status
131+
# Ensure kubectl is configured for minikube
132+
kubectl config use-context minikube
133+
134+
- name: Build and deploy router image
135+
env:
136+
DOCKER_BUILDKIT: 1
137+
run: |
138+
echo "🔨 Building router docker image"
139+
cd ${{ github.workspace }}
140+
sudo docker build --build-arg INSTALL_OPTIONAL_DEP=default -t localhost:5000/git-act-router -f docker/Dockerfile .
141+
sudo docker push localhost:5000/git-act-router
142+
minikube image load localhost:5000/git-act-router
143+
144+
- name: Deploy two-pods setup via helm charts
145+
run: |
146+
echo "🚀 Deploying two-pods setup with helm"
147+
cd ${{ github.workspace }}
148+
helm install vllm ./helm -f .github/values-06-session-routing.yaml
149+
150+
- name: Wait for pods to be ready
151+
run: |
152+
echo "⏳ Making wait-for-pods script executable and running it"
153+
chmod +x .github/wait-for-pods.sh
154+
./.github/wait-for-pods.sh --pod-prefix vllm --timeout 300 --verbose
155+
156+
- name: Make test script executable
157+
run: |
158+
chmod +x tests/e2e/test-sticky-routing.sh
159+
160+
- name: Run sticky routing e2e test
161+
run: |
162+
echo "🧪 Running sticky routing test"
163+
cd ${{ github.workspace }}
164+
# Set the model to match what's deployed in the helm values
165+
# Enable debug mode to preserve temp files for artifact collection
166+
./tests/e2e/test-sticky-routing.sh --model "facebook/opt-125m" --num-rounds 3 --verbose --debug
167+
timeout-minutes: 10
168+
169+
- name: Archive sticky routing test results
170+
uses: actions/upload-artifact@v4
171+
if: always()
172+
with:
173+
name: sticky-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }}
174+
path: |
175+
/tmp/sticky-routing-results-*
176+
177+
- name: Get router and pod logs for debugging
178+
if: always()
179+
run: |
180+
echo "📋 Collecting logs for debugging"
181+
mkdir -p debug-logs
182+
# Get router logs
183+
kubectl logs -l app.kubernetes.io/component=router --tail=100 > debug-logs/router.log || true
184+
# Get serving engine logs
185+
kubectl logs -l app.kubernetes.io/component=serving-engine --tail=100 > debug-logs/serving-engines.log || true
186+
# Get pod status
187+
kubectl get pods -o wide > debug-logs/pod-status.txt || true
188+
# Get services
189+
kubectl get svc > debug-logs/services.txt || true
190+
191+
- name: Upload debug logs
192+
uses: actions/upload-artifact@v4
193+
if: always()
194+
with:
195+
name: debug-logs-pr-${{ github.event.pull_request.number || 'main' }}
196+
path: debug-logs/
197+
198+
- name: Helm uninstall and cleanup
199+
run: |
200+
echo "🧹 Cleaning up resources"
201+
helm uninstall vllm || true
202+
sudo docker image prune -f || true
203+
if: always()
204+
205+
- run: echo "🍏 Sticky routing e2e test job status is ${{ job.status }}."

0 commit comments

Comments
 (0)