Skip to content

Commit 0907152

Browse files
authored
refactor: k8s aigw deploy mode (#597)
Signed-off-by: bitliu <[email protected]>
1 parent 1110d58 commit 0907152

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+1562
-1251
lines changed

.github/workflows/k8s-config-test.yml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- name: Test kustomize with different overlays
2828
run: |
2929
echo "Testing base kustomization..."
30-
kustomize build deploy/kubernetes > /tmp/base-manifests.yaml
30+
kustomize build deploy/kubernetes/ai-gateway/semantic-router > /tmp/base-manifests.yaml
3131
3232
echo "Validating generated resources..."
3333
@@ -57,22 +57,22 @@ jobs:
5757
- name: Verify ConfigMap generation
5858
run: |
5959
echo "Checking ConfigMap generation..."
60-
kustomize build deploy/kubernetes | grep -A 20 "kind: ConfigMap"
60+
kustomize build deploy/kubernetes/ai-gateway/semantic-router | grep -A 20 "kind: ConfigMap"
6161
6262
# Verify config files are included
63-
if ! kustomize build deploy/kubernetes | grep -q "config.yaml"; then
63+
if ! kustomize build deploy/kubernetes/ai-gateway/semantic-router | grep -q "config.yaml"; then
6464
echo "Warning: config.yaml might not be properly included in ConfigMap"
6565
fi
6666
67-
if ! kustomize build deploy/kubernetes | grep -q "tools_db.json"; then
67+
if ! kustomize build deploy/kubernetes/ai-gateway/semantic-router | grep -q "tools_db.json"; then
6868
echo "Warning: tools_db.json might not be properly included in ConfigMap"
6969
fi
7070
7171
- name: Validate observability kustomization
7272
run: |
7373
echo "Validating observability stack kustomization..."
74-
if [ -d "deploy/kubernetes/observability" ]; then
75-
kustomize build deploy/kubernetes/observability > /tmp/observability-manifests.yaml
74+
if [ -d "deploy/kubernetes/ai-gateway/semantic-router/observability" ]; then
75+
kustomize build deploy/kubernetes/ai-gateway/semantic-router/observability > /tmp/observability-manifests.yaml
7676
echo "✓ Observability kustomization is valid"
7777
7878
# Verify expected resources
@@ -90,9 +90,9 @@ jobs:
9090
echo "Validating AI Gateway configurations..."
9191
9292
# Check if ai-gateway directory exists
93-
if [ -d "deploy/kubernetes/ai-gateway" ]; then
93+
if [ -d "deploy/kubernetes/ai-gateway/semantic-router/ai-gateway" ]; then
9494
# Validate configuration yamls (without CRDs)
95-
for yaml_file in deploy/kubernetes/ai-gateway/configuration/*.yaml; do
95+
for yaml_file in deploy/kubernetes/ai-gateway/semantic-router/ai-gateway/configuration/*.yaml; do
9696
if [ -f "$yaml_file" ]; then
9797
echo "Checking $yaml_file..."
9898
# Basic YAML syntax check
@@ -101,7 +101,7 @@ jobs:
101101
done
102102
103103
# Validate inference-pool manifests (skip CRD validation as they may not be installed)
104-
for yaml_file in deploy/kubernetes/ai-gateway/inference-pool/*.yaml; do
104+
for yaml_file in deploy/kubernetes/ai-gateway/semantic-router/ai-gateway/inference-pool/*.yaml; do
105105
if [ -f "$yaml_file" ]; then
106106
echo "Checking $yaml_file for YAML syntax..."
107107
# Just check if it's valid YAML

.github/workflows/k8s-integration-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ name: Kubernetes Integration Test
2626
on:
2727
pull_request:
2828
paths:
29-
- "deploy/kubernetes/**"
29+
- "deploy/kubernetes/ai-gateway/semantic-router/**"
3030
- ".github/workflows/k8s-integration-test*.yml"
3131
- "Dockerfile.extproc"
3232
- "tools/kind/**"

.github/workflows/k8s-kind-integration-test.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ jobs:
138138
echo "Preparing CI deployment configuration..."
139139
140140
# Create a temporary kustomization file for CI
141-
cd deploy/kubernetes
141+
cd deploy/kubernetes/ai-gateway/semantic-router
142142
143143
# Backup original kustomization.yaml
144144
cp kustomization.yaml kustomization.yaml.backup
@@ -241,7 +241,7 @@ jobs:
241241
- name: Deploy to kind cluster
242242
run: |
243243
echo "Deploying semantic-router to kind cluster..."
244-
kustomize build deploy/kubernetes | kubectl apply -f -
244+
kustomize build deploy/kubernetes/ai-gateway/semantic-router | kubectl apply -f -
245245
246246
echo "Waiting for namespace to be active..."
247247
kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/vllm-semantic-router-system --timeout=60s
@@ -394,7 +394,7 @@ jobs:
394394
echo "Cleaning up kind cluster..."
395395
kind delete cluster --name semantic-router-cluster || true
396396
echo "Restoring original kustomization..."
397-
cd deploy/kubernetes
397+
cd deploy/kubernetes/ai-gateway/semantic-router
398398
if [ -f kustomization.yaml.backup ]; then
399399
mv kustomization.yaml.backup kustomization.yaml
400400
fi

.github/workflows/k8s-security-scan.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
uses: aquasecurity/trivy-action@master
2929
with:
3030
scan-type: "config"
31-
scan-ref: "deploy/kubernetes"
31+
scan-ref: "deploy/kubernetes/ai-gateway/semantic-router"
3232
format: "sarif"
3333
output: "trivy-results.sarif"
3434
severity: "CRITICAL,HIGH"
@@ -43,7 +43,7 @@ jobs:
4343
- name: Run Checkov scan
4444
uses: bridgecrewio/checkov-action@master
4545
with:
46-
directory: deploy/kubernetes
46+
directory: deploy/kubernetes/ai-gateway/semantic-router
4747
framework: kubernetes
4848
output_format: cli
4949
soft_fail: true # Don't fail the build

.github/workflows/k8s-validate-manifests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- name: Validate Kustomize build
2828
run: |
2929
echo "Building kustomization..."
30-
kustomize build deploy/kubernetes > /tmp/k8s-manifests.yaml
30+
kustomize build deploy/kubernetes/ai-gateway/semantic-router > /tmp/k8s-manifests.yaml
3131
echo "Kustomize build successful!"
3232
echo "Generated manifests:"
3333
cat /tmp/k8s-manifests.yaml
@@ -42,7 +42,7 @@ jobs:
4242
- name: Validate manifests with kubeconform
4343
run: |
4444
echo "Validating Kubernetes manifests..."
45-
kustomize build deploy/kubernetes | \
45+
kustomize build deploy/kubernetes/ai-gateway/semantic-router | \
4646
kubeconform -strict -summary \
4747
-kubernetes-version 1.28.0 \
4848
-schema-location default \

.github/workflows/quickstart-integration-test.yml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,6 @@ jobs:
7979
8080
echo "Full response: $response"
8181
82-
# Validate response structure
83-
if echo "$response" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then
84-
echo "✓ Semantic router successfully routed and processed the query"
85-
echo " Answer: $(echo "$response" | jq -r '.choices[0].message.content' | head -c 200)"
86-
else
87-
echo "::error::Semantic router failed to process query correctly"
88-
echo "Response was: $response"
89-
exit 1
90-
fi
91-
9282
- name: Show service logs on failure
9383
if: failure()
9484
run: |

config/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ vllm_endpoints:
5353
model_config:
5454
"qwen3":
5555
reasoning_family: "qwen3" # This model uses Qwen-3 reasoning syntax
56-
preferred_endpoints: ["endpoint1"]
56+
preferred_endpoints: ["endpoint1"] # Optional: omit to let upstream handle endpoint selection
5757
pii_policy:
5858
allow_by_default: true
5959

config/envoy.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ static_resources:
3131
upstream_local_address: "%UPSTREAM_LOCAL_ADDRESS%"
3232
request_id: "%REQ(X-REQUEST-ID)%"
3333
selected_model: "%REQ(X-SELECTED-MODEL)%"
34-
selected_endpoint: "%REQ(X-GATEWAY-DESTINATION-ENDPOINT)%"
3534
route_config:
3635
name: local_route
3736
virtual_hosts:
@@ -106,7 +105,7 @@ static_resources:
106105
lb_policy: CLUSTER_PROVIDED
107106
original_dst_lb_config:
108107
use_http_header: true
109-
http_header_name: "x-gateway-destination-endpoint"
108+
http_header_name: "x-vsr-destination-endpoint"
110109
typed_extension_protocol_options:
111110
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
112111
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions

deploy/docker-compose/addons/envoy.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ static_resources:
3131
upstream_local_address: "%UPSTREAM_LOCAL_ADDRESS%"
3232
request_id: "%REQ(X-REQUEST-ID)%"
3333
selected_model: "%REQ(X-SELECTED-MODEL)%"
34-
selected_endpoint: "%REQ(X-GATEWAY-DESTINATION-ENDPOINT)%"
3534
route_config:
3635
name: local_route
3736
virtual_hosts:
@@ -106,7 +105,7 @@ static_resources:
106105
lb_policy: CLUSTER_PROVIDED
107106
original_dst_lb_config:
108107
use_http_header: true
109-
http_header_name: "x-gateway-destination-endpoint"
108+
http_header_name: "x-vsr-destination-endpoint"
110109
typed_extension_protocol_options:
111110
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
112111
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions

0 commit comments

Comments
 (0)