Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions deploy/kubernetes/ai-gateway/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ Install the Custom Resource Definitions (CRDs) for managing inference pools:

```bash
# Install Gateway API Inference Extension CRDs
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml

# Verify CRDs are installed
kubectl get crd | grep inference
Expand Down Expand Up @@ -259,7 +259,7 @@ helm uninstall aieg -n envoy-ai-gateway-system
helm uninstall eg -n envoy-gateway-system

# Remove Gateway API Inference Extension CRDs (optional)
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml

# Delete kind cluster
kind delete cluster --name semantic-router-cluster
Expand Down
4 changes: 2 additions & 2 deletions deploy/kubernetes/ai-gateway/configuration/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ data:
enableBackend: true
extensionManager:
backendResources:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
version: v1alpha2
version: v1
hooks:
xdsTranslator:
translation:
Expand Down
2 changes: 1 addition & 1 deletion deploy/kubernetes/ai-gateway/configuration/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ rules:
- "list"
- "watch"
- apiGroups:
- "inference.networking.x-k8s.io"
- "inference.networking.k8s.io"
resources:
- "inferencepools"
verbs:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: vllm-semantic-router
Expand All @@ -7,12 +7,15 @@ metadata:
aigateway.envoyproxy.io/processing-body-mode: "buffered"
aigateway.envoyproxy.io/allow-mode-override: "true"
spec:
targetPortNumber: 50051
targetPorts:
- number: 50051
selector:
app: vllm-semantic-router
extensionRef:
matchLabels:
app: vllm-semantic-router
endpointPickerRef:
name: semantic-router
portNumber: 50051
port:
number: 50051
---
apiVersion: gateway.networking.k8s.io/v1
kind: GatewayClass
Expand Down Expand Up @@ -46,7 +49,7 @@ spec:
namespace: vllm-semantic-router-system
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: vllm-semantic-router
namespace: vllm-semantic-router-system
Expand All @@ -57,4 +60,3 @@ spec:
value: /
timeouts:
request: 60s

4 changes: 2 additions & 2 deletions website/docs/installation/kubernetes.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ Install the Custom Resource Definitions (CRDs) for managing inference pools:

```bash
# Install Gateway API Inference Extension CRDs
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml

# Verify CRDs are installed
kubectl get crd | grep inference
Expand Down Expand Up @@ -257,7 +257,7 @@ helm uninstall aieg -n envoy-ai-gateway-system
helm uninstall eg -n envoy-gateway-system

# Remove Gateway API Inference Extension CRDs (optional)
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml

# Delete kind cluster
kind delete cluster --name semantic-router-cluster
Expand Down
Loading