From 5f09d0fb2ba63af14ae56840fe89f5901cf8f178 Mon Sep 17 00:00:00 2001 From: bitliu Date: Sat, 11 Oct 2025 17:32:40 +0800 Subject: [PATCH] feat: support inferencepool v1 Signed-off-by: bitliu --- deploy/kubernetes/ai-gateway/README.md | 4 ++-- .../ai-gateway/configuration/config.yaml | 4 ++-- .../ai-gateway/configuration/rbac.yaml | 2 +- .../inference-pool/inference-pool.yaml | 16 +++++++++------- website/docs/installation/kubernetes.md | 4 ++-- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/deploy/kubernetes/ai-gateway/README.md b/deploy/kubernetes/ai-gateway/README.md index 146077cf..5789a7ee 100644 --- a/deploy/kubernetes/ai-gateway/README.md +++ b/deploy/kubernetes/ai-gateway/README.md @@ -85,7 +85,7 @@ Install the Custom Resource Definitions (CRDs) for managing inference pools: ```bash # Install Gateway API Inference Extension CRDs -kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml # Verify CRDs are installed kubectl get crd | grep inference @@ -259,7 +259,7 @@ helm uninstall aieg -n envoy-ai-gateway-system helm uninstall eg -n envoy-gateway-system # Remove Gateway API CRDs (optional) -kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml +kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml # Delete kind cluster kind delete cluster --name semantic-router-cluster diff --git a/deploy/kubernetes/ai-gateway/configuration/config.yaml b/deploy/kubernetes/ai-gateway/configuration/config.yaml index c6a26686..872409a2 100644 --- a/deploy/kubernetes/ai-gateway/configuration/config.yaml +++ b/deploy/kubernetes/ai-gateway/configuration/config.yaml @@ -37,9 +37,9 @@ data: enableBackend: true extensionManager: backendResources: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool - version: v1alpha2 + version: v1 hooks: xdsTranslator: translation: diff --git a/deploy/kubernetes/ai-gateway/configuration/rbac.yaml b/deploy/kubernetes/ai-gateway/configuration/rbac.yaml index 4e3b337a..95665224 100644 --- a/deploy/kubernetes/ai-gateway/configuration/rbac.yaml +++ b/deploy/kubernetes/ai-gateway/configuration/rbac.yaml @@ -14,7 +14,7 @@ rules: - "list" - "watch" - apiGroups: - - "inference.networking.x-k8s.io" + - "inference.networking.k8s.io" resources: - "inferencepools" verbs: diff --git a/deploy/kubernetes/ai-gateway/inference-pool/inference-pool.yaml b/deploy/kubernetes/ai-gateway/inference-pool/inference-pool.yaml index 48129f5d..64afc6f9 100644 --- a/deploy/kubernetes/ai-gateway/inference-pool/inference-pool.yaml +++ b/deploy/kubernetes/ai-gateway/inference-pool/inference-pool.yaml @@ -1,4 +1,4 @@ -apiVersion: inference.networking.x-k8s.io/v1alpha2 +apiVersion: inference.networking.k8s.io/v1 kind: InferencePool metadata: name: vllm-semantic-router @@ -7,12 +7,15 @@ metadata: aigateway.envoyproxy.io/processing-body-mode: "buffered" aigateway.envoyproxy.io/allow-mode-override: "true" spec: - targetPortNumber: 50051 + targetPorts: + - number: 50051 selector: - app: vllm-semantic-router - extensionRef: + matchLabels: + app: vllm-semantic-router + endpointPickerRef: name: semantic-router - portNumber: 50051 + port: + number: 50051 --- apiVersion: gateway.networking.k8s.io/v1 kind: GatewayClass @@ -46,7 +49,7 @@ spec: namespace: vllm-semantic-router-system rules: - backendRefs: - - group: inference.networking.x-k8s.io + - group: inference.networking.k8s.io kind: InferencePool name: vllm-semantic-router namespace: vllm-semantic-router-system @@ -57,4 +60,3 @@ spec: value: / timeouts: request: 60s - diff --git a/website/docs/installation/kubernetes.md b/website/docs/installation/kubernetes.md index 38e3c77e..abad76f2 100644 --- a/website/docs/installation/kubernetes.md +++ b/website/docs/installation/kubernetes.md @@ -86,7 +86,7 @@ Install the Custom Resource Definitions (CRDs) for managing inference pools: ```bash # Install Gateway API Inference Extension CRDs -kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml # Verify CRDs are installed kubectl get crd | grep inference @@ -257,7 +257,7 @@ helm uninstall aieg -n envoy-ai-gateway-system helm uninstall eg -n envoy-gateway-system # Remove Gateway API CRDs (optional) -kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v0.5.1/manifests.yaml +kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/releases/download/v1.0.1/manifests.yaml # Delete kind cluster kind delete cluster --name semantic-router-cluster