From 3b2a5955fc65a6ca1480d0097dfb11a1541e5852 Mon Sep 17 00:00:00 2001 From: Daneyon Hansen Date: Fri, 17 Oct 2025 11:53:50 -0700 Subject: [PATCH] Docs: Updates Latest/Main Quickstart Signed-off-by: Daneyon Hansen --- site-src/_includes/epp-latest.md | 43 +++++++++++++++++++++++ site-src/guides/getting-started-latest.md | 21 ++++++----- 2 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 site-src/_includes/epp-latest.md diff --git a/site-src/_includes/epp-latest.md b/site-src/_includes/epp-latest.md new file mode 100644 index 000000000..9f9a4e265 --- /dev/null +++ b/site-src/_includes/epp-latest.md @@ -0,0 +1,43 @@ +=== "GKE" + + ```bash + export GATEWAY_PROVIDER=gke + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ + --version $IGW_CHART_VERSION \ + oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool + ``` + +=== "Istio" + + ```bash + export GATEWAY_PROVIDER=istio + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ + --version $IGW_CHART_VERSION \ + oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool + ``` + +=== "Kgateway" + + ```bash + export GATEWAY_PROVIDER=none + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ + --version $IGW_CHART_VERSION \ + oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool + ``` + +=== "Agentgateway" + + ```bash + export GATEWAY_PROVIDER=none + helm install vllm-llama3-8b-instruct \ + --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ + --set provider.name=$GATEWAY_PROVIDER \ + --version $IGW_CHART_VERSION \ + 
oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool + ``` diff --git a/site-src/guides/getting-started-latest.md b/site-src/guides/getting-started-latest.md index a11e70fa5..e436a5cfe 100644 --- a/site-src/guides/getting-started-latest.md +++ b/site-src/guides/getting-started-latest.md @@ -26,13 +26,13 @@ --8<-- "site-src/_includes/model-server-cpu.md" ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/sim-deployment.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml ``` --8<-- "site-src/_includes/model-server-sim.md" ```bash - kubectl apply -f https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/refs/tags/v1.0.0/config/manifests/vllm/sim-deployment.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/sim-deployment.yaml ``` ### Install the Inference Extension CRDs @@ -51,7 +51,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens export IGW_CHART_VERSION=v0 ``` ---8<-- "site-src/_includes/epp.md" +--8<-- "site-src/_includes/epp-latest.md" ### Deploy an Inference Gateway @@ -147,8 +147,9 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens === "Kgateway" - [Kgateway](https://kgateway.dev/) added Inference Gateway support as a **technical preview** in the - [v2.0.0 release](https://github.com/kgateway-dev/kgateway/releases/tag/v2.0.0). InferencePool v1.0.1 is currently supported in the latest [rolling release](https://github.com/kgateway-dev/kgateway/releases/tag/v2.1.0-main), which includes the latest changes but may be unstable until the [v2.1.0 release](https://github.com/kgateway-dev/kgateway/milestone/58) is published. 
+ [Kgateway](https://kgateway.dev/) is a Gateway API and Inference Gateway + [conformant](https://github.com/kubernetes-sigs/gateway-api-inference-extension/tree/main/conformance/reports/v1.0.0/gateway/kgateway) + gateway. Follow these steps to run Kgateway: 1. Requirements @@ -158,7 +159,7 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens 2. Set the Kgateway version and install the Kgateway CRDs. ```bash - KGTW_VERSION=v2.1.0-main + KGTW_VERSION=v2.2.0-main helm upgrade -i --create-namespace --namespace kgateway-system --version $KGTW_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds ``` @@ -195,7 +196,9 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens === "Agentgateway" - [Agentgateway](https://agentgateway.dev/) is a purpose-built proxy designed for AI workloads, and comes with native support for Inference Gateway. Agentgateway integrates with [Kgateway](https://kgateway.dev/) as it's control plane. InferencePool v1.0.0 is currently supported in the latest [rolling release](https://github.com/kgateway-dev/kgateway/releases/tag/v2.1.0-main), which includes the latest changes but may be unstable until the [v2.1.0 release](https://github.com/kgateway-dev/kgateway/milestone/58) is published. + [Agentgateway](https://agentgateway.dev/) is a purpose-built proxy designed for AI workloads, and comes with native support for Inference Gateway. + Agentgateway integrates with [Kgateway](https://kgateway.dev/) as its control plane. Follow these steps to run Kgateway with the agentgateway + data plane: 1. Requirements @@ -205,14 +208,14 @@ kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extens 2. Set the Kgateway version and install the Kgateway CRDs. 
```bash - KGTW_VERSION=v2.1.0-main + KGTW_VERSION=v2.2.0-main helm upgrade -i --create-namespace --namespace kgateway-system --version $KGTW_VERSION kgateway-crds oci://cr.kgateway.dev/kgateway-dev/charts/kgateway-crds ``` 3. Install Kgateway ```bash - helm upgrade -i --namespace kgateway-system --version $KGTW_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true --set agentGateway.enabled=true + helm upgrade -i --namespace kgateway-system --version $KGTW_VERSION kgateway oci://cr.kgateway.dev/kgateway-dev/charts/kgateway --set inferenceExtension.enabled=true --set agentgateway.enabled=true ``` 4. Deploy the Gateway