davidbreitgand · davidbreitgand · Sep 14, 2025 · Sep 15, 2025 · Sep 15, 2025 · Sep 28, 2025
diff --git a/.gitignore b/.gitignore
@@ -33,3 +33,9 @@ site
 
 # MacOS generated files
 **/.DS_Store
+
+# Archives (gzip files at any depth)
+*.gz
+
+# Binaries (any file or directory named istioctl)
+istioctl
diff --git a/bbr.Dockerfile b/bbr.Dockerfile
@@ -18,6 +18,7 @@ RUN go mod download
 COPY cmd/bbr ./cmd
 COPY pkg ./pkg
 COPY internal ./internal
+COPY api ./api
 WORKDIR /src/cmd
 RUN go build -o /bbr
 

diff --git a/config/charts/body-based-routing/values.yaml b/config/charts/body-based-routing/values.yaml
@@ -4,8 +4,10 @@ bbr:
   image:
     name: bbr
     hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension
+    # Local kind development: override at install time instead of editing these defaults, e.g.
+    #   --set bbr.image.tag="$(git describe --tags --dirty --always)" --set bbr.image.pullPolicy=IfNotPresent
     tag: main
     pullPolicy: Always
   port: 9004
   healthCheckPort: 9005
 

diff --git a/config/manifests/bbr/bbr-multi-model.yaml b/config/manifests/bbr/bbr-multi-model.yaml
@@ -0,0 +1,33 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: routes-to-llms
+spec:
+  parentRefs:
+  - name: inference-gateway
+  rules:
+  - matches:
+    - headers:
+      - type: Exact
+        # Body-Based Routing (https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/bbr/README.md) copies the model name from the request body into this header.
+        name: X-Gateway-Model-Name
+        value: 'meta-llama/Llama-3.1-8B-Instruct'
+      path:
+        type: PathPrefix
+        value: /
+    backendRefs:
+    - name: vllm-llama3-8b-instruct
+      group: inference.networking.k8s.io  # same InferencePool API group as multi-model-route.yaml
+      kind: InferencePool
+  - matches:
+    - headers:
+      - type: Exact
+        name: X-Gateway-Model-Name
+        value: 'deepseek/deepseek-r1'
+      path:
+        type: PathPrefix
+        value: /
+    backendRefs:
+    - name: vllm-deepseek-r1  # same pool name as llm-deepseek-route in multi-model-route.yaml
+      group: inference.networking.k8s.io
+      kind: InferencePool
diff --git a/config/manifests/bbr/multi-model-route.yaml b/config/manifests/bbr/multi-model-route.yaml
@@ -0,0 +1,94 @@
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-llama-route
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: vllm-llama3-8b-instruct
+    matches:
+    - path:
+        type: PathPrefix
+        value: /
+      headers:
+        - type: Exact
+          # Body-Based Routing (https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/bbr/README.md) copies the model name from the request body into this header.
+          name: X-Gateway-Model-Name
+          value: 'meta-llama/Llama-3.1-8B-Instruct'
+    timeouts:
+      request: 300s
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: llm-deepseek-route
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: vllm-deepseek-r1
+    matches:
+    - path:
+        type: PathPrefix
+        value: /
+      headers:
+        - type: Exact
+          # Body-Based Routing (https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/bbr/README.md) copies the model name from the request body into this header.
+          name: X-Gateway-Model-Name
+          value: 'deepseek/deepseek-r1'
+    - path:
+        type: PathPrefix
+        value: /
+      headers:
+        - type: Exact
+          # LoRA adapter name; a request matching any one entry under `matches` is sent to the backendRefs above.
+          name: X-Gateway-Model-Name
+          value: 'food-review'
+    - path:
+        type: PathPrefix
+        value: /
+      headers:
+        - type: Exact
+          # LoRA adapter name, routed to the same pool as the base model.
+          name: X-Gateway-Model-Name
+          value: 'movie-critique'
+    timeouts:
+      request: 300s
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: vllm-llama3-8b-instruct-lora-food-review-1  # any name that helps you group and track the routes
+spec:
+  parentRefs:
+  - group: gateway.networking.k8s.io
+    kind: Gateway
+    name: inference-gateway
+  rules:
+  - backendRefs:
+    - group: inference.networking.k8s.io
+      kind: InferencePool
+      name: vllm-llama3-8b-instruct
+    matches:
+    - path:
+        type: PathPrefix
+        value: /
+      headers:
+        - type: Exact
+          # Body-Based Routing (https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/bbr/README.md) copies the model name from the request body into this header.
+          name: X-Gateway-Model-Name
+          value: 'food-review-1'  # name of the LoRA adapter as defined in the vLLM deployment
+    timeouts:
+      request: 300s
diff --git a/config/manifests/vllm/sim-deployment-deepseek.yaml b/config/manifests/vllm/sim-deployment-deepseek.yaml
@@ -0,0 +1,44 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm-deepseek-r1
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: vllm-deepseek-r1
+  template:
+    metadata:
+      labels:
+        app: vllm-deepseek-r1  # must match spec.selector.matchLabels above
+    spec:
+      containers:
+      - name: vllm-sim
+        image: ghcr.io/llm-d/llm-d-inference-sim:v0.4.0
+        imagePullPolicy: Always
+        args:
+        - --model
+        - deepseek/deepseek-r1  # same value the llm-deepseek-route HTTPRoute matches on X-Gateway-Model-Name
+        - --port
+        - "8000"  # same port as containerPort below
+        - --max-loras
+        - "2"
+        - --lora-modules  # each following argument is a JSON object naming one LoRA adapter
+        - '{"name": "food-review"}'
+        - '{"name": "movie-critique"}'
+        env:
+        - name: POD_NAME  # populated from the pod's own metadata.name
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: NAMESPACE  # populated from the pod's own metadata.namespace
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        ports:
+        - containerPort: 8000
+          name: http
+          protocol: TCP
+        resources:
+          requests:
+            cpu: 10m
diff --git a/config/manifests/vllm/sim-deployment-llama.yaml b/config/manifests/vllm/sim-deployment-llama.yaml
@@ -0,0 +1,44 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vllm-llama3-8b-instruct
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: vllm-llama3-8b-instruct
+  template:
+    metadata:
+      labels:
+        app: vllm-llama3-8b-instruct  # must match spec.selector.matchLabels above
+    spec:
+      containers:
+      - name: vllm-sim
+        image: ghcr.io/llm-d/llm-d-inference-sim:v0.4.0
+        imagePullPolicy: Always
+        args:
+        - --model
+        - meta-llama/Llama-3.1-8B-Instruct  # same value the llm-llama-route HTTPRoute matches on X-Gateway-Model-Name
+        - --port
+        - "8000"  # same port as containerPort below
+        - --max-loras
+        - "2"
+        - --lora-modules  # each following argument is a JSON object naming one LoRA adapter
+        - '{"name": "food-review-1"}'
+        - '{"name": "food-review-2"}'  # NOTE(review): no HTTPRoute in multi-model-route.yaml matches food-review-2 - confirm intended
+        env:
+        - name: POD_NAME  # populated from the pod's own metadata.name
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: NAMESPACE  # populated from the pod's own metadata.namespace
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        ports:
+        - containerPort: 8000
+          name: http
+          protocol: TCP
+        resources:
+          requests:
+            cpu: 10m
diff --git a/conformance/scripts/istio/Makefile b/conformance/scripts/istio/Makefile
@@ -2,7 +2,8 @@
 # Example: make all ISTIO_VERSION=1.28.0
 GATEWAY_API_VERSION ?= v1.3.0
 INFERENCE_EXTENSION_VERSION ?= v0.4.0
-ISTIO_VERSION ?= 1.27-alpha.0551127f00634403cddd4634567e65a8ecc499a7
+# Default Istio dev build; `?=` only applies when ISTIO_VERSION is not already set (see the example above).
+ISTIO_VERSION ?= 1.27-alpha.e5a2085c2d34299580f6bb2c124752ba4b1aa651
 ISTIO_HUB ?= 
 ISTIO_PROFILE ?= minimal
 

diff --git a/go.mod b/go.mod
@@ -85,13 +85,18 @@ require (
 	github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
 	github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
+	github.com/openai/openai-go/v2 v2.4.2 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/prometheus/procfs v0.16.1 // indirect
 	github.com/spf13/cobra v1.9.1 // indirect
 	github.com/spf13/pflag v1.0.7 // indirect
 	github.com/stoewer/go-strcase v1.3.0 // indirect
+	github.com/tidwall/gjson v1.18.0 // indirect
+	github.com/tidwall/match v1.2.0 // indirect
+	github.com/tidwall/pretty v1.2.1 // indirect
+	github.com/tidwall/sjson v1.2.5 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
 	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect

diff --git a/go.sum b/go.sum
@@ -200,6 +200,8 @@ github.com/onsi/ginkgo/v2 v2.25.1 h1:Fwp6crTREKM+oA6Cz4MsO8RhKQzs2/gOIVOUscMAfZY
 github.com/onsi/ginkgo/v2 v2.25.1/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk=
 github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
 github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
+github.com/openai/openai-go/v2 v2.4.2 h1:TF37Vjq2rX2FmPlnn38rPgfa80V4eKvsmSQz1GeB1M0=
+github.com/openai/openai-go/v2 v2.4.2/go.mod h1:sIUkR+Cu/PMUVkSKhkk742PRURkQOCFhiwJ7eRSBqmk=
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ=
 github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
@@ -244,6 +246,17 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY=
+github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
+github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM=
+github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
+github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
+github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
+github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
+github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
 github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
 github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=