nginx · shaun-nx · Sep 9, 2025 · Sep 16, 2025 · Sep 18, 2025 · Sep 24, 2025
diff --git a/.nvmrc b/.nvmrc
@@ -1 +1 @@
-22
+24
diff --git a/Makefile b/Makefile
@@ -33,7 +33,7 @@ GEN_CRD_API_REFERENCE_DOCS_VERSION = v0.3.0
 # renovate: datasource=go depName=sigs.k8s.io/controller-tools
 CONTROLLER_TOOLS_VERSION = v0.19.0
 # renovate: datasource=docker depName=node
-NODE_VERSION = 22
+NODE_VERSION = 24
 # renovate: datasource=docker depName=quay.io/helmpack/chart-testing
 CHART_TESTING_VERSION = v3.13.0
 # renovate: datasource=github-tags depName=dadav/helm-schema
@@ -136,6 +136,14 @@ install-gateway-crds: ## Install Gateway API CRDs
 uninstall-gateway-crds: ## Uninstall Gateway API CRDs
 	kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f -
 
+.PHONY: install-inference-crds
+install-inference-crds: ## Install Gateway API Inference Extension CRDs
+	kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f -
+
+.PHONY: uninstall-inference-crds
+uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs
+	kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f -
+
 .PHONY: generate-manifests
 generate-manifests: ## Generate manifests using Helm.
 	./scripts/generate-manifests.sh

diff --git a/build/Dockerfile.nginx b/build/Dockerfile.nginx
@@ -23,7 +23,7 @@ RUN apk add --no-cache bash \
     && ln -sf /dev/stderr /var/log/nginx/error.log
 
 COPY build/entrypoint.sh /agent/entrypoint.sh
-COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js
+COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/
 COPY ${NGINX_CONF_DIR}/nginx.conf /etc/nginx/nginx.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf

diff --git a/build/Dockerfile.nginxplus b/build/Dockerfile.nginxplus
@@ -29,7 +29,7 @@ RUN apk add --no-cache bash \
     && ln -sf /dev/stderr /var/log/nginx/error.log
 
 COPY build/entrypoint.sh /agent/entrypoint.sh
-COPY ${NJS_DIR}/httpmatches.js /usr/lib/nginx/modules/njs/httpmatches.js
+COPY ${NJS_DIR}/ /usr/lib/nginx/modules/njs/
 COPY ${NGINX_CONF_DIR}/nginx-plus.conf /etc/nginx/nginx.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-locations.conf /etc/nginx/grpc-error-locations.conf
 COPY ${NGINX_CONF_DIR}/grpc-error-pages.conf /etc/nginx/grpc-error-pages.conf

diff --git a/charts/nginx-gateway-fabric/README.md b/charts/nginx-gateway-fabric/README.md
@@ -245,7 +245,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
 | `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` |
 | `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` |
 | `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` |
-| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
+| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
 | `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` |
 | `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` |
 | `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` |
@@ -257,6 +257,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
 | `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. have the "gatewayClassName" field resource equal to the class. | string | `"nginx"` |
 | `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` |
 | `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` |
+| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` |
 | `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` |
 | `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` |
 | `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` |

diff --git a/charts/nginx-gateway-fabric/templates/clusterrole.yaml b/charts/nginx-gateway-fabric/templates/clusterrole.yaml
@@ -129,6 +129,22 @@ rules:
   {{- end }}
   verbs:
   - update
+{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - inference.networking.k8s.io
+  resources:
+  - inferencepools/status
+  verbs:
+  - update
+{{- end }}
 {{- if .Values.nginxGateway.leaderElection.enable }}
 - apiGroups:
   - coordination.k8s.io

diff --git a/charts/nginx-gateway-fabric/templates/deployment.yaml b/charts/nginx-gateway-fabric/templates/deployment.yaml
@@ -100,6 +100,9 @@ spec:
         {{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }}
         - --gateway-api-experimental-features
         {{- end }}
+        {{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
+        - --gateway-api-inference-extension
+        {{- end }}
         {{- if .Values.nginxGateway.snippetsFilters.enable }}
         - --snippets-filters
         {{- end }}

diff --git a/charts/nginx-gateway-fabric/values.schema.json b/charts/nginx-gateway-fabric/values.schema.json
@@ -838,6 +838,20 @@
           "title": "gwAPIExperimentalFeatures",
           "type": "object"
         },
+        "gwAPIInferenceExtension": {
+          "properties": {
+            "enable": {
+              "default": false,
+              "description": "Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.",
+              "required": [],
+              "title": "enable",
+              "type": "boolean"
+            }
+          },
+          "required": [],
+          "title": "gwAPIInferenceExtension",
+          "type": "object"
+        },
         "image": {
           "description": "The image configuration for the NGINX Gateway Fabric control plane.",
           "properties": {

diff --git a/charts/nginx-gateway-fabric/values.yaml b/charts/nginx-gateway-fabric/values.yaml
@@ -210,6 +210,10 @@ nginxGateway:
     # APIs installed from the experimental channel.
     enable: false
 
+  gwAPIInferenceExtension:
+    # -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.
+    enable: false
+
   snippetsFilters:
     # -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX
     # config for HTTPRoute and GRPCRoute resources.

diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go
@@ -85,6 +85,7 @@ func createControllerCommand() *cobra.Command {
 		leaderElectionLockNameFlag          = "leader-election-lock-name"
 		productTelemetryDisableFlag         = "product-telemetry-disable"
 		gwAPIExperimentalFlag               = "gateway-api-experimental-features"
+		gwAPIInferenceExtensionFlag         = "gateway-api-inference-extension"
 		nginxDockerSecretFlag               = "nginx-docker-secret" //nolint:gosec // not credentials
 		usageReportSecretFlag               = "usage-report-secret"
 		usageReportEndpointFlag             = "usage-report-endpoint"
@@ -151,6 +152,7 @@ func createControllerCommand() *cobra.Command {
 		}
 
 		gwExperimentalFeatures bool
+		gwInferenceExtension   bool
 
 		disableProductTelemetry bool
 
@@ -270,6 +272,7 @@ func createControllerCommand() *cobra.Command {
 				},
 				Plus:                 plus,
 				ExperimentalFeatures: gwExperimentalFeatures,
+				InferenceExtension:   gwInferenceExtension,
 				ImageSource:          imageSource,
 				Flags: config.Flags{
 					Names:  flagKeys,
@@ -430,6 +433,14 @@ func createControllerCommand() *cobra.Command {
 			"Requires the Gateway APIs installed from the experimental channel.",
 	)
 
+	cmd.Flags().BoolVar(
+		&gwInferenceExtension,
+		gwAPIInferenceExtensionFlag,
+		false,
+		"Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route "+
+			"traffic to AI workloads.",
+	)
+
 	cmd.Flags().Var(
 		&nginxDockerSecrets,
 		nginxDockerSecretFlag,
@@ -746,6 +757,20 @@ func createSleepCommand() *cobra.Command {
 	return cmd
 }
 
+func createEndpointPickerCommand() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "endpoint-picker",
+		Short: "Shim server for communication between NGINX and the Gateway API Inference Extension Endpoint Picker",
+		RunE: func(_ *cobra.Command, _ []string) error {
+			logger := ctlrZap.New().WithName("endpoint-picker-shim")
+			handler := createEndpointPickerHandler(realExtProcClientFactory(), logger)
+			return endpointPickerServer(handler)
+		},
+	}
+
+	return cmd
+}
+
 func parseFlags(flags *pflag.FlagSet) ([]string, []string) {
 	var flagKeys, flagValues []string
 

diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go
@@ -0,0 +1,186 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"time"
+
+	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
+	extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
+	"github.com/go-logr/logr"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
+
+	"github.com/nginx/nginx-gateway-fabric/v2/internal/framework/types"
+)
+
+// extProcClientFactory creates a new ExternalProcessorClient and returns a close function.
+type extProcClientFactory func(target string) (extprocv3.ExternalProcessorClient, func() error, error)
+
+// endpointPickerServer starts an HTTP server on the given port with the provided handler.
+func endpointPickerServer(handler http.Handler) error {
+	server := &http.Server{
+		Addr:              fmt.Sprintf("127.0.0.1:%d", types.GoShimPort),
+		Handler:           handler,
+		ReadHeaderTimeout: 10 * time.Second,
+	}
+	return server.ListenAndServe()
+}
+
+// realExtProcClientFactory returns a factory that creates a new gRPC connection and client per request.
+func realExtProcClientFactory() extProcClientFactory {
+	return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) {
+		conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials()))
+		if err != nil {
+			return nil, nil, err
+		}
+		client := extprocv3.NewExternalProcessorClient(conn)
+		return client, conn.Close, nil
+	}
+}
+
+// createEndpointPickerHandler returns an http.Handler that forwards requests to the EndpointPicker.
+func createEndpointPickerHandler(factory extProcClientFactory, logger logr.Logger) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		host := r.Header.Get(types.EPPEndpointHostHeader)
+		port := r.Header.Get(types.EPPEndpointPortHeader)
+		if host == "" || port == "" {
+			msg := fmt.Sprintf(
+				"missing at least one of required headers: %s and %s",
+				types.EPPEndpointHostHeader,
+				types.EPPEndpointPortHeader,
+			)
+			logger.Error(errors.New(msg), "error contacting EndpointPicker")
+			http.Error(w, msg, http.StatusBadRequest)
+			return
+		}
+
+		target := net.JoinHostPort(host, port)
+		logger.Info("Getting inference workload endpoint from EndpointPicker", "endpointPicker", target)
+
+		client, closeConn, err := factory(target)
+		if err != nil {
+			logger.Error(err, "error creating gRPC client")
+			http.Error(w, fmt.Sprintf("error creating gRPC client: %v", err), http.StatusInternalServerError)
+			return
+		}
+		defer func() {
+			if err := closeConn(); err != nil {
+				logger.Error(err, "error closing gRPC connection")
+			}
+		}()
+
+		stream, err := client.Process(r.Context())
+		if err != nil {
+			logger.Error(err, "error opening ext_proc stream")
+			http.Error(w, fmt.Sprintf("error opening ext_proc stream: %v", err), http.StatusBadGateway)
+			return
+		}
+
+		if code, err := sendRequest(stream, r); err != nil {
+			logger.Error(err, "error sending request")
+			http.Error(w, err.Error(), code)
+			return
+		}
+
+		// Receive response and extract header
+		for {
+			resp, err := stream.Recv()
+			if errors.Is(err, io.EOF) {
+				break // End of stream
+			} else if err != nil {
+				logger.Error(err, "error receiving from ext_proc")
+				http.Error(w, fmt.Sprintf("error receiving from ext_proc: %v", err), http.StatusBadGateway)
+				return
+			}
+
+			if ir := resp.GetImmediateResponse(); ir != nil {
+				code := int(ir.GetStatus().GetCode())
+				body := ir.GetBody()
+				logger.Error(fmt.Errorf("code: %d, body: %s", code, body), "received immediate response")
+				http.Error(w, string(body), code)
+				return
+			}
+
+			headers := resp.GetRequestHeaders().GetResponse().GetHeaderMutation().GetSetHeaders()
+			for _, h := range headers {
+				if h.GetHeader().GetKey() == eppMetadata.DestinationEndpointKey {
+					endpoint := string(h.GetHeader().GetRawValue())
+					w.Header().Set(h.GetHeader().GetKey(), endpoint)
+					logger.Info("Found endpoint", "endpoint", endpoint)
+				}
+			}
+		}
+		w.WriteHeader(http.StatusOK)
+	})
+}
+
+func sendRequest(stream extprocv3.ExternalProcessor_ProcessClient, r *http.Request) (int, error) {
+	if err := stream.Send(buildHeaderRequest(r)); err != nil {
+		return http.StatusBadGateway, fmt.Errorf("error sending headers: %w", err)
+	}
+
+	bodyReq, err := buildBodyRequest(r)
+	if err != nil {
+		return http.StatusInternalServerError, fmt.Errorf("error building body request: %w", err)
+	}
+
+	if err := stream.Send(bodyReq); err != nil {
+		return http.StatusBadGateway, fmt.Errorf("error sending body: %w", err)
+	}
+
+	if err := stream.CloseSend(); err != nil {
+		return http.StatusInternalServerError, fmt.Errorf("error closing stream: %w", err)
+	}
+
+	return 0, nil
+}
+
+func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest {
+	headerList := make([]*corev3.HeaderValue, 0, len(r.Header))
+	headerMap := &corev3.HeaderMap{
+		Headers: headerList,
+	}
+
+	for key, values := range r.Header {
+		for _, value := range values {
+			headerMap.Headers = append(headerMap.Headers, &corev3.HeaderValue{
+				Key:   key,
+				Value: value,
+			})
+		}
+	}
+
+	return &extprocv3.ProcessingRequest{
+		Request: &extprocv3.ProcessingRequest_RequestHeaders{
+			RequestHeaders: &extprocv3.HttpHeaders{
+				Headers:     headerMap,
+				EndOfStream: false,
+			},
+		},
+	}
+}
+
+func buildBodyRequest(r *http.Request) (*extprocv3.ProcessingRequest, error) {
+	if r.ContentLength == 0 {
+		return nil, errors.New("request body is empty")
+	}
+
+	body, err := io.ReadAll(r.Body)
+	if err != nil {
+		return nil, fmt.Errorf("error reading request body: %w", err)
+	}
+
+	return &extprocv3.ProcessingRequest{
+		Request: &extprocv3.ProcessingRequest_RequestBody{
+			RequestBody: &extprocv3.HttpBody{
+				Body:        body,
+				EndOfStream: true,
+			},
+		},
+	}, nil
+}