Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions apix/v1alpha2/inferencemodelrewrite_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,25 @@ type InferenceModelRewriteSpec struct {
// If multiple InferenceModelRewrite resources target the same
// InferencePool, the controller will merge them based on precedence.
//
// **Timestamp Wins:** If two rules from different rewrites all matches,
// the rule from the *oldest*
// InferenceModelRewrite resource (determined by
// metadata.creationTimestamp) will be used.
// Across all rules specified on applicable rewrites, precedence MUST be
// given to the match having an "Exact" model match over a generic match
// (a rule with an empty `matches` array).
//
// If ties still exist across multiple InferenceModelRewrite resources (e.g.
// two rewrites both have an exact match for the same model), matching
// precedence MUST be determined by the oldest resource based on
// creation timestamp.
//
// If ties still exist within a single InferenceModelRewrite resource, the
// FIRST matching rule (in list order) is used.
// +required
Comment on lines +60 to 71
Copy link
Contributor Author

@zetxqx zetxqx Nov 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nirrozenbaum @ahg-g @kfswain

I've updated the precedence rules for conflicting matches to better align with the HTTPRoute specification in the Kubernetes Gateway API. https://github.com/kubernetes-sigs/gateway-api/blob/f24f3a61f398c65ab629da1843cb65fd5ec9419f/apis/v1/httproute_types.go#L148-L209

The new precedence order is:

  1. More specific wins: An Exact match always takes precedence over an All match (where the matches array is empty).
  2. Tie-Breaker (Oldest Rule): If the specificity of the rules is the same (a tie), the rule that was created or deployed first (the older rule) wins.

This approach is more intuitive and simplifies the implementation of efficient RewriteRule fetching per request. Specifically, when we find an exact match, we no longer need to compare it against less specific, generic rules.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SG, this matches what we had with InferenceModel also.

Rules []InferenceModelRewriteRule `json:"rules"`
}

// InferenceModelRewriteRule defines the match criteria and corresponding action.
//
// A specific model name can only be matched by one rule across all
// rules attached to the same InferencePool. If multiple rules attempt
// to match the same model name, the oldest rule (by creationTimestamp)
// will be the only one considered valid.
// For details on how precedence is determined across multiple rules and
// InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
// section in InferenceModelRewriteSpec.
type InferenceModelRewriteRule struct {
// Matches defines the criteria for matching a request.
// If multiple match criteria are specified, a request matches if
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,9 @@ spec:
items:
description: |-
InferenceModelRewriteRule defines the match criteria and corresponding action.
A specific model name can only be matched by one rule across all
rules attached to the same InferencePool. If multiple rules attempt
to match the same model name, the oldest rule (by creationTimestamp)
will be the only one considered valid.
For details on how precedence is determined across multiple rules and
InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
section in InferenceModelRewriteSpec.
properties:
matches:
items:
Expand Down
23 changes: 14 additions & 9 deletions docs/proposals/1816-inferenceomodelrewrite/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,20 +64,25 @@ type InferenceModelRewriteSpec struct {
// If multiple InferenceModelRewrite resources target the same
// InferencePool, the controller will merge them based on precedence.
//
// **Timestamp Wins:** If two rules from different rewrite all matches,
// the rule from the *oldest*
// InferenceModelRewrite resource (determined by
// metadata.creationTimestamp) will be used.
// Across all rules specified on applicable rewrites, precedence MUST be
// given to the match having an "Exact" model match over a generic match
// (a rule with an empty `matches` array).
//
// If ties still exist across multiple InferenceModelRewrite resources (e.g.
// two rewrites both have an exact match for the same model), matching
// precedence MUST be determined by the oldest resource based on
// creation timestamp.
//
// If ties still exist within a single InferenceModelRewrite resource, the
// FIRST matching rule (in list order) is used.
// +required
Rules []InferenceModelRewriteRule `json:"rules"`
}

// InferenceModelRewriteRule defines the match criteria and corresponding action.
//
// A specific model name can only be matched by one rule across all
// rewrites attached to the same InferencePool. If multiple rules attempt
// to match the same model name, the oldest rule (by creationTimestamp)
// will be the only one considered valid.
// For details on how precedence is determined across multiple rules and
// InferenceModelRewrite resources, see the "Precedence and Conflict Resolution"
// section in InferenceModelRewriteSpec.
type InferenceModelRewriteRule struct {
// Matches defines the criteria for matching a request.
// If multiple match criteria are specified, a request matches if
Expand Down
87 changes: 87 additions & 0 deletions pkg/epp/controller/inferencemodelrewrite_reconciler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
"context"
"fmt"

"k8s.io/apimachinery/pkg/api/errors"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"

"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// InferenceModelRewriteReconciler reconciles InferenceModelRewrite objects by
// mirroring the ones that reference the configured pool into the datastore and
// removing the ones that no longer qualify.
type InferenceModelRewriteReconciler struct {
// Reader is used to fetch the InferenceModelRewrite named in a reconcile request.
client.Reader
// Datastore receives RewriteSet / RewriteDelete calls as rewrites are
// added, updated, or removed.
Datastore datastore.Datastore
// PoolGKNN identifies the pool this reconciler serves; its Name and Group
// are compared against each rewrite's Spec.PoolRef.
PoolGKNN common.GKNN
}

// Reconcile synchronizes a single InferenceModelRewrite with the datastore.
//
// The rewrite is removed from the datastore when it no longer exists, is being
// deleted, has no PoolRef, or references a pool whose name or group differs
// from c.PoolGKNN. Otherwise it is stored (added or replaced) via RewriteSet.
// An error is returned only when fetching the object fails for a reason other
// than "not found".
func (c *InferenceModelRewriteReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx).V(logutil.DEFAULT)
	ctx = ctrl.LoggerInto(ctx, logger)

	logger.Info("Reconciling InferenceModelRewrite")

	rewrite := &v1alpha2.InferenceModelRewrite{}
	getErr := c.Get(ctx, req.NamespacedName, rewrite)
	if getErr != nil && !errors.IsNotFound(getErr) {
		return ctrl.Result{}, fmt.Errorf("unable to get InferenceModelRewrite - %w", getErr)
	}

	// Past the guard above, a non-nil getErr can only mean "not found".
	stale := getErr != nil || !rewrite.DeletionTimestamp.IsZero()
	if !stale {
		// The object exists and is live; it is still stale for this
		// reconciler if it does not reference the configured pool.
		ref := rewrite.Spec.PoolRef
		stale = ref == nil ||
			ref.Name != v1alpha2.ObjectName(c.PoolGKNN.Name) ||
			ref.Group != v1alpha2.Group(c.PoolGKNN.Group)
	}
	if stale {
		// InferenceModelRewrite object got deleted or changed the referenced pool.
		c.Datastore.RewriteDelete(req.NamespacedName)
		return ctrl.Result{}, nil
	}

	// The rewrite targets this pool: hand it to the datastore as-is.
	logger = logger.WithValues("poolRef", rewrite.Spec.PoolRef)
	c.Datastore.RewriteSet(rewrite)
	logger.Info("Added/Updated InferenceModelRewrite")

	return ctrl.Result{}, nil
}

// SetupWithManager registers the reconciler with mgr as the controller for
// InferenceModelRewrite objects. Events are filtered through eventPredicate so
// that only rewrites referencing the configured pool trigger reconciliation;
// an update passes the filter when either the old or the new object does.
func (c *InferenceModelRewriteReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error {
	// targetsPool applies eventPredicate to an event object. The unchecked
	// type assertion panics if the object is not an *InferenceModelRewrite.
	targetsPool := func(obj client.Object) bool {
		return c.eventPredicate(obj.(*v1alpha2.InferenceModelRewrite))
	}

	poolFilter := predicate.Funcs{
		CreateFunc: func(e event.CreateEvent) bool { return targetsPool(e.Object) },
		UpdateFunc: func(e event.UpdateEvent) bool {
			return targetsPool(e.ObjectOld) || targetsPool(e.ObjectNew)
		},
		DeleteFunc:  func(e event.DeleteEvent) bool { return targetsPool(e.Object) },
		GenericFunc: func(e event.GenericEvent) bool { return targetsPool(e.Object) },
	}

	builder := ctrl.NewControllerManagedBy(mgr).
		For(&v1alpha2.InferenceModelRewrite{}).
		WithEventFilter(poolFilter)
	return builder.Complete(c)
}

// eventPredicate reports whether infModelRewrite references the pool this
// reconciler is configured for: its PoolRef must be set and both the
// referenced name and group must equal those in c.PoolGKNN.
func (c *InferenceModelRewriteReconciler) eventPredicate(infModelRewrite *v1alpha2.InferenceModelRewrite) bool {
	ref := infModelRewrite.Spec.PoolRef
	if ref == nil {
		return false
	}
	if string(ref.Name) != c.PoolGKNN.Name {
		return false
	}
	return string(ref.Group) == c.PoolGKNN.Group
}
216 changes: 216 additions & 0 deletions pkg/epp/controller/inferencemodelrewrite_reconciler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
"context"
"testing"
"time"

"github.com/google/go-cmp/cmp"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
"sigs.k8s.io/gateway-api-inference-extension/pkg/common"
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing"
)

// Shared fixtures for the InferenceModelRewrite reconciler tests.
var (
// poolForRewrite is the pool ("test-pool1" in namespace "ns1") that the
// reconciler under test is configured to serve.
poolForRewrite = utiltest.MakeInferencePool("test-pool1").Namespace("ns1").ObjRef()
// rewrite1 is the baseline rewrite: it lives in the pool's namespace and
// its PoolRef names poolForRewrite.
rewrite1 = &v1alpha2.InferenceModelRewrite{
ObjectMeta: metav1.ObjectMeta{
Name: "rewrite1",
Namespace: poolForRewrite.Namespace,
CreationTimestamp: metav1.Unix(1000, 0),
},
Spec: v1alpha2.InferenceModelRewriteSpec{
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
},
}
// rewrite1Pool2 has the same name/namespace as rewrite1 but its PoolRef
// names a different pool ("test-pool2").
rewrite1Pool2 = &v1alpha2.InferenceModelRewrite{
ObjectMeta: metav1.ObjectMeta{
Name: rewrite1.Name,
Namespace: rewrite1.Namespace,
CreationTimestamp: metav1.Unix(1001, 0),
},
Spec: v1alpha2.InferenceModelRewriteSpec{
PoolRef: &v1alpha2.PoolObjectReference{Name: "test-pool2"},
},
}
// rewrite1Updated has the same name/namespace and pool as rewrite1 but
// carries a Rules list containing one empty rule.
rewrite1Updated = &v1alpha2.InferenceModelRewrite{
ObjectMeta: metav1.ObjectMeta{
Name: rewrite1.Name,
Namespace: rewrite1.Namespace,
CreationTimestamp: metav1.Unix(1003, 0),
},
Spec: v1alpha2.InferenceModelRewriteSpec{
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
Rules: []v1alpha2.InferenceModelRewriteRule{{}},
},
}
// rewrite1Deleted has the same name/namespace and pool as rewrite1 but
// has a DeletionTimestamp set.
rewrite1Deleted = &v1alpha2.InferenceModelRewrite{
ObjectMeta: metav1.ObjectMeta{
Name: rewrite1.Name,
Namespace: rewrite1.Namespace,
CreationTimestamp: metav1.Unix(1004, 0),
DeletionTimestamp: &metav1.Time{Time: time.Now()},
},
Spec: v1alpha2.InferenceModelRewriteSpec{
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
},
}
// rewrite2 is a second, distinctly named rewrite in the same namespace
// whose PoolRef also names poolForRewrite.
rewrite2 = &v1alpha2.InferenceModelRewrite{
ObjectMeta: metav1.ObjectMeta{
Name: "rewrite2",
Namespace: poolForRewrite.Namespace,
CreationTimestamp: metav1.Unix(1001, 0),
},
Spec: v1alpha2.InferenceModelRewriteSpec{
PoolRef: &v1alpha2.PoolObjectReference{Name: v1alpha2.ObjectName(poolForRewrite.Name)},
},
}
)

// TestInferenceModelRewriteReconciler drives Reconcile through a table of
// scenarios and checks the resulting datastore contents.
//
// For each case the fake API server is seeded with test.rewrite (unless it
// carries a deletion timestamp) plus rewritesInAPIServer, the datastore is
// seeded with rewritesInStore, and a single request is reconciled. The request
// name defaults to test.rewrite's name/namespace when incomingReq is unset.
func TestInferenceModelRewriteReconciler(t *testing.T) {
	tests := []struct {
		name                string
		rewritesInStore     []*v1alpha2.InferenceModelRewrite
		rewritesInAPIServer []*v1alpha2.InferenceModelRewrite
		rewrite             *v1alpha2.InferenceModelRewrite
		incomingReq         *types.NamespacedName
		wantRewrites        []*v1alpha2.InferenceModelRewrite
		wantResult          ctrl.Result
	}{
		{
			name:         "Empty store, add new rewrite",
			rewrite:      rewrite1,
			wantRewrites: []*v1alpha2.InferenceModelRewrite{rewrite1},
		},
		{
			name:            "Existing rewrite changed pools",
			rewritesInStore: []*v1alpha2.InferenceModelRewrite{rewrite1},
			rewrite:         rewrite1Pool2,
			wantRewrites:    []*v1alpha2.InferenceModelRewrite{},
		},
		{
			name:            "Not found, delete existing rewrite",
			rewritesInStore: []*v1alpha2.InferenceModelRewrite{rewrite1},
			incomingReq:     &types.NamespacedName{Name: rewrite1.Name, Namespace: rewrite1.Namespace},
			wantRewrites:    []*v1alpha2.InferenceModelRewrite{},
		},
		{
			name:            "Deletion timestamp set, delete existing rewrite",
			rewritesInStore: []*v1alpha2.InferenceModelRewrite{rewrite1},
			rewrite:         rewrite1Deleted,
			incomingReq:     &types.NamespacedName{Name: rewrite1Deleted.Name, Namespace: rewrite1Deleted.Namespace},
			wantRewrites:    []*v1alpha2.InferenceModelRewrite{},
		},
		{
			name:            "Rewrite updated",
			rewritesInStore: []*v1alpha2.InferenceModelRewrite{rewrite1},
			rewrite:         rewrite1Updated,
			wantRewrites:    []*v1alpha2.InferenceModelRewrite{rewrite1Updated},
		},
		{
			name:            "Rewrite not found, no matching existing rewrite to delete",
			rewritesInStore: []*v1alpha2.InferenceModelRewrite{rewrite1},
			incomingReq:     &types.NamespacedName{Name: "non-existent-rewrite", Namespace: poolForRewrite.Namespace},
			wantRewrites:    []*v1alpha2.InferenceModelRewrite{rewrite1},
		},
		{
			name:            "Add to existing",
			rewritesInStore: []*v1alpha2.InferenceModelRewrite{rewrite1},
			rewrite:         rewrite2,
			wantRewrites:    []*v1alpha2.InferenceModelRewrite{rewrite1, rewrite2},
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// A broken scheme would surface later as confusing fake-client
			// errors, so fail fast on registration problems.
			scheme := runtime.NewScheme()
			if err := clientgoscheme.AddToScheme(scheme); err != nil {
				t.Fatalf("adding client-go types to scheme: %v", err)
			}
			if err := v1alpha2.Install(scheme); err != nil {
				t.Fatalf("adding v1alpha2 types to scheme: %v", err)
			}
			if err := v1.Install(scheme); err != nil {
				t.Fatalf("adding v1 types to scheme: %v", err)
			}

			// Objects carrying a deletion timestamp are not seeded into the
			// fake API server, so Reconcile sees them as not found.
			initObjs := []client.Object{}
			if test.rewrite != nil && test.rewrite.DeletionTimestamp.IsZero() {
				initObjs = append(initObjs, test.rewrite)
			}
			for _, r := range test.rewritesInAPIServer {
				initObjs = append(initObjs, r)
			}
			fakeClient := fake.NewClientBuilder().
				WithScheme(scheme).
				WithObjects(initObjs...).
				Build()
			pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second)
			ds := datastore.NewDatastore(t.Context(), pmf, 0)
			for _, r := range test.rewritesInStore {
				ds.RewriteSet(r)
			}
			// NOTE(review): the PoolSet error is discarded, as in the original;
			// confirm whether a failure here should abort the test.
			_ = ds.PoolSet(context.Background(), fakeClient, poolForRewrite)
			reconciler := &InferenceModelRewriteReconciler{
				Reader:    fakeClient,
				Datastore: ds,
				PoolGKNN: common.GKNN{
					NamespacedName: types.NamespacedName{Name: poolForRewrite.Name, Namespace: poolForRewrite.Namespace},
					GroupKind:      schema.GroupKind{Group: poolForRewrite.GroupVersionKind().Group, Kind: poolForRewrite.GroupVersionKind().Kind},
				},
			}

			// Default the request to the rewrite under test. A case that sets
			// neither field is a table bug; report it instead of panicking.
			reqName := test.incomingReq
			if reqName == nil {
				if test.rewrite == nil {
					t.Fatal("test case must set either rewrite or incomingReq")
				}
				reqName = &types.NamespacedName{Name: test.rewrite.Name, Namespace: test.rewrite.Namespace}
			}

			result, err := reconciler.Reconcile(context.Background(), ctrl.Request{NamespacedName: *reqName})
			if err != nil {
				t.Fatalf("expected no error, got %v", err)
			}

			// cmp.Diff(x, y) marks x with "-" and y with "+", so want goes
			// first to match the "(+got/-want)" label.
			if diff := cmp.Diff(test.wantResult, result); diff != "" {
				t.Errorf("Unexpected result diff (+got/-want): %s", diff)
			}

			if got := len(ds.RewriteGetAll()); len(test.wantRewrites) != got {
				t.Errorf("Unexpected number of rewrites; want: %d, got:%d", len(test.wantRewrites), got)
			}

			if diff := diffStoreRewrites(ds, test.wantRewrites); diff != "" {
				t.Errorf("Unexpected diff (+got/-want): %s", diff)
			}
		})
	}
}

// diffStoreRewrites compares wantRewrites with everything the datastore
// returns from RewriteGetAll. It returns "" when they are equal and otherwise
// the cmp diff (want first, got second) prefixed with "rewrites:". A nil
// wantRewrites is compared as an empty, non-nil slice.
func diffStoreRewrites(ds datastore.Datastore, wantRewrites []*v1alpha2.InferenceModelRewrite) string {
	expected := wantRewrites
	if expected == nil {
		expected = []*v1alpha2.InferenceModelRewrite{}
	}

	delta := cmp.Diff(expected, ds.RewriteGetAll())
	if delta == "" {
		return ""
	}
	return "rewrites:" + delta
}
Loading
Loading