Skip to content

Commit 3bd0c78

Browse files
committed
(llmisvc): migrate to v1 InferencePool with v1alpha2 failover
- Update API to use v1 InferencePool spec
- Create both v1 and v1alpha2 InferencePool objects
- Implement one-way migration with HTTPRoute annotation
- Add dual backendRefs to HTTPRoute (v1:100, v1alpha2:0)
- Report InferencePoolReady when either pool is ready (prioritize v1)
- Store migration state on child objects (HTTPRoute)
- Add test coverage for migration logic

Implements RHOAIENG-34472 acceptance criteria:
- ✅ v1 API usage
- ✅ Dual pool creation
- ✅ Readiness aggregation
- ✅ HTTPRoute migration logic

Signed-off-by: Killian Golds <[email protected]>
rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
1 parent f834155 commit 3bd0c78

20 files changed

+1540
-473
lines changed

cmd/manager/main.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ import (
3333
istioclientv1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1"
3434
corev1 "k8s.io/api/core/v1"
3535
rbacv1 "k8s.io/api/rbac/v1"
36+
"k8s.io/client-go/dynamic"
3637
"k8s.io/client-go/kubernetes"
3738
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
3839
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
@@ -311,9 +312,15 @@ func main() {
311312
setupLog.Info("Setting up LLMInferenceService controller")
312313
llmEventBroadcaster := record.NewBroadcaster()
313314
llmEventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: clientSet.CoreV1().Events("")})
315+
dynamicClient, err := dynamic.NewForConfig(mgr.GetConfig())
316+
if err != nil {
317+
setupLog.Error(err, "unable to create dynamic client")
318+
os.Exit(1)
319+
}
314320
if err = (&llmisvc.LLMInferenceServiceReconciler{
315321
Client: mgr.GetClient(),
316322
Clientset: clientSet,
323+
DynamicClient: dynamicClient,
317324
EventRecorder: llmEventBroadcaster.NewRecorder(scheme, corev1.EventSource{Component: "LLMInferenceServiceController"}),
318325
}).SetupWithManager(mgr); err != nil {
319326
setupLog.Error(err, "unable to create controller", "v1beta1Controller", "InferenceService")

config/llmisvc/config-llm-router-route.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@ spec:
1616
{{ .GlobalConfig.IngressGatewayNamespace }}
1717
rules:
1818
- backendRefs:
19+
- group: inference.networking.k8s.io
20+
kind: InferencePool
21+
name: |-
22+
{{ ChildName .ObjectMeta.Name `-inference-pool` }}
23+
port: 8000
24+
weight: 100
1925
- group: inference.networking.x-k8s.io
2026
kind: InferencePool
2127
name: |-
2228
{{ ChildName .ObjectMeta.Name `-inference-pool` }}
2329
port: 8000
24-
weight: 1
30+
weight: 0
2531
matches:
2632
- path:
2733
type: PathPrefix

config/llmisvc/config-llm-scheduler.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ spec:
77
scheduler:
88
pool:
99
spec:
10-
extensionRef:
11-
failureMode: FailOpen
10+
endpointPickerRef:
1211
kind: Service
1312
name: |-
1413
{{ ChildName .ObjectMeta.Name `-epp-service` }}
15-
selector: { }
16-
targetPortNumber: 8000
14+
selector:
15+
matchLabels: { }
16+
targetPorts:
17+
- number: 8000
1718
template:
1819
containers:
1920
- name: main

go.mod

Lines changed: 77 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ go 1.24.1
44

55
require (
66
cloud.google.com/go/storage v1.50.0
7-
github.com/aws/aws-sdk-go v1.55.6
7+
github.com/aws/aws-sdk-go v1.55.7
88
github.com/cloudevents/sdk-go/v2 v2.15.2
99
github.com/fsnotify/fsnotify v1.9.0
1010
github.com/getkin/kin-openapi v0.131.0
@@ -18,69 +18,78 @@ require (
1818
github.com/json-iterator/go v1.1.12
1919
github.com/kedacore/keda/v2 v2.16.1
2020
github.com/kelseyhightower/envconfig v1.4.0
21-
github.com/onsi/ginkgo/v2 v2.23.3
22-
github.com/onsi/gomega v1.36.3
21+
github.com/onsi/ginkgo/v2 v2.24.0
22+
github.com/onsi/gomega v1.38.0
2323
github.com/open-telemetry/opentelemetry-operator v0.113.0
2424
github.com/openshift/api v0.0.0-20241108213852-e22f17d9b7f5
2525
github.com/pkg/errors v0.9.1
2626
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.2
27-
github.com/spf13/cobra v1.8.1
27+
github.com/spf13/cobra v1.9.1
2828
github.com/spf13/pflag v1.0.6
2929
github.com/stretchr/testify v1.10.0
3030
github.com/tidwall/gjson v1.18.0
3131
go.uber.org/zap v1.27.0
3232
gomodules.xyz/jsonpatch/v2 v2.5.0
33-
google.golang.org/api v0.226.0
34-
google.golang.org/protobuf v1.36.6
33+
google.golang.org/api v0.238.0
34+
google.golang.org/protobuf v1.36.7
3535
gopkg.in/go-playground/validator.v9 v9.31.0
3636
istio.io/api v1.24.2
3737
istio.io/client-go v1.24.2
38-
k8s.io/api v0.33.1
39-
k8s.io/apiextensions-apiserver v0.33.1
40-
k8s.io/apimachinery v0.33.1
41-
k8s.io/client-go v0.33.1
42-
k8s.io/code-generator v0.33.1
38+
k8s.io/api v0.33.4
39+
k8s.io/apiextensions-apiserver v0.33.4
40+
k8s.io/apimachinery v0.33.4
41+
k8s.io/client-go v0.33.4
42+
k8s.io/code-generator v0.33.4
4343
k8s.io/component-helpers v0.33.1
4444
k8s.io/klog/v2 v2.130.1
4545
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff
4646
k8s.io/utils v0.0.0-20241210054802-24370beab758
4747
knative.dev/networking v0.0.0-20250117155906-67d1c274ba6a
4848
knative.dev/pkg v0.0.0-20250117084104-c43477f0052b
4949
knative.dev/serving v0.44.0
50-
sigs.k8s.io/controller-runtime v0.20.4
51-
sigs.k8s.io/gateway-api v1.2.1
52-
sigs.k8s.io/gateway-api-inference-extension v0.3.0
50+
sigs.k8s.io/controller-runtime v0.21.0
51+
sigs.k8s.io/gateway-api v1.3.0
52+
sigs.k8s.io/gateway-api-inference-extension v1.0.1
5353
sigs.k8s.io/lws v0.6.2
54-
sigs.k8s.io/yaml v1.4.0
54+
sigs.k8s.io/yaml v1.6.0
5555
)
5656

5757
require (
58-
cel.dev/expr v0.19.1 // indirect
59-
cloud.google.com/go v0.116.0 // indirect
60-
cloud.google.com/go/auth v0.15.0 // indirect
61-
cloud.google.com/go/auth/oauth2adapt v0.2.7 // indirect
62-
cloud.google.com/go/compute/metadata v0.6.0 // indirect
63-
cloud.google.com/go/iam v1.2.2 // indirect
64-
cloud.google.com/go/monitoring v1.22.0 // indirect
58+
cel.dev/expr v0.24.0 // indirect
59+
cloud.google.com/go v0.120.0 // indirect
60+
cloud.google.com/go/auth v0.16.2 // indirect
61+
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
62+
cloud.google.com/go/compute/metadata v0.7.0 // indirect
63+
cloud.google.com/go/iam v1.5.2 // indirect
64+
cloud.google.com/go/monitoring v1.24.2 // indirect
6565
contrib.go.opencensus.io/exporter/ocagent v0.7.1-0.20200907061046-05415f1de66d // indirect
6666
contrib.go.opencensus.io/exporter/prometheus v0.4.2 // indirect
6767
dario.cat/mergo v1.0.1 // indirect
68-
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.25.0 // indirect
69-
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.48.1 // indirect
70-
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.48.1 // indirect
68+
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 // indirect
69+
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1 // indirect
70+
github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.27.0 // indirect
71+
github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.50.0 // indirect
72+
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.50.0 // indirect
73+
github.com/Masterminds/semver/v3 v3.3.1 // indirect
74+
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect
7175
github.com/beorn7/perks v1.0.1 // indirect
7276
github.com/blendle/zapdriver v1.3.1 // indirect
7377
github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
7478
github.com/cespare/xxhash/v2 v2.3.0 // indirect
75-
github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3 // indirect
79+
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect
80+
github.com/containerd/errdefs v1.0.0 // indirect
81+
github.com/containerd/errdefs/pkg v0.3.0 // indirect
7682
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
83+
github.com/digitalocean/godo v1.152.0 // indirect
84+
github.com/docker/docker v28.2.2+incompatible // indirect
7785
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
7886
github.com/envoyproxy/go-control-plane/envoy v1.32.4 // indirect
7987
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
8088
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
8189
github.com/expr-lang/expr v1.17.0 // indirect
8290
github.com/felixge/httpsnoop v1.0.4 // indirect
8391
github.com/fxamacker/cbor/v2 v2.8.0 // indirect
92+
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
8493
github.com/go-kit/log v0.2.1 // indirect
8594
github.com/go-logfmt/logfmt v0.6.0 // indirect
8695
github.com/go-logr/stdr v1.2.2 // indirect
@@ -91,23 +100,31 @@ require (
91100
github.com/go-playground/universal-translator v0.18.1 // indirect
92101
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
93102
github.com/go-test/deep v1.1.0 // indirect
103+
github.com/go-zookeeper/zk v1.0.4 // indirect
94104
github.com/gogo/protobuf v1.3.2 // indirect
95105
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
96106
github.com/google/gnostic-models v0.6.9 // indirect
97107
github.com/google/go-containerregistry v0.13.0 // indirect
98-
github.com/google/pprof v0.0.0-20250208200701-d0013a598941 // indirect
108+
github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a // indirect
99109
github.com/google/s2a-go v0.1.9 // indirect
100110
github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
101-
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
111+
github.com/googleapis/gax-go/v2 v2.14.2 // indirect
112+
github.com/gophercloud/gophercloud/v2 v2.7.0 // indirect
102113
github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
103-
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 // indirect
114+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
115+
github.com/hashicorp/consul/api v1.32.0 // indirect
104116
github.com/hashicorp/go-version v1.7.0 // indirect
105117
github.com/hashicorp/golang-lru v1.0.2 // indirect
118+
github.com/hashicorp/nomad/api v0.0.0-20241218080744-e3ac00f30eec // indirect
119+
github.com/hetznercloud/hcloud-go/v2 v2.21.1 // indirect
106120
github.com/inconshreveable/mousetrap v1.1.0 // indirect
121+
github.com/ionos-cloud/sdk-go/v6 v6.3.4 // indirect
107122
github.com/jmespath/go-jmespath v0.4.1-0.20220621161143-b0104c826a24 // indirect
108123
github.com/josharian/intern v1.0.0 // indirect
109124
github.com/leodido/go-urn v1.4.0 // indirect
125+
github.com/linode/linodego v1.52.1 // indirect
110126
github.com/mailru/easyjson v0.9.0 // indirect
127+
github.com/miekg/dns v1.1.66 // indirect
111128
github.com/mitchellh/mapstructure v1.5.0 // indirect
112129
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
113130
github.com/modern-go/reflect2 v1.0.2 // indirect
@@ -116,47 +133,54 @@ require (
116133
github.com/oasdiff/yaml v0.0.0-20250309154309-f31be36b4037 // indirect
117134
github.com/oasdiff/yaml3 v0.0.0-20250309153720-d2182401db90 // indirect
118135
github.com/opencontainers/go-digest v1.0.0 // indirect
136+
github.com/ovh/go-ovh v1.8.0 // indirect
119137
github.com/perimeterx/marshmallow v1.1.5 // indirect
120138
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
121139
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
122-
github.com/prometheus/client_golang v1.22.0 // indirect
140+
github.com/prometheus/client_golang v1.23.0 // indirect
123141
github.com/prometheus/client_model v0.6.2 // indirect
124-
github.com/prometheus/common v0.64.0 // indirect
142+
github.com/prometheus/common v0.65.0 // indirect
125143
github.com/prometheus/procfs v0.16.1 // indirect
126-
github.com/prometheus/prometheus v0.55.1 // indirect
144+
github.com/prometheus/sigv4 v0.2.0 // indirect
127145
github.com/prometheus/statsd_exporter v0.27.1 // indirect
146+
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.33 // indirect
147+
github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect
148+
github.com/stackitcloud/stackit-sdk-go/core v0.17.2 // indirect
128149
github.com/stretchr/objx v0.5.2 // indirect
129150
github.com/tidwall/match v1.1.1 // indirect
130151
github.com/tidwall/pretty v1.2.1 // indirect
131152
github.com/x448/float16 v0.8.4 // indirect
153+
github.com/zeebo/errs v1.4.0 // indirect
132154
go.opencensus.io v0.24.0 // indirect
133155
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
134-
go.opentelemetry.io/collector/featuregate v1.24.0 // indirect
135-
go.opentelemetry.io/contrib/detectors/gcp v1.34.0 // indirect
136-
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.59.0 // indirect
137-
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect
138-
go.opentelemetry.io/otel v1.35.0 // indirect
156+
go.opentelemetry.io/collector/featuregate v1.34.0 // indirect
157+
go.opentelemetry.io/contrib/detectors/gcp v1.36.0 // indirect
158+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect
159+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
160+
go.opentelemetry.io/otel v1.36.0 // indirect
139161
go.opentelemetry.io/otel/exporters/prometheus v0.56.0 // indirect
140-
go.opentelemetry.io/otel/metric v1.35.0 // indirect
141-
go.opentelemetry.io/otel/sdk v1.35.0 // indirect
142-
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
143-
go.opentelemetry.io/otel/trace v1.35.0 // indirect
162+
go.opentelemetry.io/otel/metric v1.36.0 // indirect
163+
go.opentelemetry.io/otel/sdk v1.36.0 // indirect
164+
go.opentelemetry.io/otel/sdk/metric v1.36.0 // indirect
165+
go.opentelemetry.io/otel/trace v1.36.0 // indirect
166+
go.uber.org/automaxprocs v1.6.0 // indirect
144167
go.uber.org/multierr v1.11.0 // indirect
145-
golang.org/x/crypto v0.39.0 // indirect
168+
go.yaml.in/yaml/v2 v2.4.2 // indirect
169+
golang.org/x/crypto v0.41.0 // indirect
146170
golang.org/x/exp v0.0.0-20250606033433-dcc06ee1d476 // indirect
147-
golang.org/x/mod v0.25.0 // indirect
148-
golang.org/x/net v0.41.0 // indirect
171+
golang.org/x/mod v0.27.0 // indirect
172+
golang.org/x/net v0.43.0 // indirect
149173
golang.org/x/oauth2 v0.30.0 // indirect
150-
golang.org/x/sync v0.15.0 // indirect
151-
golang.org/x/sys v0.33.0 // indirect
152-
golang.org/x/term v0.32.0 // indirect
153-
golang.org/x/text v0.26.0 // indirect
174+
golang.org/x/sync v0.16.0 // indirect
175+
golang.org/x/sys v0.35.0 // indirect
176+
golang.org/x/term v0.34.0 // indirect
177+
golang.org/x/text v0.28.0 // indirect
154178
golang.org/x/time v0.12.0 // indirect
155-
golang.org/x/tools v0.34.0 // indirect
156-
google.golang.org/genproto v0.0.0-20241118233622-e639e219e697 // indirect
157-
google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a // indirect
158-
google.golang.org/genproto/googleapis/rpc v0.0.0-20250313205543-e70fdf4c4cb4 // indirect
159-
google.golang.org/grpc v1.71.0 // indirect
179+
golang.org/x/tools v0.36.0 // indirect
180+
google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 // indirect
181+
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect
182+
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect
183+
google.golang.org/grpc v1.74.2 // indirect
160184
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
161185
gopkg.in/go-playground/assert.v1 v1.2.1 // indirect
162186
gopkg.in/inf.v0 v0.9.1 // indirect

0 commit comments

Comments
 (0)