Skip to content

Commit da87cb0

Browse files
Updating Knative Configuration and Annotations to Support 0 Initial Scale (#537) (#579)
* Adding initial-scale and max-scale annotations to Knative Service resources created for inference graphs
* Adding initial-scale annotation for Knative Service resources created for inference services
* Now checking the Knative autoscaler configuration prior to Knative Service creation
* Fixing Golang linter errors
* Now relying only on the KnativeServing CR to read the autoscaler configuration
* Addressing comments
* Updating comments
* Adding Knative Operator APIs to the scheme and using kubebuilder to set kserve-manager-role rules
* Reformatting
* Updating the GetAutoscalerConfiguration method to look for both the autoscale and config-autoscaler keys in the KnativeServing CR
* Refactoring the GetAutoscalerConfiguration method

---------

Signed-off-by: Brett Thompson <[email protected]>
1 parent 76f26d8 commit da87cb0

File tree

22 files changed

+3370
-49
lines changed

22 files changed

+3370
-49
lines changed

charts/kserve-resources/templates/clusterrole.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,14 @@ rules:
132132
- patch
133133
- update
134134
- watch
135+
- apiGroups:
136+
- operator.knative.dev
137+
resources:
138+
- knativeservings
139+
verbs:
140+
- get
141+
- list
142+
- watch
135143
- apiGroups:
136144
- rbac.authorization.k8s.io
137145
resourceNames:
@@ -149,6 +157,7 @@ rules:
149157
- routes
150158
verbs:
151159
- create
160+
- delete
152161
- get
153162
- list
154163
- patch

cmd/manager/main.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
3232
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
3333
"k8s.io/client-go/tools/record"
34+
operatorv1beta1 "knative.dev/operator/pkg/apis/operator/v1beta1"
3435
knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
3536
ctrl "sigs.k8s.io/controller-runtime"
3637
"sigs.k8s.io/controller-runtime/pkg/client/config"
@@ -176,6 +177,20 @@ func main() {
176177
os.Exit(1)
177178
}
178179
}
180+
181+
knServingFound, knServingCheckErr := utils.IsCrdAvailable(cfg, operatorv1beta1.SchemeGroupVersion.String(), constants.KnativeServingKind)
182+
if knServingCheckErr != nil {
183+
setupLog.Error(knServingCheckErr, "error when checking if Knative KnativeServing kind is available")
184+
os.Exit(1)
185+
}
186+
if knServingFound {
187+
setupLog.Info("Setting up Knative Operator scheme")
188+
if err := operatorv1beta1.AddToScheme(mgr.GetScheme()); err != nil {
189+
setupLog.Error(err, "unable to add Knative Operator APIs to scheme")
190+
os.Exit(1)
191+
}
192+
}
193+
179194
if !ingressConfig.DisableIstioVirtualHost {
180195
vsFound, vsCheckErr := utils.IsCrdAvailable(cfg, istioclientv1beta1.SchemeGroupVersion.String(), constants.IstioVirtualServiceKind)
181196
if vsCheckErr != nil {

config/configmap/inferenceservice.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ data:
5959
# revisions, which prevents the reconciliation loop from being triggered if the annotations
6060
# configured here are used.
6161
# Default values are:
62+
# "autoscaling.knative.dev/initial-scale",
6263
# "autoscaling.knative.dev/min-scale",
6364
# "autoscaling.knative.dev/max-scale",
6465
# "internal.serving.kserve.io/storage-initializer-sourceuri",

config/overlays/odh/inferenceservice-config-patch.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ data:
8888
inferenceService: |-
8989
{
9090
"serviceAnnotationDisallowedList": [
91+
"autoscaling.knative.dev/initial-scale",
9192
"autoscaling.knative.dev/min-scale",
9293
"autoscaling.knative.dev/max-scale",
9394
"internal.serving.kserve.io/storage-initializer-sourceuri",

config/rbac/role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,14 @@ rules:
119119
- patch
120120
- update
121121
- watch
122+
- apiGroups:
123+
- operator.knative.dev
124+
resources:
125+
- knativeservings
126+
verbs:
127+
- get
128+
- list
129+
- watch
122130
- apiGroups:
123131
- rbac.authorization.k8s.io
124132
resourceNames:

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ require (
4040
k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2
4141
k8s.io/utils v0.0.0-20240821151609-f90d01438635
4242
knative.dev/networking v0.0.0-20240815142417-37fdbdd0854b
43+
knative.dev/operator v0.42.2
4344
knative.dev/pkg v0.0.0-20240815051656-89743d9bbf7c
4445
knative.dev/serving v0.42.2
4546
sigs.k8s.io/controller-runtime v0.19.1

go.sum

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,8 @@ github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
286286
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
287287
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
288288
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
289+
github.com/manifestival/manifestival v0.7.2 h1:l4uFdWX/xQK4QcRfqGoMtBvaZeWPEuwD6hVsCwUqZY4=
290+
github.com/manifestival/manifestival v0.7.2/go.mod h1:nl3T6HlfHCeidooWVTMI9vYNTBkQ1GdhLNb+smozbdk=
289291
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
290292
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
291293
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
@@ -765,8 +767,12 @@ k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2 h1:GKE9U8BH16uynoxQii0auT
765767
k8s.io/kube-openapi v0.0.0-20240827152857-f7e401e7b4c2/go.mod h1:coRQXBK9NxO98XUv3ZD6AK3xzHCxV6+b7lrquKwaKzA=
766768
k8s.io/utils v0.0.0-20240821151609-f90d01438635 h1:2wThSvJoW/Ncn9TmQEYXRnevZXi2duqHWf5OX9S3zjI=
767769
k8s.io/utils v0.0.0-20240821151609-f90d01438635/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
770+
knative.dev/caching v0.0.0-20240716132144-989f54c83776 h1:2nINnWuXtb9e2nG/EJxSCeghcmu6qmvmomJ7woiP5Is=
771+
knative.dev/caching v0.0.0-20240716132144-989f54c83776/go.mod h1:Uj74eO9rLiK1eb8wmDBED1hJBZQ7MJ9cvq/d8Ktsm3c=
768772
knative.dev/networking v0.0.0-20240815142417-37fdbdd0854b h1:ws/Jeho6on84+5tfNKLAKriVVGIwivHbgPEtZjBfcs0=
769773
knative.dev/networking v0.0.0-20240815142417-37fdbdd0854b/go.mod h1:2eMQVGLBZ5Kj1C4kKPuPhO7BsUeF6fkmhZFDQPIP+88=
774+
knative.dev/operator v0.42.2 h1:wgAWYHwoSFmV+wPHCt5dZahHTHLy2VCM4G82PEo9iSc=
775+
knative.dev/operator v0.42.2/go.mod h1:cfSpJMgvwmuZ7USaxC+zgEuizMFc/xweREW5DG6J1DA=
770776
knative.dev/pkg v0.0.0-20240815051656-89743d9bbf7c h1:2crXVk4FG0dSG6WHaIT+WKbUzn7qG2wn0AfYmvA22zs=
771777
knative.dev/pkg v0.0.0-20240815051656-89743d9bbf7c/go.mod h1:cI2RPEEHZk+/dBpfHobs0aBdPA1mMZVUVWnGAc8NSzM=
772778
knative.dev/serving v0.42.2 h1:yKieg3MeNvpVz+4JJPbvmpee3v3LK3zO5h5HJBtzaNk=

pkg/constants/constants.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ var (
9090
AutoscalerClass = KServeAPIGroupName + "/autoscalerClass"
9191
AutoscalerMetrics = KServeAPIGroupName + "/metrics"
9292
TargetUtilizationPercentage = KServeAPIGroupName + "/targetUtilizationPercentage"
93+
InitialScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/initial-scale"
9394
MinScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/min-scale"
9495
MaxScaleAnnotationKey = KnativeAutoscalingAPIGroupName + "/max-scale"
9596
RollOutDurationAnnotationKey = KnativeServingAPIGroupName + "/rollout-duration"
@@ -257,9 +258,14 @@ type InferenceServiceProtocol string
257258

258259
// Knative constants
259260
const (
260-
KnativeLocalGateway = "knative-serving/knative-local-gateway"
261-
KnativeIngressGateway = "knative-serving/knative-ingress-gateway"
262-
VisibilityLabel = "networking.knative.dev/visibility"
261+
AutoscalerKey = "autoscaler"
262+
AutoscalerInitialScaleKey = "initial-scale"
263+
AutoscalerAllowZeroScaleKey = "allow-zero-initial-scale"
264+
DefaultKnServingName = "knative-serving"
265+
DefaultKnServingNamespace = "knative-serving"
266+
KnativeLocalGateway = "knative-serving/knative-local-gateway"
267+
KnativeIngressGateway = "knative-serving/knative-ingress-gateway"
268+
VisibilityLabel = "networking.knative.dev/visibility"
263269
)
264270

265271
var (
@@ -374,6 +380,7 @@ var (
374380
// revisions, which prevents the reconciliation loop from being triggered if the annotations
375381
// configured here are used.
376382
ServiceAnnotationDisallowedList = []string{
383+
autoscaling.InitialScaleAnnotationKey,
377384
autoscaling.MinScaleAnnotationKey,
378385
autoscaling.MaxScaleAnnotationKey,
379386
StorageInitializerSourceUriInternalAnnotationKey,
@@ -499,6 +506,7 @@ const (
499506
IstioVirtualServiceKind = "VirtualService"
500507
KnativeServiceKind = "Service"
501508
ClusterLocalModelKind = "ClusterLocalModel"
509+
KnativeServingKind = "KnativeServing"
502510
)
503511

504512
// Model Parallel Options

pkg/controller/v1alpha1/inferencegraph/controller.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ limitations under the License.
2323
// +kubebuilder:rbac:groups=serving.knative.dev,resources=services/status,verbs=get;update;patch
2424
// +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=create;get;update;patch;watch;delete
2525
// +kubebuilder:rbac:groups=route.openshift.io,resources=routes/status,verbs=get
26+
// +kubebuilder:rbac:groups=operator.knative.dev,resources=knativeservings,verbs=get;list;watch
2627
package inferencegraph
2728

2829
import (
@@ -45,6 +46,7 @@ import (
4546
"k8s.io/client-go/kubernetes"
4647
"k8s.io/client-go/rest"
4748
"k8s.io/client-go/tools/record"
49+
operatorv1beta1 "knative.dev/operator/pkg/apis/operator/v1beta1"
4850
"knative.dev/pkg/apis"
4951
knservingv1 "knative.dev/serving/pkg/apis/serving/v1"
5052
ctrl "sigs.k8s.io/controller-runtime"
@@ -267,10 +269,26 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque
267269
if !ksvcAvailable {
268270
r.Recorder.Event(graph, v1.EventTypeWarning, "ServerlessModeRejected",
269271
"It is not possible to use Serverless deployment mode when Knative Services are not available")
270-
return reconcile.Result{Requeue: false}, reconcile.TerminalError(fmt.Errorf("the resolved deployment mode of InferenceGraph '%s' is Serverless, but Knative Serving is not available", graph.Name))
272+
return reconcile.Result{Requeue: false}, reconcile.TerminalError(fmt.Errorf("the resolved deployment mode of InferenceGraph '%s' is Serverless, but Knative Services are not available", graph.Name))
273+
}
274+
275+
// Abort if Knative KnativeServings are not available
276+
knServingFound, knServingCheckErr := utils.IsCrdAvailable(r.ClientConfig, operatorv1beta1.SchemeGroupVersion.String(), constants.KnativeServingKind)
277+
if knServingCheckErr != nil {
278+
return reconcile.Result{}, knServingCheckErr
279+
}
280+
281+
if !knServingFound {
282+
r.Recorder.Event(graph, v1.EventTypeWarning, "ServerlessModeRejected",
283+
"It is not possible to use Serverless deployment mode when Knative KnativeServings are not available")
284+
return reconcile.Result{Requeue: false}, reconcile.TerminalError(fmt.Errorf("the resolved deployment mode of InferenceGraph '%s' is Serverless, but Knative KnativeServings are not available", graph.Name))
285+
}
286+
287+
desired, err := createKnativeService(r.Client, graph.ObjectMeta, graph, routerConfig)
288+
if err != nil {
289+
return ctrl.Result{}, errors.Wrapf(err, "fails to create new knative service")
271290
}
272291

273-
desired := createKnativeService(graph.ObjectMeta, graph, routerConfig)
274292
err = controllerutil.SetControllerReference(graph, desired, r.Scheme)
275293
if err != nil {
276294
return reconcile.Result{}, err

0 commit comments

Comments
 (0)