Skip to content

Commit df4829b

Browse files
Merge pull request openshift-kni#1315 from shajmakh/informer-default
controller: sched: conditionally enable Shared informer by default
2 parents 4962902 + 6a56840 commit df4829b

File tree

4 files changed

+200
-1
lines changed

4 files changed

+200
-1
lines changed

api/v1/numaresourcesscheduler_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ const (
3838
type SchedulerInformerMode string
3939

4040
const (
41-
// SchedulerInformerDedicated makes the NodeResourceTopologyMatch plugin use the default framework informer.
41+
// SchedulerInformerShared makes the NodeResourceTopologyMatch plugin use the default framework informer.
4242
SchedulerInformerShared SchedulerInformerMode = "Shared"
4343

4444
// SchedulerInformerDedicated sets an additional separate informer just for the NodeResourceTopologyMatch plugin. Default.

cmd/main.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,10 @@ func main() {
338338
Scheme: mgr.GetScheme(),
339339
SchedulerManifests: schedMf,
340340
Namespace: namespace,
341+
PlatformInfo: controller.PlatformInfo{
342+
Platform: clusterPlatform,
343+
Version: clusterPlatformVersion,
344+
},
341345
}).SetupWithManager(mgr); err != nil {
342346
klog.ErrorS(err, "unable to create controller", "controller", "NUMAResourcesScheduler")
343347
os.Exit(1)

internal/controller/numaresourcesscheduler_controller.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
"sigs.k8s.io/controller-runtime/pkg/predicate"
4242
"sigs.k8s.io/controller-runtime/pkg/reconcile"
4343

44+
"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
4445
k8swgmanifests "github.com/k8stopologyawareschedwg/deployer/pkg/manifests"
4546
k8swgrbacupdate "github.com/k8stopologyawareschedwg/deployer/pkg/objectupdate/rbac"
4647

@@ -58,12 +59,24 @@ import (
5859
"github.com/openshift-kni/numaresources-operator/pkg/status"
5960
)
6061

62+
const (
63+
// ActivePodsResourcesSupportSince defines the OCP version which started to support the fixed kubelet
64+
// in which the PodResourcesAPI lists the active pods by default
65+
activePodsResourcesSupportSince = "4.20.999"
66+
)
67+
68+
type PlatformInfo struct {
69+
Platform platform.Platform
70+
Version platform.Version
71+
}
72+
6173
// NUMAResourcesSchedulerReconciler reconciles a NUMAResourcesScheduler object
6274
type NUMAResourcesSchedulerReconciler struct {
6375
client.Client
6476
Scheme *runtime.Scheme
6577
SchedulerManifests schedmanifests.Manifests
6678
Namespace string
79+
PlatformInfo PlatformInfo
6780
}
6881

6982
//+kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=*
@@ -225,6 +238,8 @@ func (r *NUMAResourcesSchedulerReconciler) syncNUMASchedulerResources(ctx contex
225238
klog.V(4).Info("SchedulerSync start")
226239
defer klog.V(4).Info("SchedulerSync stop")
227240

241+
platformNormalize(&instance.Spec, r.PlatformInfo)
242+
228243
schedSpec := instance.Spec.Normalize()
229244
cacheResyncPeriod := unpackAPIResyncPeriod(schedSpec.CacheResyncPeriod)
230245
replicas, err := r.computeSchedulerReplicas(ctx, schedSpec)
@@ -293,6 +308,27 @@ func (r *NUMAResourcesSchedulerReconciler) syncNUMASchedulerResources(ctx contex
293308
return schedStatus, nil
294309
}
295310

311+
func platformNormalize(spec *nropv1.NUMAResourcesSchedulerSpec, platInfo PlatformInfo) {
312+
if platInfo.Platform != platform.OpenShift && platInfo.Platform != platform.HyperShift {
313+
return
314+
}
315+
316+
parsedVersion, _ := platform.ParseVersion(activePodsResourcesSupportSince)
317+
ok, err := platInfo.Version.AtLeast(parsedVersion)
318+
if err != nil {
319+
klog.Infof("failed to compare version %v with %v, err %v", parsedVersion, platInfo.Version, err)
320+
return
321+
}
322+
323+
if !ok {
324+
return
325+
}
326+
327+
if spec.SchedulerInformer == nil {
328+
spec.SchedulerInformer = ptr.To(nropv1.SchedulerInformerShared)
329+
klog.V(4).InfoS("SchedulerInformer default is overridden", "Platform", platInfo.Platform, "PlatformVersion", platInfo.Version.String(), "SchedulerInformer", &spec.SchedulerInformer)
330+
}
331+
}
296332
func (r *NUMAResourcesSchedulerReconciler) updateStatus(ctx context.Context, sched *nropv1.NUMAResourcesScheduler, condition string, reason string, message string) error {
297333
sched.Status.Conditions, _ = status.UpdateConditions(sched.Status.Conditions, condition, reason, message)
298334
if err := r.Client.Status().Update(ctx, sched); err != nil {

internal/controller/numaresourcesscheduler_controller_test.go

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040

4141
configv1 "github.com/openshift/api/config/v1"
4242

43+
"github.com/k8stopologyawareschedwg/deployer/pkg/deployer/platform"
4344
depmanifests "github.com/k8stopologyawareschedwg/deployer/pkg/manifests"
4445
depobjupdate "github.com/k8stopologyawareschedwg/deployer/pkg/objectupdate"
4546

@@ -632,6 +633,164 @@ var _ = ginkgo.Describe("Test NUMAResourcesScheduler Reconcile", func() {
632633
gomega.Expect(*dp.Spec.Replicas).To(gomega.Equal(int32(numOfMasters)), "number of replicas is different than number of control-planes nodes; want=%d got=%d", numOfMasters, *dp.Spec.Replicas)
633634
})
634635
})
636+
637+
ginkgo.Context("with kubelet PodResourcesAPI listing active pods by default", func() {
638+
var nrs *nropv1.NUMAResourcesScheduler
639+
var reconciler *NUMAResourcesSchedulerReconciler
640+
numOfMasters := 3
641+
642+
ginkgo.When("kubelet fix is enabled", func() {
643+
fixedVersion, _ := platform.ParseVersion(activePodsResourcesSupportSince)
644+
645+
ginkgo.DescribeTable("should configure by default the informerMode to the expected when field is not set", func(reconcilerPlatInfo PlatformInfo, expectedInformer string) {
646+
var err error
647+
nrs = testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
648+
initObjects := []runtime.Object{nrs}
649+
initObjects = append(initObjects, fakeNodes(numOfMasters, 3)...)
650+
reconciler, err = NewFakeNUMAResourcesSchedulerReconciler(initObjects...)
651+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
652+
653+
reconciler.PlatformInfo = reconcilerPlatInfo
654+
655+
key := client.ObjectKeyFromObject(nrs)
656+
_, err = reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
657+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
658+
659+
expectCacheParams(reconciler.Client, depmanifests.CacheResyncAutodetect, depmanifests.CacheResyncOnlyExclusiveResources, expectedInformer)
660+
},
661+
ginkgo.Entry("with fixed Openshift the default informer is Shared", PlatformInfo{
662+
Platform: platform.OpenShift,
663+
Version: fixedVersion,
664+
}, depmanifests.CacheInformerShared),
665+
ginkgo.Entry("with fixed Hypershift the default informer is Shared", PlatformInfo{
666+
Platform: platform.HyperShift,
667+
Version: fixedVersion,
668+
}, depmanifests.CacheInformerShared),
669+
ginkgo.Entry("with unknown platform the default informer is Dedicated (unchanged)", PlatformInfo{}, depmanifests.CacheInformerDedicated))
670+
671+
ginkgo.DescribeTable("should preserve informerMode value if set", func(reconcilerPlatInfo PlatformInfo) {
672+
var err error
673+
nrs = testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
674+
infMode := nropv1.SchedulerInformerDedicated
675+
nrs.Spec.SchedulerInformer = &infMode
676+
initObjects := []runtime.Object{nrs}
677+
initObjects = append(initObjects, fakeNodes(numOfMasters, 3)...)
678+
reconciler, err = NewFakeNUMAResourcesSchedulerReconciler(initObjects...)
679+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
680+
681+
reconciler.PlatformInfo = reconcilerPlatInfo
682+
683+
key := client.ObjectKeyFromObject(nrs)
684+
_, err = reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
685+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
686+
expectCacheParams(reconciler.Client, depmanifests.CacheResyncAutodetect, depmanifests.CacheResyncOnlyExclusiveResources, string(infMode))
687+
},
688+
ginkgo.Entry("with Openshift", PlatformInfo{
689+
Platform: platform.OpenShift,
690+
Version: fixedVersion,
691+
}),
692+
ginkgo.Entry("with Hypershift", PlatformInfo{
693+
Platform: platform.HyperShift,
694+
Version: fixedVersion,
695+
}),
696+
ginkgo.Entry("with unknown platform", PlatformInfo{}))
697+
698+
ginkgo.DescribeTable("should allow to update the informerMode to be Dedicated after an overridden default", func(reconcilerPlatInfo PlatformInfo) {
699+
var err error
700+
nrs = testobjs.NewNUMAResourcesScheduler("numaresourcesscheduler", "some/url:latest", testSchedulerName, 11*time.Second)
701+
initObjects := []runtime.Object{nrs}
702+
initObjects = append(initObjects, fakeNodes(numOfMasters, 3)...)
703+
reconciler, err = NewFakeNUMAResourcesSchedulerReconciler(initObjects...)
704+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
705+
706+
reconciler.PlatformInfo = reconcilerPlatInfo
707+
708+
key := client.ObjectKeyFromObject(nrs)
709+
_, err = reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
710+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
711+
712+
// intentionally skip checking default value
713+
714+
// should query the object after reconcile because the defaults are overridden
715+
gomega.Expect(reconciler.Client.Get(context.TODO(), key, nrs)).ToNot(gomega.HaveOccurred())
716+
717+
nrsUpdated := nrs.DeepCopy()
718+
informerMode := nropv1.SchedulerInformerDedicated
719+
nrsUpdated.Spec.SchedulerInformer = &informerMode
720+
gomega.Eventually(func() bool {
721+
if err := reconciler.Client.Update(context.TODO(), nrsUpdated); err != nil {
722+
klog.Warningf("failed to update the scheduler object; err: %v", err)
723+
return false
724+
}
725+
return true
726+
}, 30*time.Second, 5*time.Second).Should(gomega.BeTrue())
727+
728+
_, err = reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: key})
729+
gomega.Expect(err).ToNot(gomega.HaveOccurred())
730+
731+
expectCacheParams(reconciler.Client, depmanifests.CacheResyncAutodetect, depmanifests.CacheResyncOnlyExclusiveResources, string(informerMode))
732+
},
733+
ginkgo.Entry("with Openshift", PlatformInfo{
734+
Platform: platform.OpenShift,
735+
Version: fixedVersion,
736+
}),
737+
ginkgo.Entry("with Hypershift", PlatformInfo{
738+
Platform: platform.HyperShift,
739+
Version: fixedVersion,
740+
}))
741+
})
742+
})
743+
})
744+
745+
var _ = ginkgo.Describe("Test scheduler spec PreNormalize", func() {
746+
ginkgo.When("Spec.SchedulerInformer is not set by the user", func() {
747+
ginkgo.It("should override default informer to Shared if kubelet is fixed - first supported zstream version", func() {
748+
v, _ := platform.ParseVersion(activePodsResourcesSupportSince)
749+
spec := nropv1.NUMAResourcesSchedulerSpec{}
750+
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
751+
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerShared))
752+
})
753+
754+
ginkgo.It("should override default informer to Shared if kubelet is fixed - version is greater than first supported (zstream)", func() {
755+
v, _ := platform.ParseVersion("4.20.1000")
756+
spec := nropv1.NUMAResourcesSchedulerSpec{}
757+
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
758+
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerShared))
759+
})
760+
761+
ginkgo.It("should override default informer to Shared if kubelet is fixed - version is greater than first supported (ystream)", func() {
762+
v, _ := platform.ParseVersion("4.21.0")
763+
spec := nropv1.NUMAResourcesSchedulerSpec{}
764+
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
765+
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerShared))
766+
})
767+
768+
ginkgo.It("should not override default informer if kubelet is not fixed - version is less than first supported (zstream)", func() {
769+
// this is only for testing purposes as there is plan to backport the fix to older minor versions
770+
// will need to remove this test if the fix is supported starting the first zstream of the release
771+
v, _ := platform.ParseVersion("4.20.0")
772+
spec := nropv1.NUMAResourcesSchedulerSpec{}
773+
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
774+
gomega.Expect(spec.SchedulerInformer).To(gomega.BeNil())
775+
})
776+
777+
ginkgo.It("should not override default informer if kubelet is not fixed - version is less than first supported (ystream)", func() {
778+
v, _ := platform.ParseVersion("4.13.0")
779+
spec := nropv1.NUMAResourcesSchedulerSpec{}
780+
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
781+
gomega.Expect(spec.SchedulerInformer).To(gomega.BeNil())
782+
})
783+
})
784+
ginkgo.When("Spec.SchedulerInformer is set by the user", func() {
785+
ginkgo.It("should preserve informer value set by the user even if kubelet is fixed", func() {
786+
v, _ := platform.ParseVersion(activePodsResourcesSupportSince)
787+
spec := nropv1.NUMAResourcesSchedulerSpec{
788+
SchedulerInformer: ptr.To(nropv1.SchedulerInformerDedicated),
789+
}
790+
platformNormalize(&spec, PlatformInfo{Platform: platform.OpenShift, Version: v})
791+
gomega.Expect(*spec.SchedulerInformer).To(gomega.Equal(nropv1.SchedulerInformerDedicated))
792+
})
793+
})
635794
})
636795

637796
func pop(m map[string]string, k string) string {

0 commit comments

Comments
 (0)