From 1a50098afa6c5bce337baf415aad6bd48edaf2d4 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 9 Jan 2026 10:38:23 +0100
Subject: [PATCH] Merge Step CRD into Pipeline CRD
---
PROJECT | 8 -
Tiltfile | 1 +
api/v1alpha1/decision_types.go | 2 +-
api/v1alpha1/pipeline_types.go | 55 +-
api/v1alpha1/step_types.go | 126 --
api/v1alpha1/zz_generated.deepcopy.go | 104 +-
config/crd/bases/cortex.cloud_decisions.yaml | 46 +-
config/crd/bases/cortex.cloud_pipelines.yaml | 126 +-
config/crd/bases/cortex.cloud_steps.yaml | 239 ---
config/crd/cortex.cloud_decisions.yaml | 46 +-
config/crd/cortex.cloud_pipelines.yaml | 126 +-
config/crd/cortex.cloud_steps.yaml | 239 ---
.../templates/crd/cortex.cloud_decisions.yaml | 46 +-
.../templates/crd/cortex.cloud_pipelines.yaml | 126 +-
.../templates/crd/cortex.cloud_steps.yaml | 246 ---
.../cortex-cinder/templates/steps.yaml | 1 -
.../cortex-ironcore/templates/pipelines.yaml | 8 +-
.../cortex-ironcore/templates/steps.yaml | 14 -
.../cortex-manila/templates/pipelines.yaml | 18 +-
.../cortex-manila/templates/steps.yaml | 24 -
.../cortex-nova/templates/pipelines.yaml | 168 +-
helm/bundles/cortex-nova/templates/steps.yaml | 263 ----
.../cortex-pods/templates/pipelines.yaml | 7 +-
helm/bundles/cortex-pods/templates/steps.yaml | 13 -
.../kpis/plugins/deployment/step_state.go | 81 -
.../plugins/deployment/step_state_test.go | 230 ---
internal/knowledge/kpis/supported_kpis.go | 1 -
.../decisions/cinder/pipeline_controller.go | 29 +-
.../cinder/pipeline_controller_test.go | 49 +-
.../decisions/explanation/explainer.go | 6 +-
.../decisions/explanation/explainer_test.go | 10 +-
.../scheduling/decisions/machines/noop.go | 2 +-
.../decisions/machines/pipeline_controller.go | 29 +-
.../machines/pipeline_controller_test.go | 37 +-
.../decisions/manila/pipeline_controller.go | 29 +-
.../manila/pipeline_controller_test.go | 60 +-
.../decisions/nova/pipeline_controller.go | 29 +-
.../nova/pipeline_controller_test.go | 101 +-
internal/scheduling/decisions/pods/noop.go | 2 +-
.../decisions/pods/pipeline_controller.go | 29 +-
.../pods/pipeline_controller_test.go | 35 +-
.../scheduling/descheduling/nova/monitor.go | 6 +-
.../descheduling/nova/monitor_test.go | 27 +-
.../scheduling/descheduling/nova/pipeline.go | 17 +-
.../descheduling/nova/pipeline_controller.go | 29 +-
.../nova/pipeline_controller_test.go | 30 +-
.../descheduling/nova/pipeline_test.go | 45 +-
.../descheduling/nova/plugins/base.go | 4 +-
.../descheduling/nova/plugins/base_test.go | 12 +-
internal/scheduling/descheduling/nova/step.go | 2 +-
internal/scheduling/lib/pipeline.go | 24 +-
.../scheduling/lib/pipeline_controller.go | 219 +--
.../lib/pipeline_controller_test.go | 1373 +++++++++++------
internal/scheduling/lib/pipeline_test.go | 2 +-
internal/scheduling/lib/step.go | 6 +-
internal/scheduling/lib/step_monitor.go | 6 +-
internal/scheduling/lib/step_test.go | 4 +-
internal/scheduling/lib/step_validation.go | 4 +-
.../dashboards/cortex-status.json | 210 +--
59 files changed, 1819 insertions(+), 3012 deletions(-)
delete mode 100644 api/v1alpha1/step_types.go
delete mode 100644 config/crd/bases/cortex.cloud_steps.yaml
delete mode 100644 config/crd/cortex.cloud_steps.yaml
delete mode 100644 dist/chart/templates/crd/cortex.cloud_steps.yaml
delete mode 100644 helm/bundles/cortex-cinder/templates/steps.yaml
delete mode 100644 helm/bundles/cortex-ironcore/templates/steps.yaml
delete mode 100644 helm/bundles/cortex-manila/templates/steps.yaml
delete mode 100644 helm/bundles/cortex-nova/templates/steps.yaml
delete mode 100644 helm/bundles/cortex-pods/templates/steps.yaml
delete mode 100644 internal/knowledge/kpis/plugins/deployment/step_state.go
delete mode 100644 internal/knowledge/kpis/plugins/deployment/step_state_test.go
diff --git a/PROJECT b/PROJECT
index 11519f975..5afc755e1 100644
--- a/PROJECT
+++ b/PROJECT
@@ -59,14 +59,6 @@ resources:
kind: Descheduling
path: github.com/cobaltcore-dev/cortex/api/v1alpha1
version: v1alpha1
-- api:
- crdVersion: v1
- controller: true
- domain: cortex
- group: cortex.cloud
- kind: Step
- path: github.com/cobaltcore-dev/cortex/api/v1alpha1
- version: v1alpha1
- api:
crdVersion: v1
controller: true
diff --git a/Tiltfile b/Tiltfile
index 654d079f2..3777f5744 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -31,6 +31,7 @@ helm_repo(
)
########### Dependency CRDs
+# Make sure the local cluster is running if you are running into startup issues here.
url = 'https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/hypervisor-crd.yaml'
local('curl ' + url + ' | kubectl apply -f -')
diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go
index 6a65f5498..7d6176a80 100644
--- a/api/v1alpha1/decision_types.go
+++ b/api/v1alpha1/decision_types.go
@@ -43,7 +43,7 @@ type DecisionSpec struct {
type StepResult struct {
// object reference to the scheduler step.
- StepRef corev1.ObjectReference `json:"stepRef"`
+ StepName string `json:"stepName"`
// Activations of the step for each host.
Activations map[string]float64 `json:"activations"`
}
diff --git a/api/v1alpha1/pipeline_types.go b/api/v1alpha1/pipeline_types.go
index 3f1b90213..476c26772 100644
--- a/api/v1alpha1/pipeline_types.go
+++ b/api/v1alpha1/pipeline_types.go
@@ -6,11 +6,58 @@ package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ runtime "k8s.io/apimachinery/pkg/runtime"
)
-type StepInPipeline struct {
- // Reference to the step.
- Ref corev1.ObjectReference `json:"ref"`
+type DisabledValidationsSpec struct {
+ // Whether to validate that no subjects are removed or added from the scheduler
+ // step. This should only be disabled for scheduler steps that remove subjects.
+ // Thus, if no value is provided, the default is false.
+ SameSubjectNumberInOut bool `json:"sameSubjectNumberInOut,omitempty"`
+ // Whether to validate that, after running the step, there are remaining subjects.
+ // This should only be disabled for scheduler steps that are expected to
+ // remove all subjects.
+ SomeSubjectsRemain bool `json:"someSubjectsRemain,omitempty"`
+}
+
+type StepType string
+
+const (
+ // Step for assigning weights to hosts.
+ StepTypeWeigher StepType = "weigher"
+ // Step for filtering hosts.
+ StepTypeFilter StepType = "filter"
+ // Step for generating descheduling recommendations.
+ StepTypeDescheduler StepType = "descheduler"
+)
+
+type WeigherSpec struct {
+ // The validations to disable for this step. If none are provided, all
+ // applied validations are enabled.
+ // +kubebuilder:validation:Optional
+ DisabledValidations DisabledValidationsSpec `json:"disabledValidations,omitempty"`
+}
+
+type StepSpec struct {
+ // The type of the scheduler step.
+ Type StepType `json:"type"`
+ // If the type is "weigher", this contains additional configuration for it.
+ // +kubebuilder:validation:Optional
+ Weigher *WeigherSpec `json:"weigher,omitempty"`
+
+ // The name of the scheduler step in the cortex implementation.
+ Impl string `json:"impl"`
+ // Additional configuration for the extractor that can be used
+ // +kubebuilder:validation:Optional
+ Opts runtime.RawExtension `json:"opts,omitempty"`
+ // Knowledges this step depends on to be ready.
+ // +kubebuilder:validation:Optional
+ Knowledges []corev1.ObjectReference `json:"knowledges,omitempty"`
+ // Additional description of the step which helps understand its purpose
+ // and decisions made by it.
+ // +kubebuilder:validation:Optional
+ Description string `json:"description,omitempty"`
+
// Whether this step is mandatory for the pipeline to be runnable.
// +kubebuilder:default=true
Mandatory bool `json:"mandatory"`
@@ -41,7 +88,7 @@ type PipelineSpec struct {
// The type of the pipeline.
Type PipelineType `json:"type"`
// The ordered list of steps that make up this pipeline.
- Steps []StepInPipeline `json:"steps,omitempty"`
+ Steps []StepSpec `json:"steps,omitempty"`
}
const (
diff --git a/api/v1alpha1/step_types.go b/api/v1alpha1/step_types.go
deleted file mode 100644
index 40e9d96c6..000000000
--- a/api/v1alpha1/step_types.go
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright SAP SE
-// SPDX-License-Identifier: Apache-2.0
-
-package v1alpha1
-
-import (
- corev1 "k8s.io/api/core/v1"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- runtime "k8s.io/apimachinery/pkg/runtime"
-)
-
-type DisabledValidationsSpec struct {
- // Whether to validate that no subjects are removed or added from the scheduler
- // step. This should only be disabled for scheduler steps that remove subjects.
- // Thus, if no value is provided, the default is false.
- SameSubjectNumberInOut bool `json:"sameSubjectNumberInOut,omitempty"`
- // Whether to validate that, after running the step, there are remaining subjects.
- // This should only be disabled for scheduler steps that are expected to
- // remove all subjects.
- SomeSubjectsRemain bool `json:"someSubjectsRemain,omitempty"`
-}
-
-type StepType string
-
-const (
- // Step for assigning weights to hosts.
- StepTypeWeigher StepType = "weigher"
- // Step for filtering hosts.
- StepTypeFilter StepType = "filter"
- // Step for generating descheduling recommendations.
- StepTypeDescheduler StepType = "descheduler"
-)
-
-type WeigherSpec struct {
- // The validations to disable for this step. If none are provided, all
- // applied validations are enabled.
- // +kubebuilder:validation:Optional
- DisabledValidations DisabledValidationsSpec `json:"disabledValidations,omitempty"`
-}
-
-type StepSpec struct {
- // SchedulingDomain defines in which scheduling domain this step
- // is used (e.g., nova, cinder, manila).
- SchedulingDomain SchedulingDomain `json:"schedulingDomain"`
-
- // The type of the scheduler step.
- Type StepType `json:"type"`
- // If the type is "weigher", this contains additional configuration for it.
- // +kubebuilder:validation:Optional
- Weigher *WeigherSpec `json:"weigher,omitempty"`
-
- // The name of the scheduler step in the cortex implementation.
- Impl string `json:"impl"`
- // Additional configuration for the extractor that can be used
- // +kubebuilder:validation:Optional
- Opts runtime.RawExtension `json:"opts,omitempty"`
- // Knowledges this step depends on to be ready.
- // +kubebuilder:validation:Optional
- Knowledges []corev1.ObjectReference `json:"knowledges,omitempty"`
- // Additional description of the step which helps understand its purpose
- // and decisions made by it.
- // +kubebuilder:validation:Optional
- Description string `json:"description,omitempty"`
-}
-
-const (
- // Something went wrong during the step reconciliation.
- StepConditionError = "Error"
-)
-
-type StepStatus struct {
- // If the step is ready to be executed.
- Ready bool `json:"ready"`
- // How many knowledges have been extracted.
- ReadyKnowledges int `json:"readyKnowledges"`
- // Total number of knowledges configured.
- TotalKnowledges int `json:"totalKnowledges"`
- // "ReadyKnowledges / TotalKnowledges ready" as a human-readable string
- // or "ready" if there are no knowledges configured.
- KnowledgesReadyFrac string `json:"knowledgesReadyFrac,omitempty"`
- // The current status conditions of the step.
- // +kubebuilder:validation:Optional
- Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
-}
-
-// +kubebuilder:object:root=true
-// +kubebuilder:subresource:status
-// +kubebuilder:resource:scope=Cluster
-// +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp"
-// +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain"
-// +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type"
-// +kubebuilder:printcolumn:name="Ready",type="boolean",JSONPath=".status.ready"
-// +kubebuilder:printcolumn:name="Knowledges",type="string",JSONPath=".status.knowledgesReadyFrac"
-
-// Step is the Schema for the deschedulings API
-type Step struct {
- metav1.TypeMeta `json:",inline"`
-
- // metadata is a standard object metadata
- // +optional
- metav1.ObjectMeta `json:"metadata,omitempty,omitzero"`
-
- // spec defines the desired state of Step
- // +required
- Spec StepSpec `json:"spec"`
-
- // status defines the observed state of Step
- // +optional
- Status StepStatus `json:"status,omitempty,omitzero"`
-}
-
-// +kubebuilder:object:root=true
-
-// StepList contains a list of Step
-type StepList struct {
- metav1.TypeMeta `json:",inline"`
- metav1.ListMeta `json:"metadata,omitempty"`
- Items []Step `json:"items"`
-}
-
-func (*Step) URI() string { return "steps.cortex.cloud/v1alpha1" }
-func (*StepList) URI() string { return "steps.cortex.cloud/v1alpha1" }
-
-func init() {
- SchemeBuilder.Register(&Step{}, &StepList{})
-}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index fa1602f4d..5097d1e94 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -860,8 +860,10 @@ func (in *PipelineSpec) DeepCopyInto(out *PipelineSpec) {
*out = *in
if in.Steps != nil {
in, out := &in.Steps, &out.Steps
- *out = make([]StepInPipeline, len(*in))
- copy(*out, *in)
+ *out = make([]StepSpec, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
}
}
@@ -1077,85 +1079,9 @@ func (in *ReservationStatus) DeepCopy() *ReservationStatus {
return out
}
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *Step) DeepCopyInto(out *Step) {
- *out = *in
- out.TypeMeta = in.TypeMeta
- in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
- in.Spec.DeepCopyInto(&out.Spec)
- in.Status.DeepCopyInto(&out.Status)
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Step.
-func (in *Step) DeepCopy() *Step {
- if in == nil {
- return nil
- }
- out := new(Step)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *Step) DeepCopyObject() runtime.Object {
- if c := in.DeepCopy(); c != nil {
- return c
- }
- return nil
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *StepInPipeline) DeepCopyInto(out *StepInPipeline) {
- *out = *in
- out.Ref = in.Ref
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepInPipeline.
-func (in *StepInPipeline) DeepCopy() *StepInPipeline {
- if in == nil {
- return nil
- }
- out := new(StepInPipeline)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *StepList) DeepCopyInto(out *StepList) {
- *out = *in
- out.TypeMeta = in.TypeMeta
- in.ListMeta.DeepCopyInto(&out.ListMeta)
- if in.Items != nil {
- in, out := &in.Items, &out.Items
- *out = make([]Step, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepList.
-func (in *StepList) DeepCopy() *StepList {
- if in == nil {
- return nil
- }
- out := new(StepList)
- in.DeepCopyInto(out)
- return out
-}
-
-// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
-func (in *StepList) DeepCopyObject() runtime.Object {
- if c := in.DeepCopy(); c != nil {
- return c
- }
- return nil
-}
-
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *StepResult) DeepCopyInto(out *StepResult) {
*out = *in
- out.StepRef = in.StepRef
if in.Activations != nil {
in, out := &in.Activations, &out.Activations
*out = make(map[string]float64, len(*in))
@@ -1201,28 +1127,6 @@ func (in *StepSpec) DeepCopy() *StepSpec {
return out
}
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *StepStatus) DeepCopyInto(out *StepStatus) {
- *out = *in
- if in.Conditions != nil {
- in, out := &in.Conditions, &out.Conditions
- *out = make([]metav1.Condition, len(*in))
- for i := range *in {
- (*in)[i].DeepCopyInto(&(*out)[i])
- }
- }
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepStatus.
-func (in *StepStatus) DeepCopy() *StepStatus {
- if in == nil {
- return nil
- }
- out := new(StepStatus)
- in.DeepCopyInto(out)
- return out
-}
-
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *WeigherSpec) DeepCopyInto(out *WeigherSpec) {
*out = *in
diff --git a/config/crd/bases/cortex.cloud_decisions.yaml b/config/crd/bases/cortex.cloud_decisions.yaml
index 6a8f7b209..c4dc8acb9 100644
--- a/config/crd/bases/cortex.cloud_decisions.yaml
+++ b/config/crd/bases/cortex.cloud_decisions.yaml
@@ -379,52 +379,12 @@ spec:
type: number
description: Activations of the step for each host.
type: object
- stepRef:
+ stepName:
description: object reference to the scheduler step.
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
+ type: string
required:
- activations
- - stepRef
+ - stepName
type: object
type: array
targetHost:
diff --git a/config/crd/bases/cortex.cloud_pipelines.yaml b/config/crd/bases/cortex.cloud_pipelines.yaml
index a1bad9528..43c7dea12 100644
--- a/config/crd/bases/cortex.cloud_pipelines.yaml
+++ b/config/crd/bases/cortex.cloud_pipelines.yaml
@@ -73,57 +73,101 @@ spec:
description: The ordered list of steps that make up this pipeline.
items:
properties:
+ description:
+ description: |-
+ Additional description of the step which helps understand its purpose
+ and decisions made by it.
+ type: string
+ impl:
+ description: The name of the scheduler step in the cortex implementation.
+ type: string
+ knowledges:
+ description: Knowledges this step depends on to be ready.
+ items:
+ description: ObjectReference contains enough information to
+ let you inspect or modify the referred object.
+ properties:
+ apiVersion:
+ description: API version of the referent.
+ type: string
+ fieldPath:
+ description: |-
+ If referring to a piece of an object instead of an entire object, this string
+ should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+ For example, if the object reference is to a container within a pod, this would take on a value like:
+ "spec.containers{name}" (where "name" refers to the name of the container that triggered
+ the event) or if no container name is specified "spec.containers[2]" (container with
+ index 2 in this pod). This syntax is chosen only to have some well-defined way of
+ referencing a part of an object.
+ type: string
+ kind:
+ description: |-
+ Kind of the referent.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ name:
+ description: |-
+ Name of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ namespace:
+ description: |-
+ Namespace of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+ type: string
+ resourceVersion:
+ description: |-
+ Specific resourceVersion to which this reference is made, if any.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+ type: string
+ uid:
+ description: |-
+ UID of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
mandatory:
default: true
description: Whether this step is mandatory for the pipeline
to be runnable.
type: boolean
- ref:
- description: Reference to the step.
+ opts:
+ description: Additional configuration for the extractor that
+ can be used
+ type: object
+ x-kubernetes-preserve-unknown-fields: true
+ type:
+ description: The type of the scheduler step.
+ type: string
+ weigher:
+ description: If the type is "weigher", this contains additional
+ configuration for it.
properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
+ disabledValidations:
description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
+ The validations to disable for this step. If none are provided, all
+ applied validations are enabled.
+ properties:
+ sameSubjectNumberInOut:
+ description: |-
+ Whether to validate that no subjects are removed or added from the scheduler
+ step. This should only be disabled for scheduler steps that remove subjects.
+ Thus, if no value is provided, the default is false.
+ type: boolean
+ someSubjectsRemain:
+ description: |-
+ Whether to validate that, after running the step, there are remaining subjects.
+ This should only be disabled for scheduler steps that are expected to
+ remove all subjects.
+ type: boolean
+ type: object
type: object
- x-kubernetes-map-type: atomic
required:
+ - impl
- mandatory
- - ref
+ - type
type: object
type: array
type:
diff --git a/config/crd/bases/cortex.cloud_steps.yaml b/config/crd/bases/cortex.cloud_steps.yaml
deleted file mode 100644
index a02697165..000000000
--- a/config/crd/bases/cortex.cloud_steps.yaml
+++ /dev/null
@@ -1,239 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
- annotations:
- controller-gen.kubebuilder.io/version: v0.17.2
- name: steps.cortex.cloud
-spec:
- group: cortex.cloud
- names:
- kind: Step
- listKind: StepList
- plural: steps
- singular: step
- scope: Cluster
- versions:
- - additionalPrinterColumns:
- - jsonPath: .metadata.creationTimestamp
- name: Created
- type: date
- - jsonPath: .spec.schedulingDomain
- name: Domain
- type: string
- - jsonPath: .spec.type
- name: Type
- type: string
- - jsonPath: .status.ready
- name: Ready
- type: boolean
- - jsonPath: .status.knowledgesReadyFrac
- name: Knowledges
- type: string
- name: v1alpha1
- schema:
- openAPIV3Schema:
- description: Step is the Schema for the deschedulings API
- properties:
- apiVersion:
- description: |-
- APIVersion defines the versioned schema of this representation of an object.
- Servers should convert recognized schemas to the latest internal value, and
- may reject unrecognized values.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
- type: string
- kind:
- description: |-
- Kind is a string value representing the REST resource this object represents.
- Servers may infer this from the endpoint the client submits requests to.
- Cannot be updated.
- In CamelCase.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- metadata:
- type: object
- spec:
- description: spec defines the desired state of Step
- properties:
- description:
- description: |-
- Additional description of the step which helps understand its purpose
- and decisions made by it.
- type: string
- impl:
- description: The name of the scheduler step in the cortex implementation.
- type: string
- knowledges:
- description: Knowledges this step depends on to be ready.
- items:
- description: ObjectReference contains enough information to let
- you inspect or modify the referred object.
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
- type: array
- opts:
- description: Additional configuration for the extractor that can be
- used
- type: object
- x-kubernetes-preserve-unknown-fields: true
- schedulingDomain:
- description: |-
- SchedulingDomain defines in which scheduling domain this step
- is used (e.g., nova, cinder, manila).
- type: string
- type:
- description: The type of the scheduler step.
- type: string
- weigher:
- description: If the type is "weigher", this contains additional configuration
- for it.
- properties:
- disabledValidations:
- description: |-
- The validations to disable for this step. If none are provided, all
- applied validations are enabled.
- properties:
- sameSubjectNumberInOut:
- description: |-
- Whether to validate that no subjects are removed or added from the scheduler
- step. This should only be disabled for scheduler steps that remove subjects.
- Thus, if no value is provided, the default is false.
- type: boolean
- someSubjectsRemain:
- description: |-
- Whether to validate that, after running the step, there are remaining subjects.
- This should only be disabled for scheduler steps that are expected to
- remove all subjects.
- type: boolean
- type: object
- type: object
- required:
- - impl
- - schedulingDomain
- - type
- type: object
- status:
- description: status defines the observed state of Step
- properties:
- conditions:
- description: The current status conditions of the step.
- items:
- description: Condition contains details for one aspect of the current
- state of this API Resource.
- properties:
- lastTransitionTime:
- description: |-
- lastTransitionTime is the last time the condition transitioned from one status to another.
- This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
- format: date-time
- type: string
- message:
- description: |-
- message is a human readable message indicating details about the transition.
- This may be an empty string.
- maxLength: 32768
- type: string
- observedGeneration:
- description: |-
- observedGeneration represents the .metadata.generation that the condition was set based upon.
- For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
- with respect to the current state of the instance.
- format: int64
- minimum: 0
- type: integer
- reason:
- description: |-
- reason contains a programmatic identifier indicating the reason for the condition's last transition.
- Producers of specific condition types may define expected values and meanings for this field,
- and whether the values are considered a guaranteed API.
- The value should be a CamelCase string.
- This field may not be empty.
- maxLength: 1024
- minLength: 1
- pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
- type: string
- status:
- description: status of the condition, one of True, False, Unknown.
- enum:
- - "True"
- - "False"
- - Unknown
- type: string
- type:
- description: type of condition in CamelCase or in foo.example.com/CamelCase.
- maxLength: 316
- pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
- type: string
- required:
- - lastTransitionTime
- - message
- - reason
- - status
- - type
- type: object
- type: array
- knowledgesReadyFrac:
- description: |-
- "ReadyKnowledges / TotalKnowledges ready" as a human-readable string
- or "ready" if there are no knowledges configured.
- type: string
- ready:
- description: If the step is ready to be executed.
- type: boolean
- readyKnowledges:
- description: How many knowledges have been extracted.
- type: integer
- totalKnowledges:
- description: Total number of knowledges configured.
- type: integer
- required:
- - ready
- - readyKnowledges
- - totalKnowledges
- type: object
- required:
- - spec
- type: object
- served: true
- storage: true
- subresources:
- status: {}
diff --git a/config/crd/cortex.cloud_decisions.yaml b/config/crd/cortex.cloud_decisions.yaml
index 6a8f7b209..c4dc8acb9 100644
--- a/config/crd/cortex.cloud_decisions.yaml
+++ b/config/crd/cortex.cloud_decisions.yaml
@@ -379,52 +379,12 @@ spec:
type: number
description: Activations of the step for each host.
type: object
- stepRef:
+ stepName:
description: object reference to the scheduler step.
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
+ type: string
required:
- activations
- - stepRef
+ - stepName
type: object
type: array
targetHost:
diff --git a/config/crd/cortex.cloud_pipelines.yaml b/config/crd/cortex.cloud_pipelines.yaml
index a1bad9528..43c7dea12 100644
--- a/config/crd/cortex.cloud_pipelines.yaml
+++ b/config/crd/cortex.cloud_pipelines.yaml
@@ -73,57 +73,101 @@ spec:
description: The ordered list of steps that make up this pipeline.
items:
properties:
+ description:
+ description: |-
+ Additional description of the step which helps understand its purpose
+ and decisions made by it.
+ type: string
+ impl:
+ description: The name of the scheduler step in the cortex implementation.
+ type: string
+ knowledges:
+ description: Knowledges this step depends on to be ready.
+ items:
+ description: ObjectReference contains enough information to
+ let you inspect or modify the referred object.
+ properties:
+ apiVersion:
+ description: API version of the referent.
+ type: string
+ fieldPath:
+ description: |-
+ If referring to a piece of an object instead of an entire object, this string
+ should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+ For example, if the object reference is to a container within a pod, this would take on a value like:
+ "spec.containers{name}" (where "name" refers to the name of the container that triggered
+ the event) or if no container name is specified "spec.containers[2]" (container with
+ index 2 in this pod). This syntax is chosen only to have some well-defined way of
+ referencing a part of an object.
+ type: string
+ kind:
+ description: |-
+ Kind of the referent.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ name:
+ description: |-
+ Name of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ namespace:
+ description: |-
+ Namespace of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+ type: string
+ resourceVersion:
+ description: |-
+ Specific resourceVersion to which this reference is made, if any.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+ type: string
+ uid:
+ description: |-
+ UID of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
mandatory:
default: true
description: Whether this step is mandatory for the pipeline
to be runnable.
type: boolean
- ref:
- description: Reference to the step.
+ opts:
+                description: Additional configuration options that can be
+                  used by the step implementation.
+ type: object
+ x-kubernetes-preserve-unknown-fields: true
+ type:
+ description: The type of the scheduler step.
+ type: string
+ weigher:
+ description: If the type is "weigher", this contains additional
+ configuration for it.
properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
+ disabledValidations:
description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
+ The validations to disable for this step. If none are provided, all
+ applied validations are enabled.
+ properties:
+ sameSubjectNumberInOut:
+ description: |-
+ Whether to validate that no subjects are removed or added from the scheduler
+ step. This should only be disabled for scheduler steps that remove subjects.
+ Thus, if no value is provided, the default is false.
+ type: boolean
+ someSubjectsRemain:
+ description: |-
+ Whether to validate that, after running the step, there are remaining subjects.
+ This should only be disabled for scheduler steps that are expected to
+ remove all subjects.
+ type: boolean
+ type: object
type: object
- x-kubernetes-map-type: atomic
required:
+ - impl
- mandatory
- - ref
+ - type
type: object
type: array
type:
diff --git a/config/crd/cortex.cloud_steps.yaml b/config/crd/cortex.cloud_steps.yaml
deleted file mode 100644
index a02697165..000000000
--- a/config/crd/cortex.cloud_steps.yaml
+++ /dev/null
@@ -1,239 +0,0 @@
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
- annotations:
- controller-gen.kubebuilder.io/version: v0.17.2
- name: steps.cortex.cloud
-spec:
- group: cortex.cloud
- names:
- kind: Step
- listKind: StepList
- plural: steps
- singular: step
- scope: Cluster
- versions:
- - additionalPrinterColumns:
- - jsonPath: .metadata.creationTimestamp
- name: Created
- type: date
- - jsonPath: .spec.schedulingDomain
- name: Domain
- type: string
- - jsonPath: .spec.type
- name: Type
- type: string
- - jsonPath: .status.ready
- name: Ready
- type: boolean
- - jsonPath: .status.knowledgesReadyFrac
- name: Knowledges
- type: string
- name: v1alpha1
- schema:
- openAPIV3Schema:
- description: Step is the Schema for the deschedulings API
- properties:
- apiVersion:
- description: |-
- APIVersion defines the versioned schema of this representation of an object.
- Servers should convert recognized schemas to the latest internal value, and
- may reject unrecognized values.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
- type: string
- kind:
- description: |-
- Kind is a string value representing the REST resource this object represents.
- Servers may infer this from the endpoint the client submits requests to.
- Cannot be updated.
- In CamelCase.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- metadata:
- type: object
- spec:
- description: spec defines the desired state of Step
- properties:
- description:
- description: |-
- Additional description of the step which helps understand its purpose
- and decisions made by it.
- type: string
- impl:
- description: The name of the scheduler step in the cortex implementation.
- type: string
- knowledges:
- description: Knowledges this step depends on to be ready.
- items:
- description: ObjectReference contains enough information to let
- you inspect or modify the referred object.
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
- type: array
- opts:
- description: Additional configuration for the extractor that can be
- used
- type: object
- x-kubernetes-preserve-unknown-fields: true
- schedulingDomain:
- description: |-
- SchedulingDomain defines in which scheduling domain this step
- is used (e.g., nova, cinder, manila).
- type: string
- type:
- description: The type of the scheduler step.
- type: string
- weigher:
- description: If the type is "weigher", this contains additional configuration
- for it.
- properties:
- disabledValidations:
- description: |-
- The validations to disable for this step. If none are provided, all
- applied validations are enabled.
- properties:
- sameSubjectNumberInOut:
- description: |-
- Whether to validate that no subjects are removed or added from the scheduler
- step. This should only be disabled for scheduler steps that remove subjects.
- Thus, if no value is provided, the default is false.
- type: boolean
- someSubjectsRemain:
- description: |-
- Whether to validate that, after running the step, there are remaining subjects.
- This should only be disabled for scheduler steps that are expected to
- remove all subjects.
- type: boolean
- type: object
- type: object
- required:
- - impl
- - schedulingDomain
- - type
- type: object
- status:
- description: status defines the observed state of Step
- properties:
- conditions:
- description: The current status conditions of the step.
- items:
- description: Condition contains details for one aspect of the current
- state of this API Resource.
- properties:
- lastTransitionTime:
- description: |-
- lastTransitionTime is the last time the condition transitioned from one status to another.
- This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
- format: date-time
- type: string
- message:
- description: |-
- message is a human readable message indicating details about the transition.
- This may be an empty string.
- maxLength: 32768
- type: string
- observedGeneration:
- description: |-
- observedGeneration represents the .metadata.generation that the condition was set based upon.
- For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
- with respect to the current state of the instance.
- format: int64
- minimum: 0
- type: integer
- reason:
- description: |-
- reason contains a programmatic identifier indicating the reason for the condition's last transition.
- Producers of specific condition types may define expected values and meanings for this field,
- and whether the values are considered a guaranteed API.
- The value should be a CamelCase string.
- This field may not be empty.
- maxLength: 1024
- minLength: 1
- pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
- type: string
- status:
- description: status of the condition, one of True, False, Unknown.
- enum:
- - "True"
- - "False"
- - Unknown
- type: string
- type:
- description: type of condition in CamelCase or in foo.example.com/CamelCase.
- maxLength: 316
- pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
- type: string
- required:
- - lastTransitionTime
- - message
- - reason
- - status
- - type
- type: object
- type: array
- knowledgesReadyFrac:
- description: |-
- "ReadyKnowledges / TotalKnowledges ready" as a human-readable string
- or "ready" if there are no knowledges configured.
- type: string
- ready:
- description: If the step is ready to be executed.
- type: boolean
- readyKnowledges:
- description: How many knowledges have been extracted.
- type: integer
- totalKnowledges:
- description: Total number of knowledges configured.
- type: integer
- required:
- - ready
- - readyKnowledges
- - totalKnowledges
- type: object
- required:
- - spec
- type: object
- served: true
- storage: true
- subresources:
- status: {}
diff --git a/dist/chart/templates/crd/cortex.cloud_decisions.yaml b/dist/chart/templates/crd/cortex.cloud_decisions.yaml
index c7d4c07fd..52d2f07f7 100644
--- a/dist/chart/templates/crd/cortex.cloud_decisions.yaml
+++ b/dist/chart/templates/crd/cortex.cloud_decisions.yaml
@@ -385,52 +385,12 @@ spec:
type: number
description: Activations of the step for each host.
type: object
- stepRef:
+ stepName:
description: object reference to the scheduler step.
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
+ type: string
required:
- activations
- - stepRef
+ - stepName
type: object
type: array
targetHost:
diff --git a/dist/chart/templates/crd/cortex.cloud_pipelines.yaml b/dist/chart/templates/crd/cortex.cloud_pipelines.yaml
index 255365371..752b6e238 100644
--- a/dist/chart/templates/crd/cortex.cloud_pipelines.yaml
+++ b/dist/chart/templates/crd/cortex.cloud_pipelines.yaml
@@ -79,57 +79,101 @@ spec:
description: The ordered list of steps that make up this pipeline.
items:
properties:
+ description:
+ description: |-
+ Additional description of the step which helps understand its purpose
+ and decisions made by it.
+ type: string
+ impl:
+ description: The name of the scheduler step in the cortex implementation.
+ type: string
+ knowledges:
+ description: Knowledges this step depends on to be ready.
+ items:
+ description: ObjectReference contains enough information to
+ let you inspect or modify the referred object.
+ properties:
+ apiVersion:
+ description: API version of the referent.
+ type: string
+ fieldPath:
+ description: |-
+ If referring to a piece of an object instead of an entire object, this string
+ should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+ For example, if the object reference is to a container within a pod, this would take on a value like:
+ "spec.containers{name}" (where "name" refers to the name of the container that triggered
+ the event) or if no container name is specified "spec.containers[2]" (container with
+ index 2 in this pod). This syntax is chosen only to have some well-defined way of
+ referencing a part of an object.
+ type: string
+ kind:
+ description: |-
+ Kind of the referent.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ name:
+ description: |-
+ Name of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ namespace:
+ description: |-
+ Namespace of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+ type: string
+ resourceVersion:
+ description: |-
+ Specific resourceVersion to which this reference is made, if any.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+ type: string
+ uid:
+ description: |-
+ UID of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ type: array
mandatory:
default: true
description: Whether this step is mandatory for the pipeline
to be runnable.
type: boolean
- ref:
- description: Reference to the step.
+ opts:
+                    description: Additional configuration options that can be
+                      used by the step implementation.
+ type: object
+ x-kubernetes-preserve-unknown-fields: true
+ type:
+ description: The type of the scheduler step.
+ type: string
+ weigher:
+ description: If the type is "weigher", this contains additional
+ configuration for it.
properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
+ disabledValidations:
description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
+ The validations to disable for this step. If none are provided, all
+ applied validations are enabled.
+ properties:
+ sameSubjectNumberInOut:
+ description: |-
+ Whether to validate that no subjects are removed or added from the scheduler
+ step. This should only be disabled for scheduler steps that remove subjects.
+ Thus, if no value is provided, the default is false.
+ type: boolean
+ someSubjectsRemain:
+ description: |-
+ Whether to validate that, after running the step, there are remaining subjects.
+ This should only be disabled for scheduler steps that are expected to
+ remove all subjects.
+ type: boolean
+ type: object
type: object
- x-kubernetes-map-type: atomic
required:
+ - impl
- mandatory
- - ref
+ - type
type: object
type: array
type:
diff --git a/dist/chart/templates/crd/cortex.cloud_steps.yaml b/dist/chart/templates/crd/cortex.cloud_steps.yaml
deleted file mode 100644
index 15722f7c0..000000000
--- a/dist/chart/templates/crd/cortex.cloud_steps.yaml
+++ /dev/null
@@ -1,246 +0,0 @@
-{{- if .Values.crd.enable }}
----
-apiVersion: apiextensions.k8s.io/v1
-kind: CustomResourceDefinition
-metadata:
- labels:
- {{- include "chart.labels" . | nindent 4 }}
- annotations:
- {{- if .Values.crd.keep }}
- "helm.sh/resource-policy": keep
- {{- end }}
- controller-gen.kubebuilder.io/version: v0.17.2
- name: steps.cortex.cloud
-spec:
- group: cortex.cloud
- names:
- kind: Step
- listKind: StepList
- plural: steps
- singular: step
- scope: Cluster
- versions:
- - additionalPrinterColumns:
- - jsonPath: .metadata.creationTimestamp
- name: Created
- type: date
- - jsonPath: .spec.schedulingDomain
- name: Domain
- type: string
- - jsonPath: .spec.type
- name: Type
- type: string
- - jsonPath: .status.ready
- name: Ready
- type: boolean
- - jsonPath: .status.knowledgesReadyFrac
- name: Knowledges
- type: string
- name: v1alpha1
- schema:
- openAPIV3Schema:
- description: Step is the Schema for the deschedulings API
- properties:
- apiVersion:
- description: |-
- APIVersion defines the versioned schema of this representation of an object.
- Servers should convert recognized schemas to the latest internal value, and
- may reject unrecognized values.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
- type: string
- kind:
- description: |-
- Kind is a string value representing the REST resource this object represents.
- Servers may infer this from the endpoint the client submits requests to.
- Cannot be updated.
- In CamelCase.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- metadata:
- type: object
- spec:
- description: spec defines the desired state of Step
- properties:
- description:
- description: |-
- Additional description of the step which helps understand its purpose
- and decisions made by it.
- type: string
- impl:
- description: The name of the scheduler step in the cortex implementation.
- type: string
- knowledges:
- description: Knowledges this step depends on to be ready.
- items:
- description: ObjectReference contains enough information to let
- you inspect or modify the referred object.
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
- type: array
- opts:
- description: Additional configuration for the extractor that can be
- used
- type: object
- x-kubernetes-preserve-unknown-fields: true
- schedulingDomain:
- description: |-
- SchedulingDomain defines in which scheduling domain this step
- is used (e.g., nova, cinder, manila).
- type: string
- type:
- description: The type of the scheduler step.
- type: string
- weigher:
- description: If the type is "weigher", this contains additional configuration
- for it.
- properties:
- disabledValidations:
- description: |-
- The validations to disable for this step. If none are provided, all
- applied validations are enabled.
- properties:
- sameSubjectNumberInOut:
- description: |-
- Whether to validate that no subjects are removed or added from the scheduler
- step. This should only be disabled for scheduler steps that remove subjects.
- Thus, if no value is provided, the default is false.
- type: boolean
- someSubjectsRemain:
- description: |-
- Whether to validate that, after running the step, there are remaining subjects.
- This should only be disabled for scheduler steps that are expected to
- remove all subjects.
- type: boolean
- type: object
- type: object
- required:
- - impl
- - schedulingDomain
- - type
- type: object
- status:
- description: status defines the observed state of Step
- properties:
- conditions:
- description: The current status conditions of the step.
- items:
- description: Condition contains details for one aspect of the current
- state of this API Resource.
- properties:
- lastTransitionTime:
- description: |-
- lastTransitionTime is the last time the condition transitioned from one status to another.
- This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
- format: date-time
- type: string
- message:
- description: |-
- message is a human readable message indicating details about the transition.
- This may be an empty string.
- maxLength: 32768
- type: string
- observedGeneration:
- description: |-
- observedGeneration represents the .metadata.generation that the condition was set based upon.
- For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
- with respect to the current state of the instance.
- format: int64
- minimum: 0
- type: integer
- reason:
- description: |-
- reason contains a programmatic identifier indicating the reason for the condition's last transition.
- Producers of specific condition types may define expected values and meanings for this field,
- and whether the values are considered a guaranteed API.
- The value should be a CamelCase string.
- This field may not be empty.
- maxLength: 1024
- minLength: 1
- pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
- type: string
- status:
- description: status of the condition, one of True, False, Unknown.
- enum:
- - "True"
- - "False"
- - Unknown
- type: string
- type:
- description: type of condition in CamelCase or in foo.example.com/CamelCase.
- maxLength: 316
- pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
- type: string
- required:
- - lastTransitionTime
- - message
- - reason
- - status
- - type
- type: object
- type: array
- knowledgesReadyFrac:
- description: |-
- "ReadyKnowledges / TotalKnowledges ready" as a human-readable string
- or "ready" if there are no knowledges configured.
- type: string
- ready:
- description: If the step is ready to be executed.
- type: boolean
- readyKnowledges:
- description: How many knowledges have been extracted.
- type: integer
- totalKnowledges:
- description: Total number of knowledges configured.
- type: integer
- required:
- - ready
- - readyKnowledges
- - totalKnowledges
- type: object
- required:
- - spec
- type: object
- served: true
- storage: true
- subresources:
- status: {}
-{{- end -}}
diff --git a/helm/bundles/cortex-cinder/templates/steps.yaml b/helm/bundles/cortex-cinder/templates/steps.yaml
deleted file mode 100644
index 73b314ff7..000000000
--- a/helm/bundles/cortex-cinder/templates/steps.yaml
+++ /dev/null
@@ -1 +0,0 @@
----
\ No newline at end of file
diff --git a/helm/bundles/cortex-ironcore/templates/pipelines.yaml b/helm/bundles/cortex-ironcore/templates/pipelines.yaml
index 60d7dae1e..231e95e47 100644
--- a/helm/bundles/cortex-ironcore/templates/pipelines.yaml
+++ b/helm/bundles/cortex-ironcore/templates/pipelines.yaml
@@ -10,5 +10,11 @@ spec:
type: filter-weigher
createDecisions: true
steps:
- - ref: {name: machinepools-noop}
+ - type: weigher
+ impl: noop
+ description: |
+ This is only a passthrough step which assigns a zero-weight to all machinepool
+ candidates. It is used as a placeholder step in the ironcore machines scheduler
+ pipeline.
+ knowledges: []
mandatory: false
diff --git a/helm/bundles/cortex-ironcore/templates/steps.yaml b/helm/bundles/cortex-ironcore/templates/steps.yaml
deleted file mode 100644
index f2a15990e..000000000
--- a/helm/bundles/cortex-ironcore/templates/steps.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: machinepools-noop
-spec:
- schedulingDomain: machines
- type: weigher
- impl: noop
- description: |
- This is only a passthrough step which assigns a zero-weight to all machinepool
- candidates. It is used as a placeholder step in the ironcore machines scheduler
- pipeline.
- knowledges: []
diff --git a/helm/bundles/cortex-manila/templates/pipelines.yaml b/helm/bundles/cortex-manila/templates/pipelines.yaml
index 60970fb1e..aba1e5313 100644
--- a/helm/bundles/cortex-manila/templates/pipelines.yaml
+++ b/helm/bundles/cortex-manila/templates/pipelines.yaml
@@ -12,5 +12,21 @@ spec:
Cortex returns a ranked list of hosts back to manila for final selection.
type: filter-weigher
steps:
- - ref: {name: netapp-cpu-usage-balancing-manila}
+ - type: weigher
+ impl: netapp_cpu_usage_balancing
+ description: |
+ This step uses netapp storage pool cpu metrics condensed into a feature
+ to balance manila share placements across available storage pools.
+ Its main purpose is to avoid cpu overutilization on a storage pool which
+ may lead to performance degradation for shares placed on that pool.
+ opts:
+ # Min-max scaling for gap-fitting based on CPU usage (pct)
+ avgCPUUsageLowerBound: 0 # pct
+ avgCPUUsageUpperBound: 10 # pct
+ avgCPUUsageActivationLowerBound: 0.0
+ avgCPUUsageActivationUpperBound: -0.75
+ maxCPUUsageLowerBound: 0 # pct
+ maxCPUUsageUpperBound: 10 # pct
+ maxCPUUsageActivationLowerBound: 0.0
+ maxCPUUsageActivationUpperBound: -0.25
mandatory: false
diff --git a/helm/bundles/cortex-manila/templates/steps.yaml b/helm/bundles/cortex-manila/templates/steps.yaml
deleted file mode 100644
index 5ec8882d6..000000000
--- a/helm/bundles/cortex-manila/templates/steps.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: netapp-cpu-usage-balancing-manila
-spec:
- schedulingDomain: manila
- type: weigher
- impl: netapp_cpu_usage_balancing
- description: |
- This step uses netapp storage pool cpu metrics condensed into a feature
- to balance manila share placements across available storage pools.
- Its main purpose is to avoid cpu overutilization on a storage pool which
- may lead to performance degradation for shares placed on that pool.
- opts:
- # Min-max scaling for gap-fitting based on CPU usage (pct)
- avgCPUUsageLowerBound: 0 # pct
- avgCPUUsageUpperBound: 10 # pct
- avgCPUUsageActivationLowerBound: 0.0
- avgCPUUsageActivationUpperBound: -0.75
- maxCPUUsageLowerBound: 0 # pct
- maxCPUUsageUpperBound: 10 # pct
- maxCPUUsageActivationLowerBound: 0.0
- maxCPUUsageActivationUpperBound: -0.25
\ No newline at end of file
diff --git a/helm/bundles/cortex-nova/templates/pipelines.yaml b/helm/bundles/cortex-nova/templates/pipelines.yaml
index 981ab4b58..0009c58e2 100644
--- a/helm/bundles/cortex-nova/templates/pipelines.yaml
+++ b/helm/bundles/cortex-nova/templates/pipelines.yaml
@@ -15,13 +15,73 @@ spec:
type: filter-weigher
createDecisions: false
steps:
- - ref: {name: vmware-hana-binpacking}
+ - type: weigher
+ impl: vmware_hana_binpacking
+ description: |
+ This step pulls HANA VMs onto the smallest possible gaps on HANA-exclusive
+ VMware hosts. In this way hosts with much free space are held free for
+ larger HANA VMs, improving overall packing efficiency for HANA workloads.
+ knowledges:
+ - name: host-utilization
+ - name: host-capabilities
+ opts:
+ ramUtilizedAfterLowerBoundPct: 0
+ ramUtilizedAfterUpperBoundPct: 100
+ ramUtilizedAfterActivationLowerBound: 0.0
+ ramUtilizedAfterActivationUpperBound: 1.0
mandatory: false
- - ref: {name: vmware-general-purpose-balancing}
+ - type: weigher
+ impl: vmware_general_purpose_balancing
+ description: |
+ This step balances non-HANA VMs across non-HANA exclusive VMware hosts. It
+        pulls VMs onto the freest hosts possible to ensure an even distribution of
+        workloads across the available infrastructure.
+ knowledges:
+ - name: host-utilization
+ - name: host-capabilities
+ opts:
+ ramUtilizedLowerBoundPct: 0
+ ramUtilizedUpperBoundPct: 100
+ ramUtilizedActivationLowerBound: 1.0
+ ramUtilizedActivationUpperBound: 0.0
mandatory: false
- - ref: {name: vmware-avoid-long-term-contended-hosts}
+ - type: weigher
+ impl: vmware_avoid_long_term_contended_hosts
+ description: |
+ This step avoids placing vms on vmware hosts with a high CPU contention over
+ a longer period of time, based on vrops contention metrics. In particular,
+ this step looks at a longer time window of 4 weeks to identify hosts that
+ are consistently contended.
+ knowledges:
+ - name: vmware-long-term-contended-hosts
+ opts:
+ avgCPUContentionLowerBound: 0 # pct
+ avgCPUContentionUpperBound: 10 # pct
+ avgCPUContentionActivationLowerBound: 0.0
+ avgCPUContentionActivationUpperBound: -0.75
+ maxCPUContentionLowerBound: 0 # pct
+ maxCPUContentionUpperBound: 10 # pct
+ maxCPUContentionActivationLowerBound: 0.0
+ maxCPUContentionActivationUpperBound: -0.25
mandatory: false
- - ref: {name: vmware-avoid-short-term-contended-hosts}
+ - type: weigher
+ impl: vmware_avoid_short_term_contended_hosts
+ description: |
+ This step avoids placing vms on vmware hosts with a high CPU contention over
+ a shorter period of time, based on vrops contention metrics. In particular,
+ this step looks at a shorter time window of 20 minutes to identify hosts that
+ are currently contended.
+ knowledges:
+ - name: vmware-short-term-contended-hosts
+ opts:
+ avgCPUContentionLowerBound: 0 # pct
+ avgCPUContentionUpperBound: 10 # pct
+ avgCPUContentionActivationLowerBound: 0.0
+ avgCPUContentionActivationUpperBound: -0.75
+ maxCPUContentionLowerBound: 0 # pct
+ maxCPUContentionUpperBound: 10 # pct
+ maxCPUContentionActivationLowerBound: 0.0
+ maxCPUContentionActivationUpperBound: -0.25
mandatory: false
---
apiVersion: cortex.cloud/v1alpha1
@@ -59,17 +119,85 @@ spec:
createDecisions: true
{{- end }}
steps:
- - ref: {name: filter-host-instructions}
- - ref: {name: filter-has-enough-capacity}
- - ref: {name: filter-has-requested-traits}
- - ref: {name: filter-has-accelerators}
- - ref: {name: filter-correct-az}
- - ref: {name: filter-status-conditions}
- - ref: {name: filter-maintenance}
- - ref: {name: filter-external-customer}
- - ref: {name: filter-packed-virtqueue}
- - ref: {name: filter-allowed-projects}
- - ref: {name: filter-capabilities}
+ - type: filter
+ impl: filter_host_instructions
+ description: |
+ This step will consider the `ignore_hosts` and `force_hosts` instructions
+ from the nova scheduler request spec to filter out or exclusively allow
+ certain hosts.
+ knowledges: []
+ - type: filter
+ impl: filter_has_enough_capacity
+ description: |
+ This step will filter out hosts that do not have enough available capacity
+ to host the requested flavor. If enabled, this step will subtract the
+ current reservations residing on this host from the available capacity.
+ opts:
+ # If reserved space should be locked even for matching requests.
+ # For the reservations pipeline, we don't want to unlock
+ # reserved space, to avoid reservations for the same project
+ # and flavor to overlap.
+ lockReserved: true
+ - type: filter
+ impl: filter_has_requested_traits
+ description: |
+ This step filters hosts that do not have the requested traits given by the
+ nova flavor extra spec: "trait:": "forbidden" means the host must
+ not have the specified trait. "trait:": "required" means the host
+ must have the specified trait.
+ - type: filter
+ impl: filter_has_accelerators
+ description: |
+ This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if
+ the nova flavor extra specs request accelerators via "accel:device_profile".
+ - type: filter
+ impl: filter_correct_az
+ description: |
+ This step will filter out hosts whose aggregate information indicates they
+ are not placed in the requested availability zone.
+ - type: filter
+ impl: filter_status_conditions
+ description: |
+ This step will filter out hosts for which the hypervisor status conditions
+ do not meet the expected values, for example, that the hypervisor is ready
+ and not disabled.
+ - type: filter
+ impl: filter_maintenance
+ description: |
+ This step will filter out hosts that are currently in maintenance mode that
+ prevents scheduling, for example, manual maintenance or termination.
+ - type: filter
+ impl: filter_external_customer
+ description: |
+ This step prefix-matches the domain name for external customer domains and
+ filters out hosts that are not intended for external customers. It considers
+ the `CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED` trait on hosts as well as the
+ `domain_name` scheduler hint from the nova request spec.
+ opts:
+ domainNamePrefixes: ["iaas-"]
+ - type: filter
+ impl: filter_packed_virtqueue
+ description: |
+ If the flavor extra specs contain the `hw:virtio_packed_ring` key, or the
+ image properties contain the `hw_virtio_packed_ring` key, this step will
+ filter out hosts that do not have the `COMPUTE_NET_VIRTIO_PACKED` trait.
+ - type: filter
+ impl: filter_allowed_projects
+ description: |
+ This step filters hosts based on allowed projects defined in the
+ hypervisor resource. Note that hosts allowing all projects are still
+ accessible and will not be filtered out. In this way some hypervisors
+ are made accessible to some projects only.
+ - type: filter
+ impl: filter_capabilities
+ description: |
+ This step will filter out hosts that do not meet the compute capabilities
+ requested by the nova flavor extra specs, like `{"arch": "x86_64",
+ "maxphysaddr:bits": 46, ...}`.
+
+ Note: currently, advanced boolean/numeric operators for the capabilities
+ like `>`, `!`, ... are not supported because they are not used by any of our
+ flavors in production.
---
apiVersion: cortex.cloud/v1alpha1
kind: Pipeline
@@ -86,5 +214,13 @@ spec:
createDecisions: true
{{- end }}
steps:
- - ref: {name: avoid-high-steal-pct}
+ - type: descheduler
+ impl: avoid_high_steal_pct
+ description: |
+ This step will deschedule VMs once they reach this CPU steal percentage over
+ the observed time span.
+ knowledges:
+ - name: kvm-libvirt-domain-cpu-steal-pct
+ opts:
+ maxStealPctOverObservedTimeSpan: 20.0
mandatory: false
diff --git a/helm/bundles/cortex-nova/templates/steps.yaml b/helm/bundles/cortex-nova/templates/steps.yaml
deleted file mode 100644
index c958f74ee..000000000
--- a/helm/bundles/cortex-nova/templates/steps.yaml
+++ /dev/null
@@ -1,263 +0,0 @@
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: vmware-hana-binpacking
-spec:
- schedulingDomain: nova
- type: weigher
- impl: vmware_hana_binpacking
- description: |
- This step pulls HANA VMs onto the smallest possible gaps on HANA-exclusive
- VMware hosts. In this way hosts with much free space are held free for
- larger HANA VMs, improving overall packing efficiency for HANA workloads.
- knowledges:
- - name: host-utilization
- - name: host-capabilities
- opts:
- ramUtilizedAfterLowerBoundPct: 0
- ramUtilizedAfterUpperBoundPct: 100
- ramUtilizedAfterActivationLowerBound: 0.0
- ramUtilizedAfterActivationUpperBound: 1.0
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: vmware-general-purpose-balancing
-spec:
- schedulingDomain: nova
- type: weigher
- impl: vmware_general_purpose_balancing
- description: |
- This step balances non-HANA VMs across non-HANA exclusive VMware hosts. It
- pulls vms onto the freeest hosts possible to ensure an even distribution of
- workloads across the available infrastructure.
- knowledges:
- - name: host-utilization
- - name: host-capabilities
- opts:
- ramUtilizedLowerBoundPct: 0
- ramUtilizedUpperBoundPct: 100
- ramUtilizedActivationLowerBound: 1.0
- ramUtilizedActivationUpperBound: 0.0
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: vmware-avoid-long-term-contended-hosts
-spec:
- schedulingDomain: nova
- type: weigher
- impl: vmware_avoid_long_term_contended_hosts
- description: |
- This step avoids placing vms on vmware hosts with a high CPU contention over
- a longer period of time, based on vrops contention metrics. In particular,
- this step looks at a longer time window of 4 weeks to identify hosts that
- are consistently contended.
- knowledges:
- - name: vmware-long-term-contended-hosts
- opts:
- avgCPUContentionLowerBound: 0 # pct
- avgCPUContentionUpperBound: 10 # pct
- avgCPUContentionActivationLowerBound: 0.0
- avgCPUContentionActivationUpperBound: -0.75
- maxCPUContentionLowerBound: 0 # pct
- maxCPUContentionUpperBound: 10 # pct
- maxCPUContentionActivationLowerBound: 0.0
- maxCPUContentionActivationUpperBound: -0.25
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: vmware-avoid-short-term-contended-hosts
-spec:
- schedulingDomain: nova
- type: weigher
- impl: vmware_avoid_short_term_contended_hosts
- description: |
- This step avoids placing vms on vmware hosts with a high CPU contention over
- a shorter period of time, based on vrops contention metrics. In particular,
- this step looks at a shorter time window of 20 minutes to identify hosts that
- are currently contended.
- knowledges:
- - name: vmware-short-term-contended-hosts
- opts:
- avgCPUContentionLowerBound: 0 # pct
- avgCPUContentionUpperBound: 10 # pct
- avgCPUContentionActivationLowerBound: 0.0
- avgCPUContentionActivationUpperBound: -0.75
- maxCPUContentionLowerBound: 0 # pct
- maxCPUContentionUpperBound: 10 # pct
- maxCPUContentionActivationLowerBound: 0.0
- maxCPUContentionActivationUpperBound: -0.25
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-host-instructions
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_host_instructions
- description: |
- This step will consider the `ignore_hosts` and `force_hosts` instructions
- from the nova scheduler request spec to filter out or exclusively allow
- certain hosts.
- knowledges: []
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-has-enough-capacity
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_has_enough_capacity
- description: |
- This step will filter out hosts that do not have enough available capacity
- to host the requested flavor. If enabled, this step will subtract the
- current reservations residing on this host from the available capacity.
- opts:
- # If reserved space should be locked even for matching requests.
- # For the reservations pipeline, we don't want to unlock
- # reserved space, to avoid reservations for the same project
- # and flavor to overlap.
- lockReserved: true
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-has-requested-traits
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_has_requested_traits
- description: |
- This step filters hosts that do not have the requested traits given by the
- nova flavor extra spec: "trait:": "forbidden" means the host must
- not have the specified trait. "trait:": "required" means the host
- must have the specified trait.
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-has-accelerators
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_has_accelerators
- description: |
- This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if
- the nova flavor extra specs request accelerators via "accel:device_profile".
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-correct-az
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_correct_az
- description: |
- This step will filter out hosts whose aggregate information indicates they
- are not placed in the requested availability zone.
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-status-conditions
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_status_conditions
- description: |
- This step will filter out hosts for which the hypervisor status conditions
- do not meet the expected values, for example, that the hypervisor is ready
- and not disabled.
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-maintenance
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_maintenance
- description: |
- This step will filter out hosts that are currently in maintenance mode that
- prevents scheduling, for example, manual maintenance or termination.
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-external-customer
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_external_customer
- description: |
- This step prefix-matches the domain name for external customer domains and
- filters out hosts that are not intended for external customers. It considers
- the `CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED` trait on hosts as well as the
- `domain_name` scheduler hint from the nova request spec.
- opts:
- domainNamePrefixes: ["iaas-"]
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-packed-virtqueue
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_packed_virtqueue
- description: |
- If the flavor extra specs contain the `hw:virtio_packed_ring` key, or the
- image properties contain the `hw_virtio_packed_ring` key, this step will
- filter out hosts that do not have the `COMPUTE_NET_VIRTIO_PACKED` trait.
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-allowed-projects
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_allowed_projects
- description: |
- This step filters hosts based on allowed projects defined in the
- hypervisor resource. Note that hosts allowing all projects are still
- accessible and will not be filtered out. In this way some hypervisors
- are made accessible to some projects only.
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: filter-capabilities
-spec:
- schedulingDomain: nova
- type: filter
- impl: filter_capabilities
- description: |
- This step will filter out hosts that do not meet the compute capabilities
- requested by the nova flavor extra specs, like `{"arch": "x86_64",
- "maxphysaddr:bits": 46, ...}`.
-
- Note: currently, advanced boolean/numeric operators for the capabilities
- like `>`, `!`, ... are not supported because they are not used by any of our
- flavors in production.
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: avoid-high-steal-pct
-spec:
- schedulingDomain: nova
- type: descheduler
- impl: avoid_high_steal_pct
- description: |
- This step will deschedule VMs once they reach this CPU steal percentage over
- the observed time span.
- knowledges: []
- opts:
- maxStealPctOverObservedTimeSpan: 20.0
diff --git a/helm/bundles/cortex-pods/templates/pipelines.yaml b/helm/bundles/cortex-pods/templates/pipelines.yaml
index 6059547e5..aec8db635 100644
--- a/helm/bundles/cortex-pods/templates/pipelines.yaml
+++ b/helm/bundles/cortex-pods/templates/pipelines.yaml
@@ -10,5 +10,10 @@ spec:
type: filter-weigher
createDecisions: true
steps:
- - ref: { name: pods-noop }
+ - type: filter
+ impl: noop
+ description: |
+ This is only a passthrough step which lets all pod candidates through.
+ It is used as a placeholder step in the pods scheduler pipeline.
+ knowledges: []
mandatory: false
diff --git a/helm/bundles/cortex-pods/templates/steps.yaml b/helm/bundles/cortex-pods/templates/steps.yaml
deleted file mode 100644
index ad926bf04..000000000
--- a/helm/bundles/cortex-pods/templates/steps.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
----
-apiVersion: cortex.cloud/v1alpha1
-kind: Step
-metadata:
- name: pods-noop
-spec:
- schedulingDomain: pods
- type: filter
- impl: noop
- description: |
- This is only a passthrough step which lets all pod candidates through.
- It is used as a placeholder step in the pods scheduler pipeline.
- knowledges: []
diff --git a/internal/knowledge/kpis/plugins/deployment/step_state.go b/internal/knowledge/kpis/plugins/deployment/step_state.go
deleted file mode 100644
index f39cffe83..000000000
--- a/internal/knowledge/kpis/plugins/deployment/step_state.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright SAP SE
-// SPDX-License-Identifier: Apache-2.0
-
-package deployment
-
-import (
- "context"
-
- "github.com/cobaltcore-dev/cortex/api/v1alpha1"
- "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis/plugins"
- "github.com/cobaltcore-dev/cortex/pkg/conf"
- "github.com/cobaltcore-dev/cortex/pkg/db"
- "github.com/prometheus/client_golang/prometheus"
- "k8s.io/apimachinery/pkg/api/meta"
- "sigs.k8s.io/controller-runtime/pkg/client"
-)
-
-type StepStateKPIOpts struct {
- // The scheduling domain to filter steps by.
- StepSchedulingDomain v1alpha1.SchedulingDomain `json:"stepSchedulingDomain"`
-}
-
-// KPI observing the state of step resources managed by cortex.
-type StepStateKPI struct {
- // Common base for all KPIs that provides standard functionality.
- plugins.BaseKPI[StepStateKPIOpts]
-
- // Prometheus descriptor for the step state metric.
- counter *prometheus.Desc
-}
-
-func (StepStateKPI) GetName() string { return "step_state_kpi" }
-
-// Initialize the KPI.
-func (k *StepStateKPI) Init(db *db.DB, client client.Client, opts conf.RawOpts) error {
- if err := k.BaseKPI.Init(db, client, opts); err != nil {
- return err
- }
- k.counter = prometheus.NewDesc(
- "cortex_step_state",
- "State of cortex managed steps",
- []string{"operator", "step", "state"},
- nil,
- )
- return nil
-}
-
-// Conform to the prometheus collector interface by providing the descriptor.
-func (k *StepStateKPI) Describe(ch chan<- *prometheus.Desc) { ch <- k.counter }
-
-// Collect the step state metrics.
-func (k *StepStateKPI) Collect(ch chan<- prometheus.Metric) {
- // Get all steps with the specified step operator.
- stepList := &v1alpha1.StepList{}
- if err := k.Client.List(context.Background(), stepList); err != nil {
- return
- }
- var steps []v1alpha1.Step
- for _, step := range stepList.Items {
- if step.Spec.SchedulingDomain != k.Options.StepSchedulingDomain {
- continue
- }
- steps = append(steps, step)
- }
- // For each step, emit a metric with its state.
- for _, step := range steps {
- var state string
- switch {
- case meta.IsStatusConditionTrue(step.Status.Conditions, v1alpha1.StepConditionError):
- state = "error"
- case step.Status.Ready:
- state = "ready"
- default:
- state = "unknown"
- }
- ch <- prometheus.MustNewConstMetric(
- k.counter, prometheus.GaugeValue, 1,
- string(k.Options.StepSchedulingDomain), step.Name, state,
- )
- }
-}
diff --git a/internal/knowledge/kpis/plugins/deployment/step_state_test.go b/internal/knowledge/kpis/plugins/deployment/step_state_test.go
deleted file mode 100644
index b5f96e854..000000000
--- a/internal/knowledge/kpis/plugins/deployment/step_state_test.go
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright SAP SE
-// SPDX-License-Identifier: Apache-2.0
-
-package deployment
-
-import (
- "testing"
-
- "github.com/cobaltcore-dev/cortex/api/v1alpha1"
- "github.com/cobaltcore-dev/cortex/pkg/conf"
- "github.com/prometheus/client_golang/prometheus"
- v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "sigs.k8s.io/controller-runtime/pkg/client/fake"
-)
-
-func TestStepStateKPI_Init(t *testing.T) {
- kpi := &StepStateKPI{}
- if err := kpi.Init(nil, nil, conf.NewRawOpts(`{"stepSchedulingDomain": "test-operator"}`)); err != nil {
- t.Fatalf("expected no error, got %v", err)
- }
-}
-
-func TestStepStateKPI_Collect(t *testing.T) {
- scheme, err := v1alpha1.SchemeBuilder.Build()
- if err != nil {
- t.Fatalf("expected no error, got %v", err)
- }
-
- tests := []struct {
- name string
- steps []v1alpha1.Step
- operator string
- expectedCount int
- description string
- }{
- {
- name: "no steps",
- steps: []v1alpha1.Step{},
- operator: "test-operator",
- expectedCount: 0,
- description: "should not collect metrics when no steps exist",
- },
- {
- name: "single ready step",
- steps: []v1alpha1.Step{
- {
- ObjectMeta: v1.ObjectMeta{Name: "step1"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"},
- Status: v1alpha1.StepStatus{
- Ready: true,
- Conditions: []v1.Condition{},
- },
- },
- },
- operator: "test-operator",
- expectedCount: 1,
- description: "should collect metric for ready step",
- },
- {
- name: "step in error state",
- steps: []v1alpha1.Step{
- {
- ObjectMeta: v1.ObjectMeta{Name: "step2"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"},
- Status: v1alpha1.StepStatus{
- Ready: false,
- Conditions: []v1.Condition{
- {
- Type: v1alpha1.StepConditionError,
- Status: v1.ConditionTrue,
- },
- },
- },
- },
- },
- operator: "test-operator",
- expectedCount: 1,
- description: "should collect metric for error step",
- },
- {
- name: "multiple steps different states",
- steps: []v1alpha1.Step{
- {
- ObjectMeta: v1.ObjectMeta{Name: "step-ready"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"},
- Status: v1alpha1.StepStatus{
- Ready: true,
- Conditions: []v1.Condition{},
- },
- },
- {
- ObjectMeta: v1.ObjectMeta{Name: "step-error"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"},
- Status: v1alpha1.StepStatus{
- Ready: false,
- Conditions: []v1.Condition{
- {
- Type: v1alpha1.StepConditionError,
- Status: v1.ConditionTrue,
- },
- },
- },
- },
- },
- operator: "test-operator",
- expectedCount: 2,
- description: "should collect metrics for all steps with different states",
- },
- {
- name: "filter by operator",
- steps: []v1alpha1.Step{
- {
- ObjectMeta: v1.ObjectMeta{Name: "step-correct-operator"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"},
- Status: v1alpha1.StepStatus{
- Ready: true,
- Conditions: []v1.Condition{},
- },
- },
- {
- ObjectMeta: v1.ObjectMeta{Name: "step-wrong-operator"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "other-operator"},
- Status: v1alpha1.StepStatus{
- Ready: true,
- Conditions: []v1.Condition{},
- },
- },
- },
- operator: "test-operator",
- expectedCount: 1,
- description: "should only collect metrics for steps with matching operator",
- },
- {
- name: "step with unknown state",
- steps: []v1alpha1.Step{
- {
- ObjectMeta: v1.ObjectMeta{Name: "step-unknown"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"},
- Status: v1alpha1.StepStatus{
- Ready: false,
- Conditions: []v1.Condition{},
- },
- },
- },
- operator: "test-operator",
- expectedCount: 1,
- description: "should collect metric with unknown state for step without ready status or error condition",
- },
- {
- name: "error condition takes precedence over ready status",
- steps: []v1alpha1.Step{
- {
- ObjectMeta: v1.ObjectMeta{Name: "step-error-priority"},
- Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"},
- Status: v1alpha1.StepStatus{
- Ready: true,
- Conditions: []v1.Condition{
- {
- Type: v1alpha1.StepConditionError,
- Status: v1.ConditionTrue,
- },
- },
- },
- },
- },
- operator: "test-operator",
- expectedCount: 1,
- description: "should report error state even if ready status is true",
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- objects := make([]v1alpha1.Step, len(tt.steps))
- copy(objects, tt.steps)
-
- clientBuilder := fake.NewClientBuilder().WithScheme(scheme)
- for i := range objects {
- clientBuilder = clientBuilder.WithObjects(&objects[i])
- }
- client := clientBuilder.Build()
-
- kpi := &StepStateKPI{}
- if err := kpi.Init(nil, client, conf.NewRawOpts(`{"stepSchedulingDomain": "`+tt.operator+`"}`)); err != nil {
- t.Fatalf("expected no error, got %v", err)
- }
-
- ch := make(chan prometheus.Metric, 10)
- kpi.Collect(ch)
- close(ch)
-
- metricsCount := 0
- for range ch {
- metricsCount++
- }
-
- if metricsCount != tt.expectedCount {
- t.Errorf("%s: expected %d metrics, got %d", tt.description, tt.expectedCount, metricsCount)
- }
- })
- }
-}
-
-func TestStepStateKPI_GetName(t *testing.T) {
- kpi := &StepStateKPI{}
- expectedName := "step_state_kpi"
- if name := kpi.GetName(); name != expectedName {
- t.Errorf("expected name %q, got %q", expectedName, name)
- }
-}
-
-func TestStepStateKPI_Describe(t *testing.T) {
- kpi := &StepStateKPI{}
- if err := kpi.Init(nil, nil, conf.NewRawOpts(`{"stepSchedulingDomain": "test-operator"}`)); err != nil {
- t.Fatalf("expected no error, got %v", err)
- }
-
- ch := make(chan *prometheus.Desc, 1)
- kpi.Describe(ch)
- close(ch)
-
- descCount := 0
- for range ch {
- descCount++
- }
-
- if descCount != 1 {
- t.Errorf("expected 1 descriptor, got %d", descCount)
- }
-}
diff --git a/internal/knowledge/kpis/supported_kpis.go b/internal/knowledge/kpis/supported_kpis.go
index e9f83810f..f98c8683e 100644
--- a/internal/knowledge/kpis/supported_kpis.go
+++ b/internal/knowledge/kpis/supported_kpis.go
@@ -30,5 +30,4 @@ var supportedKPIs = map[string]plugins.KPI{
"decision_state_kpi": &deployment.DecisionStateKPI{},
"kpi_state_kpi": &deployment.KPIStateKPI{},
"pipeline_state_kpi": &deployment.PipelineStateKPI{},
- "step_state_kpi": &deployment.StepStateKPI{},
}
diff --git a/internal/scheduling/decisions/cinder/pipeline_controller.go b/internal/scheduling/decisions/cinder/pipeline_controller.go
index 7de976f7b..66f4f5cca 100644
--- a/internal/scheduling/decisions/cinder/pipeline_controller.go
+++ b/internal/scheduling/decisions/cinder/pipeline_controller.go
@@ -8,7 +8,6 @@ import (
"encoding/json"
"errors"
"fmt"
- "slices"
"sync"
"time"
@@ -131,11 +130,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al
// The base controller will delegate the pipeline creation down to this method.
func (c *DecisionPipelineController) InitPipeline(
ctx context.Context,
- name string,
- steps []v1alpha1.Step,
+ p v1alpha1.Pipeline,
) (lib.Pipeline[api.ExternalSchedulerRequest], error) {
- return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor)
+ return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor)
}
func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
@@ -162,29 +160,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *
return pipeline.Spec.Type == c.PipelineType()
}),
).
- // Watch step changes so that we can turn on/off pipelines depending on
- // unready steps.
- WatchesMulticluster(
- &v1alpha1.Step{},
- handler.Funcs{
- CreateFunc: c.HandleStepCreated,
- UpdateFunc: c.HandleStepUpdated,
- DeleteFunc: c.HandleStepDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- step := obj.(*v1alpha1.Step)
- // Only react to steps matching the scheduling domain.
- if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder {
- return false
- }
- // Only react to filter and weigher steps.
- supportedTypes := []v1alpha1.StepType{
- v1alpha1.StepTypeFilter,
- v1alpha1.StepTypeWeigher,
- }
- return slices.Contains(supportedTypes, step.Spec.Type)
- }),
- ).
// Watch knowledge changes so that we can reconfigure pipelines as needed.
WatchesMulticluster(
&v1alpha1.Knowledge{},
diff --git a/internal/scheduling/decisions/cinder/pipeline_controller_test.go b/internal/scheduling/decisions/cinder/pipeline_controller_test.go
index 0acbe0d34..2355687b3 100644
--- a/internal/scheduling/decisions/cinder/pipeline_controller_test.go
+++ b/internal/scheduling/decisions/cinder/pipeline_controller_test.go
@@ -85,7 +85,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
Spec: v1alpha1.PipelineSpec{
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainCinder,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
expectError: false,
@@ -114,7 +114,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
Spec: v1alpha1.PipelineSpec{
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainCinder,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
expectError: true,
@@ -170,7 +170,16 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
}
if tt.pipeline != nil {
- pipeline, err := controller.InitPipeline(t.Context(), tt.pipeline.Name, []v1alpha1.Step{})
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ SchedulingDomain: v1alpha1.SchedulingDomainCinder,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ })
if err != nil {
t.Fatalf("Failed to init pipeline: %v", err)
}
@@ -284,7 +293,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainCinder,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
@@ -318,7 +327,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainCinder,
CreateDecisions: false,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: false,
@@ -373,7 +382,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainCinder,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
@@ -411,7 +420,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
if tt.pipelineConfig != nil {
controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig
- pipeline, err := controller.InitPipeline(t.Context(), tt.pipelineConfig.Name, []v1alpha1.Step{})
+ pipeline, err := controller.InitPipeline(t.Context(), *tt.pipelineConfig)
if err != nil {
t.Fatalf("Failed to init pipeline: %v", err)
}
@@ -480,25 +489,20 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
tests := []struct {
name string
- steps []v1alpha1.Step
+ steps []v1alpha1.StepSpec
expectError bool
}{
{
name: "empty steps",
- steps: []v1alpha1.Step{},
+ steps: []v1alpha1.StepSpec{},
expectError: false,
},
{
name: "unsupported step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-step",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeFilter,
- Impl: "test-plugin",
- },
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-plugin",
},
},
expectError: true, // Expected because test-plugin is not in supportedSteps
@@ -507,7 +511,16 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps)
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ SchedulingDomain: v1alpha1.SchedulingDomainCinder,
+ Steps: tt.steps,
+ },
+ })
if tt.expectError && err == nil {
t.Error("Expected error but got none")
diff --git a/internal/scheduling/decisions/explanation/explainer.go b/internal/scheduling/decisions/explanation/explainer.go
index 1dc44b74e..a5f199fae 100644
--- a/internal/scheduling/decisions/explanation/explainer.go
+++ b/internal/scheduling/decisions/explanation/explainer.go
@@ -231,7 +231,7 @@ func (e *Explainer) calculateScoresFromSteps(inputWeights map[string]float64, st
for hostName := range currentScores {
if _, exists := stepResult.Activations[hostName]; !exists {
// Host not in this step's activations - will be deleted
- deletedHosts[hostName] = append(deletedHosts[hostName], stepResult.StepRef.Name)
+ deletedHosts[hostName] = append(deletedHosts[hostName], stepResult.StepName)
}
}
@@ -305,7 +305,7 @@ func (e *Explainer) findCriticalSteps(decision *v1alpha1.Decision) []string {
// If removing this step changes the winner, it's critical
if reducedWinner != baselineWinner {
- criticalSteps = append(criticalSteps, stepResult.StepRef.Name)
+ criticalSteps = append(criticalSteps, stepResult.StepName)
}
}
@@ -374,7 +374,7 @@ func (e *Explainer) calculateStepImpacts(inputWeights map[string]float64, stepRe
promotedToFirst := !wasFirst && isFirstAfter
impacts = append(impacts, StepImpact{
- Step: stepResult.StepRef.Name,
+ Step: stepResult.StepName,
ScoreBefore: scoreBefore,
ScoreAfter: scoreAfter,
ScoreDelta: scoreAfter - scoreBefore,
diff --git a/internal/scheduling/decisions/explanation/explainer_test.go b/internal/scheduling/decisions/explanation/explainer_test.go
index a483152f4..ed1d52e13 100644
--- a/internal/scheduling/decisions/explanation/explainer_test.go
+++ b/internal/scheduling/decisions/explanation/explainer_test.go
@@ -278,7 +278,7 @@ func WithHistoryRef(decision, historyDecision *v1alpha1.Decision) *v1alpha1.Deci
// Generic step creator
func Step(name string, activations map[string]float64) v1alpha1.StepResult {
return v1alpha1.StepResult{
- StepRef: corev1.ObjectReference{Name: name},
+ StepName: name,
Activations: activations,
}
}
@@ -446,28 +446,28 @@ func DecisionWithHistory(name, winner string) *DecisionBuilder {
// Step result builders for common pipeline steps
func ResourceWeigherStep(activations map[string]float64) v1alpha1.StepResult {
return v1alpha1.StepResult{
- StepRef: corev1.ObjectReference{Name: "resource-weigher"},
+ StepName: "resource-weigher",
Activations: activations,
}
}
func AvailabilityFilterStep(activations map[string]float64) v1alpha1.StepResult {
return v1alpha1.StepResult{
- StepRef: corev1.ObjectReference{Name: "availability-filter"},
+ StepName: "availability-filter",
Activations: activations,
}
}
func PlacementPolicyStep(activations map[string]float64) v1alpha1.StepResult {
return v1alpha1.StepResult{
- StepRef: corev1.ObjectReference{Name: "placement-policy"},
+ StepName: "placement-policy",
Activations: activations,
}
}
func WeigherStep(name string, activations map[string]float64) v1alpha1.StepResult {
return v1alpha1.StepResult{
- StepRef: corev1.ObjectReference{Name: name},
+ StepName: name,
Activations: activations,
}
}
diff --git a/internal/scheduling/decisions/machines/noop.go b/internal/scheduling/decisions/machines/noop.go
index 88caebbc5..3b0104aa6 100644
--- a/internal/scheduling/decisions/machines/noop.go
+++ b/internal/scheduling/decisions/machines/noop.go
@@ -15,7 +15,7 @@ type NoopFilter struct {
Alias string
}
-func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
return nil
}
diff --git a/internal/scheduling/decisions/machines/pipeline_controller.go b/internal/scheduling/decisions/machines/pipeline_controller.go
index 14ddc35f3..d90f9c391 100644
--- a/internal/scheduling/decisions/machines/pipeline_controller.go
+++ b/internal/scheduling/decisions/machines/pipeline_controller.go
@@ -7,7 +7,6 @@ import (
"context"
"errors"
"fmt"
- "slices"
"sync"
"time"
@@ -173,11 +172,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al
// The base controller will delegate the pipeline creation down to this method.
func (c *DecisionPipelineController) InitPipeline(
ctx context.Context,
- name string,
- steps []v1alpha1.Step,
+ p v1alpha1.Pipeline,
) (lib.Pipeline[ironcore.MachinePipelineRequest], error) {
- return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor)
+ return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor)
}
func (c *DecisionPipelineController) handleMachine() handler.EventHandler {
@@ -261,29 +259,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *
return pipeline.Spec.Type == c.PipelineType()
}),
).
- // Watch step changes so that we can turn on/off pipelines depending on
- // unready steps.
- WatchesMulticluster(
- &v1alpha1.Step{},
- handler.Funcs{
- CreateFunc: c.HandleStepCreated,
- UpdateFunc: c.HandleStepUpdated,
- DeleteFunc: c.HandleStepDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- step := obj.(*v1alpha1.Step)
- // Only react to steps matching the scheduling domain.
- if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines {
- return false
- }
- // Only react to filter and weigher steps.
- supportedTypes := []v1alpha1.StepType{
- v1alpha1.StepTypeFilter,
- v1alpha1.StepTypeWeigher,
- }
- return slices.Contains(supportedTypes, step.Spec.Type)
- }),
- ).
Named("cortex-machine-scheduler").
For(
&v1alpha1.Decision{},
diff --git a/internal/scheduling/decisions/machines/pipeline_controller_test.go b/internal/scheduling/decisions/machines/pipeline_controller_test.go
index 645961494..f7eb8bf3b 100644
--- a/internal/scheduling/decisions/machines/pipeline_controller_test.go
+++ b/internal/scheduling/decisions/machines/pipeline_controller_test.go
@@ -215,34 +215,30 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
tests := []struct {
name string
- steps []v1alpha1.Step
+ steps []v1alpha1.StepSpec
expectError bool
}{
{
name: "empty steps",
- steps: []v1alpha1.Step{},
+ steps: []v1alpha1.StepSpec{},
expectError: false,
},
{
name: "noop step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- Spec: v1alpha1.StepSpec{
- Impl: "noop",
- Type: v1alpha1.StepTypeFilter,
- },
+ Impl: "noop",
+ Type: v1alpha1.StepTypeFilter,
},
},
expectError: false,
},
{
name: "unsupported step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- Spec: v1alpha1.StepSpec{
- Impl: "unsupported",
- Type: v1alpha1.StepTypeFilter,
- },
+ Impl: "unsupported",
+ Type: v1alpha1.StepTypeFilter,
},
},
expectError: true,
@@ -251,7 +247,16 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps)
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ SchedulingDomain: v1alpha1.SchedulingDomainMachines,
+ Steps: tt.steps,
+ },
+ })
if tt.expectError && err == nil {
t.Error("expected error but got none")
@@ -317,7 +322,7 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainMachines,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
@@ -350,7 +355,7 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainMachines,
CreateDecisions: false,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: false,
@@ -396,7 +401,7 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainMachines,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
diff --git a/internal/scheduling/decisions/manila/pipeline_controller.go b/internal/scheduling/decisions/manila/pipeline_controller.go
index 06451586e..89f7b25e8 100644
--- a/internal/scheduling/decisions/manila/pipeline_controller.go
+++ b/internal/scheduling/decisions/manila/pipeline_controller.go
@@ -8,7 +8,6 @@ import (
"encoding/json"
"errors"
"fmt"
- "slices"
"sync"
"time"
@@ -131,11 +130,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al
// The base controller will delegate the pipeline creation down to this method.
func (c *DecisionPipelineController) InitPipeline(
ctx context.Context,
- name string,
- steps []v1alpha1.Step,
+ p v1alpha1.Pipeline,
) (lib.Pipeline[api.ExternalSchedulerRequest], error) {
- return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor)
+ return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor)
}
func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
@@ -162,29 +160,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *
return pipeline.Spec.Type == c.PipelineType()
}),
).
- // Watch step changes so that we can turn on/off pipelines depending on
- // unready steps.
- WatchesMulticluster(
- &v1alpha1.Step{},
- handler.Funcs{
- CreateFunc: c.HandleStepCreated,
- UpdateFunc: c.HandleStepUpdated,
- DeleteFunc: c.HandleStepDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- step := obj.(*v1alpha1.Step)
- // Only react to steps matching the scheduling domain.
- if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila {
- return false
- }
- // Only react to filter and weigher steps.
- supportedTypes := []v1alpha1.StepType{
- v1alpha1.StepTypeFilter,
- v1alpha1.StepTypeWeigher,
- }
- return slices.Contains(supportedTypes, step.Spec.Type)
- }),
- ).
// Watch knowledge changes so that we can reconfigure pipelines as needed.
WatchesMulticluster(
&v1alpha1.Knowledge{},
diff --git a/internal/scheduling/decisions/manila/pipeline_controller_test.go b/internal/scheduling/decisions/manila/pipeline_controller_test.go
index 9ab5c0b25..837fca8a3 100644
--- a/internal/scheduling/decisions/manila/pipeline_controller_test.go
+++ b/internal/scheduling/decisions/manila/pipeline_controller_test.go
@@ -85,7 +85,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
Spec: v1alpha1.PipelineSpec{
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainManila,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
expectError: false,
@@ -114,7 +114,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
Spec: v1alpha1.PipelineSpec{
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainManila,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
expectError: true,
@@ -170,7 +170,12 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
}
if tt.pipeline != nil {
- pipeline, err := controller.InitPipeline(t.Context(), tt.pipeline.Name, []v1alpha1.Step{})
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: tt.pipeline.Name,
+ },
+ Spec: tt.pipeline.Spec,
+ })
if err != nil {
t.Fatalf("Failed to init pipeline: %v", err)
}
@@ -284,7 +289,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainManila,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
@@ -318,7 +323,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainManila,
CreateDecisions: false,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: false,
@@ -373,7 +378,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainManila,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
@@ -411,7 +416,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
if tt.pipelineConfig != nil {
controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig
- pipeline, err := controller.InitPipeline(t.Context(), tt.pipelineConfig.Name, []v1alpha1.Step{})
+ pipeline, err := controller.InitPipeline(t.Context(), *tt.pipelineConfig)
if err != nil {
t.Fatalf("Failed to init pipeline: %v", err)
}
@@ -480,27 +485,22 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
tests := []struct {
name string
- steps []v1alpha1.Step
+ steps []v1alpha1.StepSpec
expectError bool
}{
{
name: "empty steps",
- steps: []v1alpha1.Step{},
+ steps: []v1alpha1.StepSpec{},
expectError: false,
},
{
name: "supported netapp step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-step",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeWeigher,
- Impl: "netapp_cpu_usage_balancing",
- Opts: runtime.RawExtension{
- Raw: []byte(`{"AvgCPUUsageLowerBound": 0, "AvgCPUUsageUpperBound": 90, "MaxCPUUsageLowerBound": 0, "MaxCPUUsageUpperBound": 100}`),
- },
+ Type: v1alpha1.StepTypeWeigher,
+ Impl: "netapp_cpu_usage_balancing",
+ Opts: runtime.RawExtension{
+ Raw: []byte(`{"AvgCPUUsageLowerBound": 0, "AvgCPUUsageUpperBound": 90, "MaxCPUUsageLowerBound": 0, "MaxCPUUsageUpperBound": 100}`),
},
},
},
@@ -508,15 +508,10 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
},
{
name: "unsupported step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-step",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeFilter,
- Impl: "unsupported-plugin",
- },
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "unsupported-plugin",
},
},
expectError: true,
@@ -525,7 +520,16 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps)
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ SchedulingDomain: v1alpha1.SchedulingDomainManila,
+ Steps: tt.steps,
+ },
+ })
if tt.expectError && err == nil {
t.Error("Expected error but got none")
diff --git a/internal/scheduling/decisions/nova/pipeline_controller.go b/internal/scheduling/decisions/nova/pipeline_controller.go
index 68bdd6c72..d47aa7df7 100644
--- a/internal/scheduling/decisions/nova/pipeline_controller.go
+++ b/internal/scheduling/decisions/nova/pipeline_controller.go
@@ -8,7 +8,6 @@ import (
"encoding/json"
"errors"
"fmt"
- "slices"
"sync"
"time"
@@ -132,11 +131,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al
// The base controller will delegate the pipeline creation down to this method.
func (c *DecisionPipelineController) InitPipeline(
ctx context.Context,
- name string,
- steps []v1alpha1.Step,
+ p v1alpha1.Pipeline,
) (lib.Pipeline[api.ExternalSchedulerRequest], error) {
- return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor)
+ return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor)
}
func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error {
@@ -163,29 +161,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *
return pipeline.Spec.Type == c.PipelineType()
}),
).
- // Watch step changes so that we can turn on/off pipelines depending on
- // unready steps.
- WatchesMulticluster(
- &v1alpha1.Step{},
- handler.Funcs{
- CreateFunc: c.HandleStepCreated,
- UpdateFunc: c.HandleStepUpdated,
- DeleteFunc: c.HandleStepDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- step := obj.(*v1alpha1.Step)
- // Only react to steps matching the scheduling domain.
- if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
- return false
- }
- // Only react to filter and weigher steps.
- supportedTypes := []v1alpha1.StepType{
- v1alpha1.StepTypeFilter,
- v1alpha1.StepTypeWeigher,
- }
- return slices.Contains(supportedTypes, step.Spec.Type)
- }),
- ).
// Watch knowledge changes so that we can reconfigure pipelines as needed.
WatchesMulticluster(
&v1alpha1.Knowledge{},
diff --git a/internal/scheduling/decisions/nova/pipeline_controller_test.go b/internal/scheduling/decisions/nova/pipeline_controller_test.go
index 2b6c637d7..7a53e274c 100644
--- a/internal/scheduling/decisions/nova/pipeline_controller_test.go
+++ b/internal/scheduling/decisions/nova/pipeline_controller_test.go
@@ -93,7 +93,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
Spec: v1alpha1.PipelineSpec{
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
expectError: false,
@@ -122,7 +122,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
Spec: v1alpha1.PipelineSpec{
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
expectError: true,
@@ -175,7 +175,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
Spec: v1alpha1.PipelineSpec{
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
expectError: true,
@@ -209,7 +209,12 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) {
}
if tt.pipeline != nil {
- pipeline, err := controller.InitPipeline(t.Context(), tt.pipeline.Name, []v1alpha1.Step{})
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: tt.pipeline.Name,
+ },
+ Spec: tt.pipeline.Spec,
+ })
if err != nil {
t.Fatalf("Failed to init pipeline: %v", err)
}
@@ -268,57 +273,42 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
tests := []struct {
name string
- steps []v1alpha1.Step
+ steps []v1alpha1.StepSpec
expectError bool
}{
{
name: "empty steps",
- steps: []v1alpha1.Step{},
+ steps: []v1alpha1.StepSpec{},
expectError: false,
},
{
name: "supported step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "filter_status_conditions",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeFilter,
- Impl: "filter_status_conditions",
- },
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "filter_status_conditions",
},
},
expectError: false,
},
{
name: "unsupported step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-step",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeFilter,
- Impl: "unsupported-plugin",
- },
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "unsupported-plugin",
},
},
expectError: true,
},
{
name: "step with scoping options",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "scoped-filter",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeFilter,
- Impl: "filter_status_conditions",
- Opts: runtime.RawExtension{
- Raw: []byte(`{"scope":{"host_capabilities":{"any_of_trait_infixes":["TEST_TRAIT"]}}}`),
- },
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "filter_status_conditions",
+ Opts: runtime.RawExtension{
+ Raw: []byte(`{"scope":{"host_capabilities":{"any_of_trait_infixes":["TEST_TRAIT"]}}}`),
},
},
},
@@ -326,17 +316,12 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
},
{
name: "step with invalid scoping options",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "invalid-scoped-filter",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeFilter,
- Impl: "filter_status_conditions",
- Opts: runtime.RawExtension{
- Raw: []byte(`invalid json`),
- },
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "filter_status_conditions",
+ Opts: runtime.RawExtension{
+ Raw: []byte(`invalid json`),
},
},
},
@@ -346,7 +331,14 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- pipeline, err := controller.InitPipeline(t.Context(), "test-pipeline", tt.steps)
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ Steps: tt.steps,
+ },
+ })
if tt.expectError && err == nil {
t.Error("Expected error but got none")
@@ -437,7 +429,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
pipelineConf: &v1alpha1.Pipeline{
@@ -448,7 +440,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
setupPipelineConfigs: true,
@@ -484,7 +476,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: false,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
pipelineConf: &v1alpha1.Pipeline{
@@ -495,7 +487,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: false,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
setupPipelineConfigs: true,
@@ -556,7 +548,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
pipelineConf: &v1alpha1.Pipeline{
@@ -567,7 +559,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
setupPipelineConfigs: true,
@@ -605,7 +597,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
setupPipelineConfigs: true,
@@ -643,7 +635,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainNova,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
setupPipelineConfigs: true,
@@ -689,7 +681,12 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) {
// Setup runtime pipeline if needed
if tt.pipeline != nil {
- pipeline, err := controller.InitPipeline(context.Background(), tt.pipeline.Name, []v1alpha1.Step{})
+ pipeline, err := controller.InitPipeline(context.Background(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: tt.pipeline.Name,
+ },
+ Spec: tt.pipeline.Spec,
+ })
if err != nil {
t.Fatalf("Failed to init pipeline: %v", err)
}
diff --git a/internal/scheduling/decisions/pods/noop.go b/internal/scheduling/decisions/pods/noop.go
index ec7eece15..55f041747 100644
--- a/internal/scheduling/decisions/pods/noop.go
+++ b/internal/scheduling/decisions/pods/noop.go
@@ -15,7 +15,7 @@ type NoopFilter struct {
Alias string
}
-func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
return nil
}
diff --git a/internal/scheduling/decisions/pods/pipeline_controller.go b/internal/scheduling/decisions/pods/pipeline_controller.go
index a969a36c5..dc9312c0b 100644
--- a/internal/scheduling/decisions/pods/pipeline_controller.go
+++ b/internal/scheduling/decisions/pods/pipeline_controller.go
@@ -7,7 +7,6 @@ import (
"context"
"errors"
"fmt"
- "slices"
"sync"
"time"
@@ -184,11 +183,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al
// The base controller will delegate the pipeline creation down to this method.
func (c *DecisionPipelineController) InitPipeline(
ctx context.Context,
- name string,
- steps []v1alpha1.Step,
+ p v1alpha1.Pipeline,
) (lib.Pipeline[pods.PodPipelineRequest], error) {
- return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor)
+ return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor)
}
func (c *DecisionPipelineController) handlePod() handler.EventHandler {
@@ -268,29 +266,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *
return pipeline.Spec.Type == v1alpha1.PipelineTypeFilterWeigher
}),
).
- // Watch step changes so that we can turn on/off pipelines depending on
- // unready steps.
- WatchesMulticluster(
- &v1alpha1.Step{},
- handler.Funcs{
- CreateFunc: c.HandleStepCreated,
- UpdateFunc: c.HandleStepUpdated,
- DeleteFunc: c.HandleStepDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- step := obj.(*v1alpha1.Step)
- // Only react to steps matching the scheduling domain.
- if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods {
- return false
- }
- // Only react to filter and weigher steps.
- supportedTypes := []v1alpha1.StepType{
- v1alpha1.StepTypeFilter,
- v1alpha1.StepTypeWeigher,
- }
- return slices.Contains(supportedTypes, step.Spec.Type)
- }),
- ).
Named("cortex-pod-scheduler").
For(
&v1alpha1.Decision{},
diff --git a/internal/scheduling/decisions/pods/pipeline_controller_test.go b/internal/scheduling/decisions/pods/pipeline_controller_test.go
index 2521952d1..63642b26f 100644
--- a/internal/scheduling/decisions/pods/pipeline_controller_test.go
+++ b/internal/scheduling/decisions/pods/pipeline_controller_test.go
@@ -190,34 +190,30 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
tests := []struct {
name string
- steps []v1alpha1.Step
+ steps []v1alpha1.StepSpec
expectError bool
}{
{
name: "empty steps",
- steps: []v1alpha1.Step{},
+ steps: []v1alpha1.StepSpec{},
expectError: false,
},
{
name: "noop step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- Spec: v1alpha1.StepSpec{
- Impl: "noop",
- Type: v1alpha1.StepTypeFilter,
- },
+ Impl: "noop",
+ Type: v1alpha1.StepTypeFilter,
},
},
expectError: false,
},
{
name: "unsupported step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- Spec: v1alpha1.StepSpec{
- Impl: "unsupported",
- Type: v1alpha1.StepTypeFilter,
- },
+ Impl: "unsupported",
+ Type: v1alpha1.StepTypeFilter,
},
},
expectError: true,
@@ -226,7 +222,14 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps)
+ pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ Steps: tt.steps,
+ },
+ })
if tt.expectError && err == nil {
t.Error("expected error but got none")
@@ -292,7 +295,7 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainPods,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
@@ -325,7 +328,7 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainPods,
CreateDecisions: false,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: false,
@@ -371,7 +374,7 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) {
Type: v1alpha1.PipelineTypeFilterWeigher,
SchedulingDomain: v1alpha1.SchedulingDomainPods,
CreateDecisions: true,
- Steps: []v1alpha1.StepInPipeline{},
+ Steps: []v1alpha1.StepSpec{},
},
},
createDecisions: true,
diff --git a/internal/scheduling/descheduling/nova/monitor.go b/internal/scheduling/descheduling/nova/monitor.go
index 998a73f64..239c2f921 100644
--- a/internal/scheduling/descheduling/nova/monitor.go
+++ b/internal/scheduling/descheduling/nova/monitor.go
@@ -83,8 +83,8 @@ type StepMonitor struct {
}
// Monitor a step by wrapping it with a StepMonitor.
-func monitorStep(step Step, conf v1alpha1.Step, monitor Monitor) StepMonitor {
- name := conf.Namespace + "/" + conf.Name
+func monitorStep(step Step, conf v1alpha1.StepSpec, monitor Monitor) StepMonitor {
+ name := conf.Impl
var runTimer prometheus.Observer
if monitor.stepRunTimer != nil {
runTimer = monitor.stepRunTimer.WithLabelValues(name)
@@ -102,7 +102,7 @@ func monitorStep(step Step, conf v1alpha1.Step, monitor Monitor) StepMonitor {
}
// Initialize the step with the database and options.
-func (m StepMonitor) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (m StepMonitor) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
return m.step.Init(ctx, client, step)
}
diff --git a/internal/scheduling/descheduling/nova/monitor_test.go b/internal/scheduling/descheduling/nova/monitor_test.go
index 4ee16c1a8..7c665af06 100644
--- a/internal/scheduling/descheduling/nova/monitor_test.go
+++ b/internal/scheduling/descheduling/nova/monitor_test.go
@@ -12,7 +12,6 @@ import (
"github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
- v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)
@@ -81,7 +80,7 @@ type mockMonitorStep struct {
runCalled bool
}
-func (m *mockMonitorStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (m *mockMonitorStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
m.initCalled = true
return m.initError
}
@@ -98,7 +97,7 @@ func TestMonitorStep(t *testing.T) {
{VMID: "vm1", Reason: "test"},
},
}
- conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}}
+ conf := v1alpha1.StepSpec{Impl: "test-step"}
monitoredStep := monitorStep(step, conf, monitor)
@@ -118,12 +117,12 @@ func TestMonitorStep(t *testing.T) {
func TestStepMonitor_Init(t *testing.T) {
monitor := NewPipelineMonitor()
step := &mockMonitorStep{}
- conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}}
+ conf := v1alpha1.StepSpec{Impl: "test-step"}
monitoredStep := monitorStep(step, conf, monitor)
client := fake.NewClientBuilder().Build()
- err := monitoredStep.Init(t.Context(), client, conf)
+ err := monitoredStep.Init(t.Context(), client, conf)
if err != nil {
t.Errorf("unexpected error: %v", err)
@@ -140,11 +139,11 @@ func TestStepMonitor_Init_WithError(t *testing.T) {
step := &mockMonitorStep{
initError: expectedErr,
}
- conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}}
+ conf := v1alpha1.StepSpec{Impl: "test-step"}
monitoredStep := monitorStep(step, conf, monitor)
client := fake.NewClientBuilder().Build()
- err := monitoredStep.Init(t.Context(), client, conf)
+ err := monitoredStep.Init(t.Context(), client, conf)
if !errors.Is(err, expectedErr) {
t.Errorf("expected error %v, got %v", expectedErr, err)
@@ -160,7 +159,7 @@ func TestStepMonitor_Run(t *testing.T) {
step := &mockMonitorStep{
decisions: decisions,
}
- conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}}
+ conf := v1alpha1.StepSpec{Impl: "test-step"}
monitoredStep := monitorStep(step, conf, monitor)
result, err := monitoredStep.Run()
@@ -178,7 +177,7 @@ func TestStepMonitor_Run(t *testing.T) {
}
// Verify that the counter was incremented
- counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("/test-step"))
+ counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("test-step"))
if counterValue != 2.0 {
t.Errorf("expected counter value 2.0, got %f", counterValue)
}
@@ -190,7 +189,7 @@ func TestStepMonitor_Run_WithError(t *testing.T) {
step := &mockMonitorStep{
runError: expectedErr,
}
- conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}}
+ conf := v1alpha1.StepSpec{Impl: "test-step"}
monitoredStep := monitorStep(step, conf, monitor)
result, err := monitoredStep.Run()
@@ -204,7 +203,7 @@ func TestStepMonitor_Run_WithError(t *testing.T) {
}
// Counter should not be incremented on error
- counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("/test-step"))
+ counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("test-step"))
if counterValue != 0.0 {
t.Errorf("expected counter value 0.0, got %f", counterValue)
}
@@ -215,7 +214,7 @@ func TestStepMonitor_Run_EmptyResult(t *testing.T) {
step := &mockMonitorStep{
decisions: []plugins.Decision{}, // Empty slice
}
- conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}}
+ conf := v1alpha1.StepSpec{Impl: "test-step"}
monitoredStep := monitorStep(step, conf, monitor)
result, err := monitoredStep.Run()
@@ -229,7 +228,7 @@ func TestStepMonitor_Run_EmptyResult(t *testing.T) {
}
// Counter should be 0 for empty results
- counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("/test-step"))
+ counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("test-step"))
if counterValue != 0.0 {
t.Errorf("expected counter value 0.0, got %f", counterValue)
}
@@ -243,7 +242,7 @@ func TestMonitorStep_WithNilMonitor(t *testing.T) {
{VMID: "vm1", Reason: "test"},
},
}
- conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}}
+ conf := v1alpha1.StepSpec{Impl: "test-step"}
monitoredStep := monitorStep(step, conf, monitor)
// Should not panic with nil timers/counters
diff --git a/internal/scheduling/descheduling/nova/pipeline.go b/internal/scheduling/descheduling/nova/pipeline.go
index 107dc0d31..8bf60b14f 100644
--- a/internal/scheduling/descheduling/nova/pipeline.go
+++ b/internal/scheduling/descheduling/nova/pipeline.go
@@ -33,7 +33,7 @@ type Pipeline struct {
func (p *Pipeline) Init(
ctx context.Context,
- confedSteps []v1alpha1.Step,
+ confedSteps []v1alpha1.StepSpec,
supportedSteps map[string]Step,
) error {
@@ -41,18 +41,17 @@ func (p *Pipeline) Init(
// Load all steps from the configuration.
p.steps = make(map[string]Step, len(confedSteps))
for _, stepConf := range confedSteps {
- step, ok := supportedSteps[stepConf.Spec.Impl]
+ step, ok := supportedSteps[stepConf.Impl]
if !ok {
- return errors.New("descheduler: unsupported step: " + stepConf.Spec.Impl)
+ return errors.New("descheduler: unsupported step: " + stepConf.Impl)
}
step = monitorStep(step, stepConf, p.Monitor)
if err := step.Init(ctx, p.Client, stepConf); err != nil {
return err
}
- namespacedName := stepConf.Namespace + "/" + stepConf.Name
- p.steps[namespacedName] = step
- p.order = append(p.order, namespacedName)
- slog.Info("descheduler: added step", "name", namespacedName)
+ p.steps[stepConf.Impl] = step
+ p.order = append(p.order, stepConf.Impl)
+ slog.Info("descheduler: added step", "name", stepConf.Impl)
}
return nil
}
@@ -67,7 +66,7 @@ func (p *Pipeline) run() map[string][]plugins.Decision {
var lock sync.Mutex
decisionsByStep := map[string][]plugins.Decision{}
var wg sync.WaitGroup
- for namespacedName, step := range p.steps {
+ for stepName, step := range p.steps {
wg.Go(func() {
slog.Info("descheduler: running step")
decisions, err := step.Run()
@@ -82,7 +81,7 @@ func (p *Pipeline) run() map[string][]plugins.Decision {
slog.Info("descheduler: finished step")
lock.Lock()
defer lock.Unlock()
- decisionsByStep[namespacedName] = decisions
+ decisionsByStep[stepName] = decisions
})
}
wg.Wait()
diff --git a/internal/scheduling/descheduling/nova/pipeline_controller.go b/internal/scheduling/descheduling/nova/pipeline_controller.go
index a39c9f345..4a357a5ed 100644
--- a/internal/scheduling/descheduling/nova/pipeline_controller.go
+++ b/internal/scheduling/descheduling/nova/pipeline_controller.go
@@ -6,7 +6,6 @@ package nova
import (
"context"
"log/slog"
- "slices"
"time"
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
@@ -47,13 +46,13 @@ func (c *DeschedulingsPipelineController) PipelineType() v1alpha1.PipelineType {
}
// The base controller will delegate the pipeline creation down to this method.
-func (c *DeschedulingsPipelineController) InitPipeline(ctx context.Context, name string, steps []v1alpha1.Step) (*Pipeline, error) {
+func (c *DeschedulingsPipelineController) InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (*Pipeline, error) {
pipeline := &Pipeline{
Client: c.Client,
CycleDetector: c.CycleDetector,
- Monitor: c.Monitor.SubPipeline(name),
+ Monitor: c.Monitor.SubPipeline(p.Name),
}
- err := pipeline.Init(ctx, steps, supportedSteps)
+ err := pipeline.Init(ctx, p.Spec.Steps, supportedSteps)
return pipeline, err
}
@@ -114,28 +113,6 @@ func (c *DeschedulingsPipelineController) SetupWithManager(mgr ctrl.Manager, mcl
return pipeline.Spec.Type == c.PipelineType()
}),
).
- // Watch step changes so that we can turn on/off pipelines depending on
- // unready steps.
- WatchesMulticluster(
- &v1alpha1.Step{},
- handler.Funcs{
- CreateFunc: c.HandleStepCreated,
- UpdateFunc: c.HandleStepUpdated,
- DeleteFunc: c.HandleStepDeleted,
- },
- predicate.NewPredicateFuncs(func(obj client.Object) bool {
- step := obj.(*v1alpha1.Step)
- // Only react to steps matching the scheduling domain.
- if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova {
- return false
- }
- // Only react to filter and weigher steps.
- supportedTypes := []v1alpha1.StepType{
- v1alpha1.StepTypeDescheduler,
- }
- return slices.Contains(supportedTypes, step.Spec.Type)
- }),
- ).
// Watch knowledge changes so that we can reconfigure pipelines as needed.
WatchesMulticluster(
&v1alpha1.Knowledge{},
diff --git a/internal/scheduling/descheduling/nova/pipeline_controller_test.go b/internal/scheduling/descheduling/nova/pipeline_controller_test.go
index 16783e9af..78369e5a0 100644
--- a/internal/scheduling/descheduling/nova/pipeline_controller_test.go
+++ b/internal/scheduling/descheduling/nova/pipeline_controller_test.go
@@ -12,7 +12,6 @@ import (
"github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins"
"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
"github.com/cobaltcore-dev/cortex/pkg/conf"
- metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -34,43 +33,34 @@ type mockControllerStep struct{}
func (m *mockControllerStep) Run() ([]plugins.Decision, error) {
return nil, nil
}
-func (m *mockControllerStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (m *mockControllerStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
return nil
}
func TestDeschedulingsPipelineController_InitPipeline(t *testing.T) {
tests := []struct {
name string
- steps []v1alpha1.Step
+ steps []v1alpha1.StepSpec
expectError bool
expectedError string
}{
{
name: "successful pipeline initialization",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-step",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeDescheduler,
- Impl: "mock-step",
- },
+ Type: v1alpha1.StepTypeDescheduler,
+ Impl: "mock-step",
},
},
expectError: false,
},
{
name: "unsupported step",
- steps: []v1alpha1.Step{
+ steps: []v1alpha1.StepSpec{
{
- ObjectMeta: metav1.ObjectMeta{
- Name: "unsupported-step",
- },
- Spec: v1alpha1.StepSpec{
- Type: v1alpha1.StepTypeDescheduler,
- Impl: "unsupported",
- },
+
+ Type: v1alpha1.StepTypeDescheduler,
+ Impl: "unsupported",
},
},
expectError: true,
@@ -78,7 +68,7 @@ func TestDeschedulingsPipelineController_InitPipeline(t *testing.T) {
},
{
name: "empty steps",
- steps: []v1alpha1.Step{},
+ steps: []v1alpha1.StepSpec{},
expectError: false,
},
}
diff --git a/internal/scheduling/descheduling/nova/pipeline_test.go b/internal/scheduling/descheduling/nova/pipeline_test.go
index d83d278b3..c5e00b9f8 100644
--- a/internal/scheduling/descheduling/nova/pipeline_test.go
+++ b/internal/scheduling/descheduling/nova/pipeline_test.go
@@ -11,7 +11,6 @@ import (
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
"github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins"
- v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
@@ -31,7 +30,7 @@ func (m *mockPipelineStep) Run() ([]plugins.Decision, error) {
return m.decisions, nil
}
-func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
if m.initError != nil {
return m.initError
}
@@ -43,7 +42,7 @@ func TestPipeline_Init(t *testing.T) {
tests := []struct {
name string
supportedSteps map[string]Step
- confedSteps []v1alpha1.Step
+ confedSteps []v1alpha1.StepSpec
expectedSteps int
expectedError bool
}{
@@ -52,12 +51,10 @@ func TestPipeline_Init(t *testing.T) {
supportedSteps: map[string]Step{
"test-step": &mockPipelineStep{},
},
- confedSteps: []v1alpha1.Step{
- {ObjectMeta: v1.ObjectMeta{Name: "step1"}, Spec: v1alpha1.StepSpec{
- Impl: "test-step",
- Type: v1alpha1.StepTypeDescheduler,
- }},
- },
+ confedSteps: []v1alpha1.StepSpec{{
+ Impl: "test-step",
+ Type: v1alpha1.StepTypeDescheduler,
+ }},
expectedSteps: 1,
},
{
@@ -65,12 +62,10 @@ func TestPipeline_Init(t *testing.T) {
supportedSteps: map[string]Step{
"test-step": &mockPipelineStep{},
},
- confedSteps: []v1alpha1.Step{
- {ObjectMeta: v1.ObjectMeta{Name: "step2"}, Spec: v1alpha1.StepSpec{
- Impl: "unsupported-step",
- Type: v1alpha1.StepTypeDescheduler,
- }},
- },
+ confedSteps: []v1alpha1.StepSpec{{
+ Impl: "unsupported-step",
+ Type: v1alpha1.StepTypeDescheduler,
+ }},
expectedError: true,
},
{
@@ -78,12 +73,10 @@ func TestPipeline_Init(t *testing.T) {
supportedSteps: map[string]Step{
"failing-step": &mockPipelineStep{initError: errors.New("init failed")},
},
- confedSteps: []v1alpha1.Step{
- {ObjectMeta: v1.ObjectMeta{Name: "step3"}, Spec: v1alpha1.StepSpec{
- Impl: "failing-step",
- Type: v1alpha1.StepTypeDescheduler,
- }},
- },
+ confedSteps: []v1alpha1.StepSpec{{
+ Impl: "failing-step",
+ Type: v1alpha1.StepTypeDescheduler,
+ }},
expectedError: true,
},
{
@@ -92,15 +85,15 @@ func TestPipeline_Init(t *testing.T) {
"step1": &mockPipelineStep{},
"step2": &mockPipelineStep{},
},
- confedSteps: []v1alpha1.Step{
- {ObjectMeta: v1.ObjectMeta{Name: "step1"}, Spec: v1alpha1.StepSpec{
+ confedSteps: []v1alpha1.StepSpec{
+ {
Impl: "step1",
Type: v1alpha1.StepTypeDescheduler,
- }},
- {ObjectMeta: v1.ObjectMeta{Name: "step2"}, Spec: v1alpha1.StepSpec{
+ },
+ {
Impl: "step2",
Type: v1alpha1.StepTypeDescheduler,
- }},
+ },
},
expectedSteps: 2,
},
diff --git a/internal/scheduling/descheduling/nova/plugins/base.go b/internal/scheduling/descheduling/nova/plugins/base.go
index 1ba37d25c..f312f402b 100644
--- a/internal/scheduling/descheduling/nova/plugins/base.go
+++ b/internal/scheduling/descheduling/nova/plugins/base.go
@@ -21,8 +21,8 @@ type BaseStep[Opts any] struct {
}
// Init the step with the database and options.
-func (s *BaseStep[Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
- opts := conf.NewRawOptsBytes(step.Spec.Opts.Raw)
+func (s *BaseStep[Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
+ opts := conf.NewRawOptsBytes(step.Opts.Raw)
if err := s.Load(opts); err != nil {
return err
}
diff --git a/internal/scheduling/descheduling/nova/plugins/base_test.go b/internal/scheduling/descheduling/nova/plugins/base_test.go
index 60830cf72..f646523bb 100644
--- a/internal/scheduling/descheduling/nova/plugins/base_test.go
+++ b/internal/scheduling/descheduling/nova/plugins/base_test.go
@@ -23,13 +23,11 @@ func (o MockOptions) Validate() error {
func TestBaseStep_Init(t *testing.T) {
step := BaseStep[MockOptions]{}
cl := fake.NewClientBuilder().Build()
- err := step.Init(t.Context(), cl, v1alpha1.Step{
- Spec: v1alpha1.StepSpec{
- Opts: runtime.RawExtension{Raw: []byte(`{
- "option1": "value1",
- "option2": 2
- }`)},
- },
+ err := step.Init(t.Context(), cl, v1alpha1.StepSpec{
+ Opts: runtime.RawExtension{Raw: []byte(`{
+ "option1": "value1",
+ "option2": 2
+ }`)},
})
if err != nil {
t.Fatalf("expected no error, got %v", err)
diff --git a/internal/scheduling/descheduling/nova/step.go b/internal/scheduling/descheduling/nova/step.go
index c9b16b761..7c53bc991 100644
--- a/internal/scheduling/descheduling/nova/step.go
+++ b/internal/scheduling/descheduling/nova/step.go
@@ -21,5 +21,5 @@ type Step interface {
// Get the VMs on their current hosts that should be considered for descheduling.
Run() ([]plugins.Decision, error)
// Configure the step with a database and options.
- Init(ctx context.Context, client client.Client, step v1alpha1.Step) error
+ Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error
}
diff --git a/internal/scheduling/lib/pipeline.go b/internal/scheduling/lib/pipeline.go
index b1a43363f..48b05b084 100644
--- a/internal/scheduling/lib/pipeline.go
+++ b/internal/scheduling/lib/pipeline.go
@@ -14,7 +14,6 @@ import (
"sync"
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
- corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)
@@ -39,7 +38,7 @@ type pipeline[RequestType PipelineRequest] struct {
type StepWrapper[RequestType PipelineRequest] func(
ctx context.Context,
client client.Client,
- step v1alpha1.Step,
+ step v1alpha1.StepSpec,
impl Step[RequestType],
) (Step[RequestType], error)
@@ -49,7 +48,7 @@ func NewPipeline[RequestType PipelineRequest](
client client.Client,
name string,
supportedSteps map[string]func() Step[RequestType],
- confedSteps []v1alpha1.Step,
+ confedSteps []v1alpha1.StepSpec,
monitor PipelineMonitor,
) (Pipeline[RequestType], error) {
@@ -60,26 +59,25 @@ func NewPipeline[RequestType PipelineRequest](
pipelineMonitor := monitor.SubPipeline(name)
for _, stepConfig := range confedSteps {
- slog.Info("scheduler: configuring step", "name", stepConfig.Name, "impl", stepConfig.Spec.Impl)
+ slog.Info("scheduler: configuring step", "impl", stepConfig.Impl)
slog.Info("supported:", "steps", maps.Keys(supportedSteps))
- makeStep, ok := supportedSteps[stepConfig.Spec.Impl]
+ makeStep, ok := supportedSteps[stepConfig.Impl]
if !ok {
- return nil, errors.New("unsupported scheduler step impl: " + stepConfig.Spec.Impl)
+ return nil, errors.New("unsupported scheduler step impl: " + stepConfig.Impl)
}
step := makeStep()
- if stepConfig.Spec.Type == v1alpha1.StepTypeWeigher && stepConfig.Spec.Weigher != nil {
- step = validateStep(step, stepConfig.Spec.Weigher.DisabledValidations)
+ if stepConfig.Type == v1alpha1.StepTypeWeigher && stepConfig.Weigher != nil {
+ step = validateStep(step, stepConfig.Weigher.DisabledValidations)
}
step = monitorStep(ctx, client, stepConfig, step, pipelineMonitor)
if err := step.Init(ctx, client, stepConfig); err != nil {
return nil, errors.New("failed to initialize pipeline step: " + err.Error())
}
- stepsByName[stepConfig.Name] = step
- order = append(order, stepConfig.Name)
+ stepsByName[stepConfig.Impl] = step
+ order = append(order, stepConfig.Impl)
slog.Info(
"scheduler: added step",
- "name", stepConfig.Name,
- "impl", stepConfig.Spec.Impl,
+ "impl", stepConfig.Impl,
)
}
return &pipeline[RequestType]{
@@ -205,7 +203,7 @@ func (p *pipeline[RequestType]) Run(request RequestType) (v1alpha1.DecisionResul
for _, stepName := range p.order {
if activations, ok := stepWeights[stepName]; ok {
result.StepResults = append(result.StepResults, v1alpha1.StepResult{
- StepRef: corev1.ObjectReference{Name: stepName},
+ StepName: stepName,
Activations: activations,
})
}
diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go
index a96f2ee21..73d44b3ab 100644
--- a/internal/scheduling/lib/pipeline_controller.go
+++ b/internal/scheduling/lib/pipeline_controller.go
@@ -26,7 +26,7 @@ type PipelineInitializer[PipelineType any] interface {
// This method is delegated to the parent controller, when a pipeline needs
// to be newly initialized or re-initialized to update it in the pipeline
// map.
- InitPipeline(ctx context.Context, name string, steps []v1alpha1.Step) (PipelineType, error)
+ InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (PipelineType, error)
// Get the accepted pipeline type for this controller.
//
// This is used to filter pipelines when listing existing pipelines on
@@ -87,49 +87,37 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange(
}
log := ctrl.LoggerFrom(ctx)
old := obj.DeepCopy()
- // Get all configured steps for the pipeline.
- var steps []v1alpha1.Step
- obj.Status.TotalSteps, obj.Status.ReadySteps = len(obj.Spec.Steps), 0
- var err error
+
+ // Check if all steps are ready. If not, check if the step is mandatory.
+ obj.Status.TotalSteps = len(obj.Spec.Steps)
+ obj.Status.ReadySteps = 0
for _, step := range obj.Spec.Steps {
- stepConf := &v1alpha1.Step{}
- log.Info("checking step for pipeline", "pipelineName", obj.Name, "stepName", step.Ref.Name)
- if err = c.Get(ctx, client.ObjectKey{
- Name: step.Ref.Name,
- Namespace: step.Ref.Namespace,
- }, stepConf); err != nil {
- err = fmt.Errorf("failed to get step %s: %w", step.Ref.Name, err)
+ err := c.checkStepReady(ctx, &step)
+ if err == nil {
+ obj.Status.ReadySteps++
continue
}
- if !stepConf.Status.Ready {
- if step.Mandatory {
- err = fmt.Errorf("mandatory step %s not ready", step.Ref.Name)
+ if step.Mandatory {
+ obj.Status.Ready = false
+ meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{
+ Type: v1alpha1.PipelineConditionError,
+ Status: metav1.ConditionTrue,
+ Reason: "MandatoryStepNotReady",
+ Message: fmt.Sprintf("mandatory step %s not ready: %s", step.Impl, err.Error()),
+ })
+ patch := client.MergeFrom(old)
+ if err := c.Status().Patch(ctx, obj, patch); err != nil {
+ log.Error(err, "failed to patch pipeline status", "pipelineName", obj.Name)
}
- log.Info("step not ready", "pipelineName", obj.Name, "stepName", step.Ref.Name)
- continue
+ delete(c.Pipelines, obj.Name)
+ delete(c.PipelineConfigs, obj.Name)
+ return
}
- obj.Status.ReadySteps++
- steps = append(steps, *stepConf)
}
obj.Status.StepsReadyFrac = fmt.Sprintf("%d/%d", obj.Status.ReadySteps, obj.Status.TotalSteps)
- if err != nil {
- log.Error(err, "pipeline not ready due to step issues", "pipelineName", obj.Name)
- obj.Status.Ready = false
- meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{
- Type: v1alpha1.StepConditionError,
- Status: metav1.ConditionTrue,
- Reason: "StepNotReady",
- Message: err.Error(),
- })
- patch := client.MergeFrom(old)
- if err := c.Status().Patch(ctx, obj, patch); err != nil {
- log.Error(err, "failed to patch pipeline status", "pipelineName", obj.Name)
- }
- delete(c.Pipelines, obj.Name)
- delete(c.PipelineConfigs, obj.Name)
- return
- }
- c.Pipelines[obj.Name], err = c.Initializer.InitPipeline(ctx, obj.Name, steps)
+
+ var err error
+ c.Pipelines[obj.Name], err = c.Initializer.InitPipeline(ctx, *obj)
c.PipelineConfigs[obj.Name] = *obj
if err != nil {
log.Error(err, "failed to create pipeline", "pipelineName", obj.Name)
@@ -200,22 +188,17 @@ func (c *BasePipelineController[PipelineType]) HandlePipelineDeleted(
delete(c.PipelineConfigs, pipelineConf.Name)
}
-// Handle a step creation or update event from watching step resources.
-func (c *BasePipelineController[PipelineType]) handleStepChange(
+// Check if a step is ready, and if not, return an error indicating why not.
+func (c *BasePipelineController[PipelineType]) checkStepReady(
ctx context.Context,
- obj *v1alpha1.Step,
- queue workqueue.TypedRateLimitingInterface[reconcile.Request],
-) {
+ obj *v1alpha1.StepSpec,
+) error {
- if obj.Spec.SchedulingDomain != c.SchedulingDomain {
- return
- }
log := ctrl.LoggerFrom(ctx)
// Check the status of all knowledges depending on this step.
- old := obj.DeepCopy()
- obj.Status.ReadyKnowledges = 0
- obj.Status.TotalKnowledges = len(obj.Spec.Knowledges)
- for _, knowledgeRef := range obj.Spec.Knowledges {
+ readyKnowledges := 0
+ totalKnowledges := len(obj.Knowledges)
+ for _, knowledgeRef := range obj.Knowledges {
knowledge := &v1alpha1.Knowledge{}
if err := c.Get(ctx, client.ObjectKey{
Name: knowledgeRef.Name,
@@ -233,108 +216,15 @@ func (c *BasePipelineController[PipelineType]) handleStepChange(
log.Info("knowledge not ready, no data available", "knowledgeName", knowledgeRef.Name)
continue
}
- obj.Status.ReadyKnowledges++
- }
- obj.Status.KnowledgesReadyFrac = fmt.Sprintf("%d/%d", obj.Status.ReadyKnowledges, obj.Status.TotalKnowledges)
- if obj.Status.ReadyKnowledges != obj.Status.TotalKnowledges {
- obj.Status.Ready = false
- meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{
- Type: v1alpha1.StepConditionError,
- Status: metav1.ConditionTrue,
- Reason: "KnowledgesNotReady",
- Message: "not all knowledges are ready",
- })
- log.Info("step not ready, not all knowledges are ready", "stepName", obj.Name)
- } else {
- obj.Status.Ready = true
- meta.RemoveStatusCondition(&obj.Status.Conditions, v1alpha1.StepConditionError)
- log.Info("step is ready", "stepName", obj.Name)
- }
- patch := client.MergeFrom(old)
- if err := c.Status().Patch(ctx, obj, patch); err != nil {
- log.Error(err, "failed to patch step status", "stepName", obj.Name)
- return
- }
- // Find all pipelines depending on this step and re-evaluate them.
- var pipelines v1alpha1.PipelineList
- if err := c.List(ctx, &pipelines); err != nil {
- log.Error(err, "failed to list pipelines for step", "stepName", obj.Name)
- return
- }
- for _, pipeline := range pipelines.Items {
- needsUpdate := false
- for _, step := range pipeline.Spec.Steps {
- if step.Ref.Name == obj.Name && step.Ref.Namespace == obj.Namespace {
- needsUpdate = true
- break
- }
- }
- if needsUpdate {
- c.handlePipelineChange(ctx, &pipeline, queue)
- }
- }
-}
-
-// Handler bound to a step watch to handle created steps.
-//
-// This handler will look at the underlying resources of the step and check
-// if they are ready. It will then re-evaluate all pipelines depending on the step.
-func (c *BasePipelineController[PipelineType]) HandleStepCreated(
- ctx context.Context,
- evt event.CreateEvent,
- queue workqueue.TypedRateLimitingInterface[reconcile.Request],
-) {
-
- stepConf := evt.Object.(*v1alpha1.Step)
- c.handleStepChange(ctx, stepConf, queue)
-}
-
-// Handler bound to a step watch to handle updated steps.
-//
-// This handler will look at the underlying resources of the step and check
-// if they are ready. It will then re-evaluate all pipelines depending on the step.
-func (c *BasePipelineController[PipelineType]) HandleStepUpdated(
- ctx context.Context,
- evt event.UpdateEvent,
- queue workqueue.TypedRateLimitingInterface[reconcile.Request],
-) {
-
- stepConf := evt.ObjectNew.(*v1alpha1.Step)
- c.handleStepChange(ctx, stepConf, queue)
-}
-
-// Handler bound to a step watch to handle deleted steps.
-//
-// This handler will re-evaluate all pipelines depending on the step.
-func (c *BasePipelineController[PipelineType]) HandleStepDeleted(
- ctx context.Context,
- evt event.DeleteEvent,
- queue workqueue.TypedRateLimitingInterface[reconcile.Request],
-) {
-
- stepConf := evt.Object.(*v1alpha1.Step)
- if stepConf.Spec.SchedulingDomain != c.SchedulingDomain {
- return
+ readyKnowledges++
}
- // When a step is deleted, we need to re-evaluate all pipelines depending on it.
- var pipelines v1alpha1.PipelineList
- log := ctrl.LoggerFrom(ctx)
- if err := c.List(ctx, &pipelines); err != nil {
- log.Error(err, "failed to list pipelines for deleted step", "stepName", stepConf.Name)
- return
- }
- for _, pipeline := range pipelines.Items {
- needsUpdate := false
- for _, step := range pipeline.Spec.Steps {
- if step.Ref.Name == stepConf.Name && step.Ref.Namespace == stepConf.Namespace {
- needsUpdate = true
- break
- }
- }
- if needsUpdate {
- c.handlePipelineChange(ctx, &pipeline, queue)
- }
+ if readyKnowledges != totalKnowledges {
+ return fmt.Errorf(
+ "%d/%d knowledges ready",
+ readyKnowledges, totalKnowledges,
+ )
}
+ return nil
}
// Handle a knowledge creation, update, or delete event from watching knowledge resources.
@@ -348,30 +238,33 @@ func (c *BasePipelineController[PipelineType]) handleKnowledgeChange(
return
}
log := ctrl.LoggerFrom(ctx)
- log.Info("knowledge changed, re-evaluating dependent steps", "knowledgeName", obj.Name)
- // Find all steps depending on this knowledge and re-evaluate them.
- var steps v1alpha1.StepList
- if err := c.List(ctx, &steps); err != nil {
- log.Error(err, "failed to list steps for knowledge", "knowledgeName", obj.Name)
+ log.Info("knowledge changed, re-evaluating dependent pipelines", "knowledgeName", obj.Name)
+ // Find all pipelines depending on this knowledge and re-evaluate them.
+ var pipelines v1alpha1.PipelineList
+ if err := c.List(ctx, &pipelines); err != nil {
+ log.Error(err, "failed to list pipelines for knowledge", "knowledgeName", obj.Name)
return
}
- for _, step := range steps.Items {
+ for _, pipeline := range pipelines.Items {
needsUpdate := false
- for _, knowledgeRef := range step.Spec.Knowledges {
- if knowledgeRef.Name == obj.Name && knowledgeRef.Namespace == obj.Namespace {
- needsUpdate = true
- break
+ for _, step := range pipeline.Spec.Steps {
+ for _, knowledgeRef := range step.Knowledges {
+ if knowledgeRef.Name == obj.Name && knowledgeRef.Namespace == obj.Namespace {
+ needsUpdate = true
+ break
+ }
}
}
if needsUpdate {
- c.handleStepChange(ctx, &step, queue)
+ log.Info("re-evaluating pipeline due to knowledge change", "pipelineName", pipeline.Name)
+ c.handlePipelineChange(ctx, &pipeline, queue)
}
}
}
// Handler bound to a knowledge watch to handle created knowledges.
//
-// This handler will re-evaluate all steps depending on the knowledge.
+// This handler will re-evaluate all pipelines depending on the knowledge.
func (c *BasePipelineController[PipelineType]) HandleKnowledgeCreated(
ctx context.Context,
evt event.CreateEvent,
@@ -384,7 +277,7 @@ func (c *BasePipelineController[PipelineType]) HandleKnowledgeCreated(
// Handler bound to a knowledge watch to handle updated knowledges.
//
-// This handler will re-evaluate all steps depending on the knowledge.
+// This handler will re-evaluate all pipelines depending on the knowledge.
func (c *BasePipelineController[PipelineType]) HandleKnowledgeUpdated(
ctx context.Context,
evt event.UpdateEvent,
@@ -406,7 +299,7 @@ func (c *BasePipelineController[PipelineType]) HandleKnowledgeUpdated(
// Handler bound to a knowledge watch to handle deleted knowledges.
//
-// This handler will re-evaluate all steps depending on the knowledge.
+// This handler will re-evaluate all pipelines depending on the knowledge.
func (c *BasePipelineController[PipelineType]) HandleKnowledgeDeleted(
ctx context.Context,
evt event.DeleteEvent,
diff --git a/internal/scheduling/lib/pipeline_controller_test.go b/internal/scheduling/lib/pipeline_controller_test.go
index 326f48615..d16425740 100644
--- a/internal/scheduling/lib/pipeline_controller_test.go
+++ b/internal/scheduling/lib/pipeline_controller_test.go
@@ -5,203 +5,155 @@ package lib
import (
"context"
- "errors"
"testing"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
- "k8s.io/apimachinery/pkg/types"
- "k8s.io/client-go/util/workqueue"
- ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/event"
- "sigs.k8s.io/controller-runtime/pkg/reconcile"
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
)
// Mock pipeline type for testing
type mockPipeline struct {
- name string
- steps []v1alpha1.Step
+ name string
}
-// Mock initializer implementation
-type mockInitializer struct {
- shouldFail bool
- initPipeline func(steps []v1alpha1.Step) (mockPipeline, error)
+// Mock PipelineInitializer for testing
+type mockPipelineInitializer struct {
+ pipelineType v1alpha1.PipelineType
+ initPipelineFunc func(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error)
}
-func (m *mockInitializer) PipelineType() v1alpha1.PipelineType {
- return ""
-}
-
-func (m *mockInitializer) InitPipeline(ctx context.Context, name string, steps []v1alpha1.Step) (mockPipeline, error) {
- if m.shouldFail {
- return mockPipeline{}, errors.New("mock initializer error")
- }
- if m.initPipeline != nil {
- return m.initPipeline(steps)
- }
- return mockPipeline{name: name, steps: steps}, nil
-}
-
-func setupTestScheme() *runtime.Scheme {
- scheme := runtime.NewScheme()
- err := v1alpha1.AddToScheme(scheme)
- if err != nil {
- return nil
- }
- err = v1alpha1.AddToScheme(scheme)
- if err != nil {
- return nil
- }
- return scheme
-}
-
-func createTestPipeline(steps []v1alpha1.StepInPipeline) *v1alpha1.Pipeline {
- return &v1alpha1.Pipeline{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-pipeline",
- },
- Spec: v1alpha1.PipelineSpec{
- SchedulingDomain: "test",
- Type: "",
- Steps: steps,
- },
- }
-}
-
-func createTestStep(ready bool, knowledges []corev1.ObjectReference) *v1alpha1.Step {
- return &v1alpha1.Step{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-step",
- Namespace: "default",
- },
- Spec: v1alpha1.StepSpec{
- SchedulingDomain: "test",
- Type: v1alpha1.StepTypeFilter,
- Impl: "test-impl",
- Knowledges: knowledges,
- },
- Status: v1alpha1.StepStatus{
- Ready: ready,
- ReadyKnowledges: len(knowledges),
- TotalKnowledges: len(knowledges),
- KnowledgesReadyFrac: "ready",
- },
+func (m *mockPipelineInitializer) InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) {
+ if m.initPipelineFunc != nil {
+ return m.initPipelineFunc(ctx, p)
}
+ return mockPipeline{name: p.Name}, nil
}
-func createTestKnowledge(name string, hasError bool, rawLength int) *v1alpha1.Knowledge {
- knowledge := &v1alpha1.Knowledge{
- ObjectMeta: metav1.ObjectMeta{
- Name: name,
- Namespace: "default",
- },
- Spec: v1alpha1.KnowledgeSpec{
- SchedulingDomain: "test",
- },
- Status: v1alpha1.KnowledgeStatus{
- RawLength: rawLength,
- },
- }
- if hasError {
- meta.SetStatusCondition(&knowledge.Status.Conditions, metav1.Condition{
- Type: v1alpha1.KnowledgeConditionError,
- Status: metav1.ConditionTrue,
- Reason: "TestError",
- Message: "This is a test error",
- })
- }
- return knowledge
+func (m *mockPipelineInitializer) PipelineType() v1alpha1.PipelineType {
+ return m.pipelineType
}
func TestBasePipelineController_InitAllPipelines(t *testing.T) {
- scheme := setupTestScheme()
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
tests := []struct {
name string
existingPipelines []v1alpha1.Pipeline
- existingSteps []v1alpha1.Step
- initializerFails bool
- expectedPipelines int
+ schedulingDomain v1alpha1.SchedulingDomain
+ pipelineType v1alpha1.PipelineType
+ expectedCount int
expectError bool
}{
{
name: "no existing pipelines",
existingPipelines: []v1alpha1.Pipeline{},
- expectedPipelines: 0,
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ expectedCount: 0,
expectError: false,
},
{
- name: "single pipeline with ready step",
+ name: "one matching pipeline",
existingPipelines: []v1alpha1.Pipeline{
- *createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
- },
- Mandatory: true,
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
},
- }),
- },
- existingSteps: []v1alpha1.Step{
- *createTestStep(true, nil),
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ },
},
- expectedPipelines: 1,
- expectError: false,
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ expectedCount: 1,
+ expectError: false,
},
{
- name: "pipeline with non-ready mandatory step",
+ name: "multiple pipelines, only some matching",
existingPipelines: []v1alpha1.Pipeline{
- *createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
- },
- Mandatory: true,
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "matching-pipeline-1",
},
- }),
- },
- existingSteps: []v1alpha1.Step{
- *createTestStep(false, nil),
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ },
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "different-domain-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainCinder,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ },
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "different-type-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeDescheduler,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ },
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "matching-pipeline-2",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ },
},
- expectedPipelines: 0,
- expectError: false,
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ expectedCount: 2,
+ expectError: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- objects := make([]client.Object, 0)
+ objects := make([]client.Object, len(tt.existingPipelines))
for i := range tt.existingPipelines {
- objects = append(objects, &tt.existingPipelines[i])
- }
- for i := range tt.existingSteps {
- objects = append(objects, &tt.existingSteps[i])
+ objects[i] = &tt.existingPipelines[i]
}
- client := fake.NewClientBuilder().
+ fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(objects...).
- WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}).
+ WithStatusSubresource(&v1alpha1.Pipeline{}).
Build()
- initializer := &mockInitializer{shouldFail: tt.initializerFails}
controller := &BasePipelineController[mockPipeline]{
- Initializer: initializer,
- Client: client,
- SchedulingDomain: "test",
+ Client: fakeClient,
+ SchedulingDomain: tt.schedulingDomain,
+ Initializer: &mockPipelineInitializer{
+ pipelineType: tt.pipelineType,
+ },
}
- ctx := ctrl.LoggerInto(context.Background(), ctrl.Log)
- err := controller.InitAllPipelines(ctx)
+ err := controller.InitAllPipelines(context.Background())
if tt.expectError && err == nil {
t.Error("Expected error but got none")
@@ -210,485 +162,1028 @@ func TestBasePipelineController_InitAllPipelines(t *testing.T) {
t.Errorf("Expected no error but got: %v", err)
}
- if len(controller.Pipelines) != tt.expectedPipelines {
- t.Errorf("Expected %d pipelines, got %d", tt.expectedPipelines, len(controller.Pipelines))
+ if len(controller.Pipelines) != tt.expectedCount {
+ t.Errorf("Expected %d pipelines, got %d", tt.expectedCount, len(controller.Pipelines))
+ }
+
+ if len(controller.PipelineConfigs) != tt.expectedCount {
+ t.Errorf("Expected %d pipeline configs, got %d", tt.expectedCount, len(controller.PipelineConfigs))
}
})
}
}
-func TestBasePipelineController_HandlePipelineCreated(t *testing.T) {
- scheme := setupTestScheme()
+func TestBasePipelineController_handlePipelineChange(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
tests := []struct {
- name string
- pipeline *v1alpha1.Pipeline
- existingSteps []v1alpha1.Step
- initializerFails bool
- expectReady bool
- expectError bool
+ name string
+ pipeline *v1alpha1.Pipeline
+ knowledges []v1alpha1.Knowledge
+ schedulingDomain v1alpha1.SchedulingDomain
+ initPipelineError bool
+ expectReady bool
+ expectInMap bool
+ expectCondition string
}{
{
- name: "pipeline with ready steps",
- pipeline: createTestPipeline([]v1alpha1.StepInPipeline{
+ name: "pipeline with all steps ready",
+ pipeline: &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Mandatory: true,
+ Knowledges: []corev1.ObjectReference{
+ {Name: "knowledge-1", Namespace: "default"},
+ },
+ },
+ },
+ },
+ },
+ knowledges: []v1alpha1.Knowledge{
{
- Ref: corev1.ObjectReference{
- Name: "test-step",
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "knowledge-1",
Namespace: "default",
},
- Mandatory: true,
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
},
- }),
- existingSteps: []v1alpha1.Step{
- *createTestStep(true, nil),
},
- expectReady: true,
- expectError: false,
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ expectReady: true,
+ expectInMap: true,
},
{
- name: "pipeline with non-ready mandatory step",
- pipeline: createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
+ name: "pipeline with mandatory step not ready",
+ pipeline: &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline-not-ready",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Mandatory: true,
+ Knowledges: []corev1.ObjectReference{
+ {Name: "missing-knowledge", Namespace: "default"},
+ },
+ },
},
- Mandatory: true,
},
- }),
- existingSteps: []v1alpha1.Step{
- *createTestStep(false, nil),
},
- expectReady: false,
- expectError: true,
+ knowledges: []v1alpha1.Knowledge{},
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ expectReady: false,
+ expectInMap: false,
+ expectCondition: v1alpha1.PipelineConditionError,
},
{
- name: "pipeline with non-ready optional step",
- pipeline: createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
+ name: "pipeline with optional step not ready",
+ pipeline: &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline-optional",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Mandatory: false,
+ Knowledges: []corev1.ObjectReference{
+ {Name: "missing-knowledge", Namespace: "default"},
+ },
+ },
},
- Mandatory: false,
},
- }),
- existingSteps: []v1alpha1.Step{
- *createTestStep(false, nil),
},
- expectReady: true,
- expectError: false,
+ knowledges: []v1alpha1.Knowledge{},
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ expectReady: true,
+ expectInMap: true,
+ },
+ {
+ name: "pipeline init fails",
+ pipeline: &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline-init-fail",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ },
+ knowledges: []v1alpha1.Knowledge{},
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ initPipelineError: true,
+ expectReady: false,
+ expectInMap: false,
+ expectCondition: v1alpha1.PipelineConditionError,
+ },
+ {
+ name: "pipeline with different scheduling domain",
+ pipeline: &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline-different-domain",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainCinder,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ },
+ knowledges: []v1alpha1.Knowledge{},
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ expectReady: false,
+ expectInMap: false,
},
{
- name: "initializer fails to initialize pipeline",
- pipeline: createTestPipeline([]v1alpha1.StepInPipeline{
+ name: "pipeline with knowledge in error state",
+ pipeline: &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline-knowledge-error",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Mandatory: true,
+ Knowledges: []corev1.ObjectReference{
+ {Name: "error-knowledge", Namespace: "default"},
+ },
+ },
+ },
+ },
+ },
+ knowledges: []v1alpha1.Knowledge{
{
- Ref: corev1.ObjectReference{
- Name: "test-step",
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "error-knowledge",
Namespace: "default",
},
- Mandatory: true,
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ Conditions: []metav1.Condition{
+ {
+ Type: v1alpha1.KnowledgeConditionError,
+ Status: metav1.ConditionTrue,
+ },
+ },
+ },
},
- }),
- existingSteps: []v1alpha1.Step{
- *createTestStep(true, nil),
},
- initializerFails: true,
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
expectReady: false,
- expectError: true,
+ expectInMap: false,
+ expectCondition: v1alpha1.PipelineConditionError,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- objects := make([]client.Object, 0)
- objects = append(objects, tt.pipeline)
- for i := range tt.existingSteps {
- objects = append(objects, &tt.existingSteps[i])
+ objects := []client.Object{tt.pipeline}
+ for i := range tt.knowledges {
+ objects = append(objects, &tt.knowledges[i])
}
- client := fake.NewClientBuilder().
+ fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(objects...).
- WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}).
+ WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}).
Build()
- initializer := &mockInitializer{shouldFail: tt.initializerFails}
+ initializer := &mockPipelineInitializer{
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ }
+
+ if tt.initPipelineError {
+ initializer.initPipelineFunc = func(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) {
+ return mockPipeline{}, context.Canceled
+ }
+ }
+
controller := &BasePipelineController[mockPipeline]{
- Pipelines: make(map[string]mockPipeline),
+ Client: fakeClient,
+ SchedulingDomain: tt.schedulingDomain,
Initializer: initializer,
- Client: client,
- SchedulingDomain: "test",
+ Pipelines: make(map[string]mockPipeline),
+ PipelineConfigs: make(map[string]v1alpha1.Pipeline),
}
- controller.Pipelines = make(map[string]mockPipeline)
- controller.PipelineConfigs = make(map[string]v1alpha1.Pipeline)
-
- ctx := ctrl.LoggerInto(context.Background(), ctrl.Log)
- evt := event.CreateEvent{Object: tt.pipeline}
- queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]())
- controller.HandlePipelineCreated(ctx, evt, queue)
+ controller.handlePipelineChange(context.Background(), tt.pipeline, nil)
- // Check if pipeline was added to map
- _, pipelineExists := controller.Pipelines[tt.pipeline.Name]
- if tt.expectReady && !pipelineExists {
- t.Error("Expected pipeline to be in map but it wasn't")
- }
- if !tt.expectReady && pipelineExists {
- t.Error("Expected pipeline not to be in map but it was")
+ // Check if pipeline is in map
+ _, inMap := controller.Pipelines[tt.pipeline.Name]
+ if inMap != tt.expectInMap {
+ t.Errorf("Expected pipeline in map: %v, got: %v", tt.expectInMap, inMap)
}
- // Verify pipeline status was updated
+ // Get updated pipeline status
var updatedPipeline v1alpha1.Pipeline
- err := client.Get(ctx, types.NamespacedName{Name: tt.pipeline.Name}, &updatedPipeline)
+ err := fakeClient.Get(context.Background(), client.ObjectKey{Name: tt.pipeline.Name}, &updatedPipeline)
if err != nil {
t.Fatalf("Failed to get updated pipeline: %v", err)
}
+ // Check ready status
if updatedPipeline.Status.Ready != tt.expectReady {
- t.Errorf("Expected Ready=%v, got %v", tt.expectReady, updatedPipeline.Status.Ready)
+ t.Errorf("Expected ready status: %v, got: %v", tt.expectReady, updatedPipeline.Status.Ready)
}
- hasError := meta.IsStatusConditionTrue(updatedPipeline.Status.Conditions, v1alpha1.PipelineConditionError)
- if hasError != tt.expectError {
- t.Errorf("Expected Error condition=%v, got %v", tt.expectError, hasError)
+ // Check condition if specified
+ if tt.expectCondition != "" {
+ hasCondition := meta.IsStatusConditionTrue(updatedPipeline.Status.Conditions, tt.expectCondition)
+ if !hasCondition {
+ t.Errorf("Expected condition %s to be true", tt.expectCondition)
+ }
}
})
}
}
-func TestBasePipelineController_HandlePipelineDeleted(t *testing.T) {
- scheme := setupTestScheme()
+func TestBasePipelineController_HandlePipelineCreated(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
+
+ pipeline := &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ }
- pipeline := createTestPipeline(nil)
- client := fake.NewClientBuilder().
+ fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(pipeline).
+ WithStatusSubresource(&v1alpha1.Pipeline{}).
Build()
- initializer := &mockInitializer{}
+ controller := &BasePipelineController[mockPipeline]{
+ Client: fakeClient,
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Initializer: &mockPipelineInitializer{
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ },
+ Pipelines: make(map[string]mockPipeline),
+ PipelineConfigs: make(map[string]v1alpha1.Pipeline),
+ }
+
+ evt := event.CreateEvent{
+ Object: pipeline,
+ }
+
+ controller.HandlePipelineCreated(context.Background(), evt, nil)
+
+ if _, exists := controller.Pipelines[pipeline.Name]; !exists {
+ t.Error("Expected pipeline to be in map after creation")
+ }
+}
+
+func TestBasePipelineController_HandlePipelineUpdated(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
+
+ oldPipeline := &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{},
+ },
+ }
+
+ newPipeline := oldPipeline.DeepCopy()
+ newPipeline.Spec.Description = "Updated description"
+
+ fakeClient := fake.NewClientBuilder().
+ WithScheme(scheme).
+ WithObjects(newPipeline).
+ WithStatusSubresource(&v1alpha1.Pipeline{}).
+ Build()
+
+ controller := &BasePipelineController[mockPipeline]{
+ Client: fakeClient,
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Initializer: &mockPipelineInitializer{
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ },
+ Pipelines: make(map[string]mockPipeline),
+ PipelineConfigs: make(map[string]v1alpha1.Pipeline),
+ }
+
+ evt := event.UpdateEvent{
+ ObjectOld: oldPipeline,
+ ObjectNew: newPipeline,
+ }
+
+ controller.HandlePipelineUpdated(context.Background(), evt, nil)
+
+ if _, exists := controller.Pipelines[newPipeline.Name]; !exists {
+ t.Error("Expected pipeline to be in map after update")
+ }
+}
+
+func TestBasePipelineController_HandlePipelineDeleted(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
+
+ pipeline := &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ },
+ }
+
controller := &BasePipelineController[mockPipeline]{
Pipelines: map[string]mockPipeline{
"test-pipeline": {name: "test-pipeline"},
},
- Initializer: initializer,
- Client: client,
- SchedulingDomain: "test",
+ PipelineConfigs: map[string]v1alpha1.Pipeline{
+ "test-pipeline": *pipeline,
+ },
}
- ctx := ctrl.LoggerInto(context.Background(), ctrl.Log)
- evt := event.DeleteEvent{Object: pipeline}
- queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]())
+ evt := event.DeleteEvent{
+ Object: pipeline,
+ }
- controller.HandlePipelineDeleted(ctx, evt, queue)
+ controller.HandlePipelineDeleted(context.Background(), evt, nil)
- if _, exists := controller.Pipelines["test-pipeline"]; exists {
- t.Error("Expected pipeline to be removed from map")
+ if _, exists := controller.Pipelines[pipeline.Name]; exists {
+ t.Error("Expected pipeline to be removed from map after deletion")
+ }
+ if _, exists := controller.PipelineConfigs[pipeline.Name]; exists {
+ t.Error("Expected pipeline config to be removed from map after deletion")
}
}
-func TestBasePipelineController_HandleStepCreated(t *testing.T) {
- scheme := setupTestScheme()
+func TestBasePipelineController_checkStepReady(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
tests := []struct {
- name string
- step *v1alpha1.Step
- knowledges []v1alpha1.Knowledge
- pipelines []v1alpha1.Pipeline
- expectedReady bool
- expectedPipelines int
+ name string
+ step v1alpha1.StepSpec
+ knowledges []v1alpha1.Knowledge
+ expectError bool
}{
{
- name: "step with ready knowledges",
- step: createTestStep(false, []corev1.ObjectReference{
- {Name: "knowledge1", Namespace: "default"},
- }),
+ name: "step with no knowledge dependencies",
+ step: v1alpha1.StepSpec{
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{},
+ },
+ knowledges: []v1alpha1.Knowledge{},
+ expectError: false,
+ },
+ {
+ name: "step with ready knowledge",
+ step: v1alpha1.StepSpec{
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "ready-knowledge", Namespace: "default"},
+ },
+ },
knowledges: []v1alpha1.Knowledge{
- *createTestKnowledge("knowledge1", false, 10),
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "ready-knowledge",
+ Namespace: "default",
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ },
},
- pipelines: []v1alpha1.Pipeline{
- *createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
+ expectError: false,
+ },
+ {
+ name: "step with knowledge in error state",
+ step: v1alpha1.StepSpec{
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "error-knowledge", Namespace: "default"},
+ },
+ },
+ knowledges: []v1alpha1.Knowledge{
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "error-knowledge",
+ Namespace: "default",
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ Conditions: []metav1.Condition{
+ {
+ Type: v1alpha1.KnowledgeConditionError,
+ Status: metav1.ConditionTrue,
+ },
},
- Mandatory: true,
},
- }),
+ },
+ },
+ expectError: true,
+ },
+ {
+ name: "step with knowledge with no data",
+ step: v1alpha1.StepSpec{
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "no-data-knowledge", Namespace: "default"},
+ },
+ },
+ knowledges: []v1alpha1.Knowledge{
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "no-data-knowledge",
+ Namespace: "default",
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 0,
+ },
+ },
+ },
+ expectError: true,
+ },
+ {
+ name: "step with missing knowledge",
+ step: v1alpha1.StepSpec{
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "missing-knowledge", Namespace: "default"},
+ },
},
- expectedReady: true,
- expectedPipelines: 1,
+ knowledges: []v1alpha1.Knowledge{},
+ expectError: true,
},
{
- name: "step with knowledge error",
- step: createTestStep(false, []corev1.ObjectReference{
- {Name: "knowledge1", Namespace: "default"},
- }),
+ name: "step with multiple knowledges, all ready",
+ step: v1alpha1.StepSpec{
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "knowledge-1", Namespace: "default"},
+ {Name: "knowledge-2", Namespace: "default"},
+ },
+ },
knowledges: []v1alpha1.Knowledge{
- *createTestKnowledge("knowledge1", true, 0),
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "knowledge-1",
+ Namespace: "default",
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ },
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "knowledge-2",
+ Namespace: "default",
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 5,
+ },
+ },
},
- pipelines: []v1alpha1.Pipeline{
- *createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
- },
- Mandatory: true,
+ expectError: false,
+ },
+ {
+ name: "step with multiple knowledges, some not ready",
+ step: v1alpha1.StepSpec{
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "ready-knowledge", Namespace: "default"},
+ {Name: "not-ready-knowledge", Namespace: "default"},
+ },
+ },
+ knowledges: []v1alpha1.Knowledge{
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "ready-knowledge",
+ Namespace: "default",
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ },
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "not-ready-knowledge",
+ Namespace: "default",
},
- }),
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 0,
+ },
+ },
},
- expectedReady: false,
- expectedPipelines: 0,
+ expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- objects := make([]client.Object, 0)
- objects = append(objects, tt.step)
+ objects := make([]client.Object, len(tt.knowledges))
for i := range tt.knowledges {
- objects = append(objects, &tt.knowledges[i])
- }
- for i := range tt.pipelines {
- objects = append(objects, &tt.pipelines[i])
+ objects[i] = &tt.knowledges[i]
}
- client := fake.NewClientBuilder().
+ fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(objects...).
- WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}, &v1alpha1.Knowledge{}).
Build()
- initializer := &mockInitializer{}
controller := &BasePipelineController[mockPipeline]{
- Pipelines: make(map[string]mockPipeline),
- PipelineConfigs: make(map[string]v1alpha1.Pipeline),
- Initializer: initializer,
- Client: client,
- SchedulingDomain: "test",
+ Client: fakeClient,
}
- ctx := ctrl.LoggerInto(context.Background(), ctrl.Log)
- evt := event.CreateEvent{Object: tt.step}
- queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]())
-
- controller.HandleStepCreated(ctx, evt, queue)
-
- // Verify step status was updated
- var updatedStep v1alpha1.Step
- err := client.Get(ctx, types.NamespacedName{Name: tt.step.Name, Namespace: tt.step.Namespace}, &updatedStep)
- if err != nil {
- t.Fatalf("Failed to get updated step: %v", err)
- }
+ err := controller.checkStepReady(context.Background(), &tt.step)
- if updatedStep.Status.Ready != tt.expectedReady {
- t.Errorf("Expected step Ready=%v, got %v", tt.expectedReady, updatedStep.Status.Ready)
+ if tt.expectError && err == nil {
+ t.Error("Expected error but got none")
}
-
- // Check if pipelines were updated correctly
- if len(controller.Pipelines) != tt.expectedPipelines {
- t.Errorf("Expected %d pipelines in map, got %d", tt.expectedPipelines, len(controller.Pipelines))
+ if !tt.expectError && err != nil {
+ t.Errorf("Expected no error but got: %v", err)
}
})
}
}
-func TestBasePipelineController_HandleKnowledgeUpdated(t *testing.T) {
- scheme := setupTestScheme()
+func TestBasePipelineController_handleKnowledgeChange(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
tests := []struct {
- name string
- oldKnowledge *v1alpha1.Knowledge
- newKnowledge *v1alpha1.Knowledge
- shouldTrigger bool
+ name string
+ knowledge *v1alpha1.Knowledge
+ pipelines []v1alpha1.Pipeline
+ schedulingDomain v1alpha1.SchedulingDomain
+ expectReEvaluated []string
}{
{
- name: "error status changed",
- oldKnowledge: createTestKnowledge("test-knowledge", false, 10),
- newKnowledge: createTestKnowledge("test-knowledge", true, 10),
- shouldTrigger: true,
- },
- {
- name: "data became available",
- oldKnowledge: createTestKnowledge("test-knowledge", false, 0),
- newKnowledge: createTestKnowledge("test-knowledge", false, 10),
- shouldTrigger: true,
+ name: "knowledge change triggers dependent pipeline re-evaluation",
+ knowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ },
+ pipelines: []v1alpha1.Pipeline{
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "dependent-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "test-knowledge", Namespace: "default"},
+ },
+ },
+ },
+ },
+ },
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "independent-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "other-knowledge", Namespace: "default"},
+ },
+ },
+ },
+ },
+ },
+ },
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ expectReEvaluated: []string{"dependent-pipeline"},
},
{
- name: "no relevant change",
- oldKnowledge: createTestKnowledge("test-knowledge", false, 10),
- newKnowledge: createTestKnowledge("test-knowledge", false, 15),
- shouldTrigger: false,
+ name: "knowledge change in different scheduling domain",
+ knowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainCinder,
+ },
+ },
+ pipelines: []v1alpha1.Pipeline{
+ {
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "nova-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "test-knowledge", Namespace: "default"},
+ },
+ },
+ },
+ },
+ },
+ },
+ schedulingDomain: v1alpha1.SchedulingDomainNova,
+ expectReEvaluated: []string{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- step := createTestStep(false, []corev1.ObjectReference{
- {Name: "test-knowledge", Namespace: "default"},
- })
- pipeline := createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
- },
- Mandatory: true,
- },
- })
-
- objects := []client.Object{tt.newKnowledge, step, pipeline}
+ objects := []client.Object{tt.knowledge}
+ for i := range tt.pipelines {
+ objects = append(objects, &tt.pipelines[i])
+ }
- client := fake.NewClientBuilder().
+ fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(objects...).
- WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}, &v1alpha1.Knowledge{}).
+ WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}).
Build()
- initializer := &mockInitializer{}
controller := &BasePipelineController[mockPipeline]{
- Pipelines: make(map[string]mockPipeline),
- Initializer: initializer,
- Client: client,
- SchedulingDomain: "test",
- }
- controller.Pipelines = make(map[string]mockPipeline)
- controller.PipelineConfigs = make(map[string]v1alpha1.Pipeline)
-
- ctx := ctrl.LoggerInto(context.Background(), ctrl.Log)
- evt := event.UpdateEvent{
- ObjectOld: tt.oldKnowledge,
- ObjectNew: tt.newKnowledge,
+ Client: fakeClient,
+ SchedulingDomain: tt.schedulingDomain,
+ Initializer: &mockPipelineInitializer{
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ },
+ Pipelines: make(map[string]mockPipeline),
+ PipelineConfigs: make(map[string]v1alpha1.Pipeline),
}
- queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]())
- controller.HandleKnowledgeUpdated(ctx, evt, queue)
+ controller.handleKnowledgeChange(context.Background(), tt.knowledge, nil)
- // If should trigger, verify step status was updated
- if tt.shouldTrigger {
- var updatedStep v1alpha1.Step
- err := client.Get(ctx, types.NamespacedName{Name: step.Name, Namespace: step.Namespace}, &updatedStep)
- if err != nil {
- t.Fatalf("Failed to get updated step: %v", err)
+ // Verify expected pipelines were re-evaluated by checking if they're in the map
+ for _, expectedName := range tt.expectReEvaluated {
+ if _, exists := controller.Pipelines[expectedName]; !exists {
+ t.Errorf("Expected pipeline %s to be re-evaluated", expectedName)
}
- // Status should have been recalculated
}
})
}
}
-func TestBasePipelineController_HandleStepDeleted(t *testing.T) {
- scheme := setupTestScheme()
+func TestBasePipelineController_HandleKnowledgeCreated(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
- step := createTestStep(true, nil)
- pipeline := createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
- },
- Mandatory: true,
+ knowledge := &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
},
- })
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ }
- // Only include the pipeline in the fake client, not the step (simulating step deletion)
- objects := []client.Object{pipeline}
+ pipeline := &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "test-knowledge", Namespace: "default"},
+ },
+ },
+ },
+ },
+ }
- client := fake.NewClientBuilder().
+ fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
- WithObjects(objects...).
- WithStatusSubresource(&v1alpha1.Pipeline{}).
+ WithObjects(knowledge, pipeline).
+ WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}).
Build()
- initializer := &mockInitializer{}
controller := &BasePipelineController[mockPipeline]{
- Pipelines: map[string]mockPipeline{
- "test-pipeline": {name: "test-pipeline"},
+ Client: fakeClient,
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Initializer: &mockPipelineInitializer{
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
},
- Initializer: initializer,
- Client: client,
+ Pipelines: make(map[string]mockPipeline),
+ PipelineConfigs: make(map[string]v1alpha1.Pipeline),
}
- ctx := ctrl.LoggerInto(context.Background(), ctrl.Log)
- evt := event.DeleteEvent{Object: step}
- queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]())
-
- // Initially pipeline should be in map
- if _, exists := controller.Pipelines["test-pipeline"]; !exists {
- t.Fatal("Expected pipeline to be in map initially")
+ evt := event.CreateEvent{
+ Object: knowledge,
}
- controller.HandleStepDeleted(ctx, evt, queue)
+ controller.HandleKnowledgeCreated(context.Background(), evt, nil)
- // The main requirement is that HandleStepDeleted successfully processes the event
- // without crashing. The exact behavior depends on implementation details, but
- // it should handle the case where a dependent step is deleted gracefully.
+ // Pipeline should be re-evaluated and added to map
+ if _, exists := controller.Pipelines[pipeline.Name]; !exists {
+ t.Error("Expected pipeline to be re-evaluated after knowledge creation")
+ }
+}
- // The pipeline may or may not be removed from map depending on the implementation
- // but the method should not panic or error
+func TestBasePipelineController_HandleKnowledgeUpdated(t *testing.T) {
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
- // Get the pipeline status to verify it was processed
- var updatedPipeline v1alpha1.Pipeline
- err := client.Get(ctx, types.NamespacedName{Name: pipeline.Name}, &updatedPipeline)
- if err != nil {
- t.Errorf("Failed to get pipeline after step deletion: %v", err)
+ tests := []struct {
+ name string
+ oldKnowledge *v1alpha1.Knowledge
+ newKnowledge *v1alpha1.Knowledge
+ expectReEvaluate bool
+ }{
+ {
+ name: "error state changed",
+ oldKnowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ Conditions: []metav1.Condition{
+ {
+ Type: v1alpha1.KnowledgeConditionError,
+ Status: metav1.ConditionTrue,
+ },
+ },
+ },
+ },
+ newKnowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ },
+ expectReEvaluate: true,
+ },
+ {
+ name: "data became available",
+ oldKnowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 0,
+ },
+ },
+ newKnowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ },
+ expectReEvaluate: true,
+ },
+ {
+ name: "no relevant change",
+ oldKnowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 10,
+ },
+ },
+ newKnowledge: &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ },
+ Status: v1alpha1.KnowledgeStatus{
+ RawLength: 15,
+ },
+ },
+ expectReEvaluate: false,
+ },
}
- // The status should reflect the current state - either ready with no steps, or not ready with error
- // Both are valid depending on how the implementation handles missing mandatory steps
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ pipeline := &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Knowledges: []corev1.ObjectReference{
+ {Name: "test-knowledge", Namespace: "default"},
+ },
+ },
+ },
+ },
+ }
+
+ fakeClient := fake.NewClientBuilder().
+ WithScheme(scheme).
+ WithObjects(tt.newKnowledge, pipeline).
+ WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}).
+ Build()
+
+ controller := &BasePipelineController[mockPipeline]{
+ Client: fakeClient,
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Initializer: &mockPipelineInitializer{
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ },
+ Pipelines: make(map[string]mockPipeline),
+ PipelineConfigs: make(map[string]v1alpha1.Pipeline),
+ }
+
+ evt := event.UpdateEvent{
+ ObjectOld: tt.oldKnowledge,
+ ObjectNew: tt.newKnowledge,
+ }
+
+ controller.HandleKnowledgeUpdated(context.Background(), evt, nil)
+
+ _, exists := controller.Pipelines[pipeline.Name]
+ if tt.expectReEvaluate && !exists {
+ t.Error("Expected pipeline to be re-evaluated")
+ }
+ if !tt.expectReEvaluate && exists {
+ t.Error("Expected pipeline not to be re-evaluated")
+ }
+ })
+ }
}
func TestBasePipelineController_HandleKnowledgeDeleted(t *testing.T) {
- scheme := setupTestScheme()
+ scheme := runtime.NewScheme()
+ if err := v1alpha1.AddToScheme(scheme); err != nil {
+ t.Fatalf("Failed to add v1alpha1 scheme: %v", err)
+ }
- knowledge := createTestKnowledge("test-knowledge", false, 10)
- step := createTestStep(true, []corev1.ObjectReference{
- {Name: "test-knowledge", Namespace: "default"},
- })
- pipeline := createTestPipeline([]v1alpha1.StepInPipeline{
- {
- Ref: corev1.ObjectReference{
- Name: "test-step",
- Namespace: "default",
- },
- Mandatory: true,
+ knowledge := &v1alpha1.Knowledge{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-knowledge",
+ Namespace: "default",
+ },
+ Spec: v1alpha1.KnowledgeSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
},
- })
+ }
- objects := []client.Object{step, pipeline}
+ pipeline := &v1alpha1.Pipeline{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pipeline",
+ },
+ Spec: v1alpha1.PipelineSpec{
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Type: v1alpha1.PipelineTypeFilterWeigher,
+ Steps: []v1alpha1.StepSpec{
+ {
+ Type: v1alpha1.StepTypeFilter,
+ Impl: "test-filter",
+ Mandatory: true,
+ Knowledges: []corev1.ObjectReference{
+ {Name: "test-knowledge", Namespace: "default"},
+ },
+ },
+ },
+ },
+ }
- client := fake.NewClientBuilder().
+ fakeClient := fake.NewClientBuilder().
WithScheme(scheme).
- WithObjects(objects...).
- WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}).
+ WithObjects(pipeline).
+ WithStatusSubresource(&v1alpha1.Pipeline{}).
Build()
- initializer := &mockInitializer{}
controller := &BasePipelineController[mockPipeline]{
+ Client: fakeClient,
+ SchedulingDomain: v1alpha1.SchedulingDomainNova,
+ Initializer: &mockPipelineInitializer{
+ pipelineType: v1alpha1.PipelineTypeFilterWeigher,
+ },
Pipelines: map[string]mockPipeline{
"test-pipeline": {name: "test-pipeline"},
},
- Initializer: initializer,
- Client: client,
- SchedulingDomain: "test",
+ PipelineConfigs: make(map[string]v1alpha1.Pipeline),
}
- ctx := ctrl.LoggerInto(context.Background(), ctrl.Log)
- evt := event.DeleteEvent{Object: knowledge}
- queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]())
-
- controller.HandleKnowledgeDeleted(ctx, evt, queue)
-
- // Verify step status was updated (should now be not ready due to missing knowledge)
- var updatedStep v1alpha1.Step
- err := client.Get(ctx, types.NamespacedName{Name: step.Name, Namespace: step.Namespace}, &updatedStep)
- if err != nil {
- t.Fatalf("Failed to get updated step: %v", err)
+ evt := event.DeleteEvent{
+ Object: knowledge,
}
- if updatedStep.Status.Ready {
- t.Error("Expected step to be not ready after knowledge deletion")
+ controller.HandleKnowledgeDeleted(context.Background(), evt, nil)
+
+ // When knowledge is deleted, the pipeline is re-evaluated.
+ // Since the knowledge is now missing and the step is mandatory,
+ // the pipeline should be removed from the map.
+ if _, exists := controller.Pipelines[pipeline.Name]; exists {
+ t.Error("Expected pipeline to be removed after knowledge deletion due to mandatory step")
}
}
diff --git a/internal/scheduling/lib/pipeline_test.go b/internal/scheduling/lib/pipeline_test.go
index de86cc8c8..9a8651384 100644
--- a/internal/scheduling/lib/pipeline_test.go
+++ b/internal/scheduling/lib/pipeline_test.go
@@ -18,7 +18,7 @@ type mockPipelineStep struct {
name string
}
-func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
return nil
}
diff --git a/internal/scheduling/lib/step.go b/internal/scheduling/lib/step.go
index 845b3306d..a25c55a2a 100644
--- a/internal/scheduling/lib/step.go
+++ b/internal/scheduling/lib/step.go
@@ -32,7 +32,7 @@ func (EmptyStepOpts) Validate() error { return nil }
// Interface for a scheduler step.
type Step[RequestType PipelineRequest] interface {
// Configure the step and initialize things like a database connection.
- Init(ctx context.Context, client client.Client, step v1alpha1.Step) error
+ Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error
// Run this step of the scheduling pipeline.
// Return a map of keys to activation values. Important: keys that are
// not in the map are considered as filtered out.
@@ -53,8 +53,8 @@ type BaseStep[RequestType PipelineRequest, Opts StepOpts] struct {
}
// Init the step with the database and options.
-func (s *BaseStep[RequestType, Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
- opts := conf.NewRawOptsBytes(step.Spec.Opts.Raw)
+func (s *BaseStep[RequestType, Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
+ opts := conf.NewRawOptsBytes(step.Opts.Raw)
if err := s.Load(opts); err != nil {
return err
}
diff --git a/internal/scheduling/lib/step_monitor.go b/internal/scheduling/lib/step_monitor.go
index ea6c8f84d..ed6a79bdf 100644
--- a/internal/scheduling/lib/step_monitor.go
+++ b/internal/scheduling/lib/step_monitor.go
@@ -44,7 +44,7 @@ type StepMonitor[RequestType PipelineRequest] struct {
}
// Initialize the wrapped step with the database and options.
-func (s *StepMonitor[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (s *StepMonitor[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
return s.Step.Init(ctx, client, step)
}
@@ -52,12 +52,12 @@ func (s *StepMonitor[RequestType]) Init(ctx context.Context, client client.Clien
func monitorStep[RequestType PipelineRequest](
_ context.Context,
_ client.Client,
- step v1alpha1.Step,
+ step v1alpha1.StepSpec,
impl Step[RequestType],
m PipelineMonitor,
) *StepMonitor[RequestType] {
- stepName := step.Namespace + "/" + step.Name
+ stepName := step.Impl
var runTimer prometheus.Observer
if m.stepRunTimer != nil {
runTimer = m.stepRunTimer.
diff --git a/internal/scheduling/lib/step_test.go b/internal/scheduling/lib/step_test.go
index 275d2a543..31d335cd3 100644
--- a/internal/scheduling/lib/step_test.go
+++ b/internal/scheduling/lib/step_test.go
@@ -12,11 +12,11 @@ import (
)
type mockStep[RequestType PipelineRequest] struct {
- InitFunc func(ctx context.Context, client client.Client, step v1alpha1.Step) error
+ InitFunc func(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error
RunFunc func(traceLog *slog.Logger, request RequestType) (*StepResult, error)
}
-func (m *mockStep[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (m *mockStep[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
return m.InitFunc(ctx, client, step)
}
func (m *mockStep[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*StepResult, error) {
diff --git a/internal/scheduling/lib/step_validation.go b/internal/scheduling/lib/step_validation.go
index 7ede29e59..638bf6e84 100644
--- a/internal/scheduling/lib/step_validation.go
+++ b/internal/scheduling/lib/step_validation.go
@@ -22,9 +22,9 @@ type StepValidator[RequestType PipelineRequest] struct {
}
// Initialize the wrapped step with the database and options.
-func (s *StepValidator[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error {
+func (s *StepValidator[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error {
slog.Info(
- "scheduler: init validation for step", "name", step.Name,
+ "scheduler: init validation for step", "name", step.Impl,
"disabled", s.DisabledValidations,
)
return s.Step.Init(ctx, client, step)
diff --git a/tools/plutono/provisioning/dashboards/cortex-status.json b/tools/plutono/provisioning/dashboards/cortex-status.json
index d3061ecca..fee633efc 100644
--- a/tools/plutono/provisioning/dashboards/cortex-status.json
+++ b/tools/plutono/provisioning/dashboards/cortex-status.json
@@ -276,7 +276,56 @@
},
"unit": "none"
},
- "overrides": []
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byFrameRefID",
+ "options": "B"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgb(71, 71, 71)",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 1
+ }
+ ]
+ }
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byFrameRefID",
+ "options": "C"
+ },
+ "properties": [
+ {
+ "id": "thresholds",
+ "value": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "rgb(71, 71, 71)",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 10
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
},
"gridPos": {
"h": 8,
@@ -284,12 +333,12 @@
"x": 12,
"y": 9
},
- "id": 53,
+ "id": 55,
"options": {
"colorMode": "background",
"graphMode": "none",
- "justifyMode": "center",
- "orientation": "auto",
+ "justifyMode": "auto",
+ "orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
@@ -298,33 +347,42 @@
"values": true
},
"text": {},
- "textMode": "name"
+ "textMode": "value_and_name"
},
"pluginVersion": "7.5.37",
"targets": [
{
"exemplar": true,
- "expr": "max by(step) (cortex_step_state{state!=\"ready\"}) * 0",
+ "expr": "cortex_decision_state{state=\"success\"}",
"format": "time_series",
"hide": false,
"instant": true,
"interval": "",
- "legendFormat": "{{step}}",
+ "legendFormat": "{{state}}",
"refId": "A"
},
{
"exemplar": true,
- "expr": "max by(step) (cortex_step_state{state=\"ready\"}) * 1",
+ "expr": "cortex_decision_state{state!=\"success\",state!=\"waiting\"}",
"hide": false,
"instant": true,
"interval": "",
- "legendFormat": "{{step}}",
+ "legendFormat": "{{state}}",
"refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "cortex_decision_state{state!=\"success\",state=\"waiting\"}",
+ "hide": false,
+ "instant": true,
+ "interval": "",
+ "legendFormat": "{{state}}",
+ "refId": "C"
}
],
"timeFrom": null,
"timeShift": null,
- "title": "Step status",
+ "title": "Decision status",
"type": "stat"
},
{
@@ -401,138 +459,6 @@
"title": "KPI status",
"type": "stat"
},
- {
- "datasource": "prometheus-openstack",
- "fieldConfig": {
- "defaults": {
- "color": {
- "mode": "thresholds"
- },
- "mappings": [],
- "thresholds": {
- "mode": "absolute",
- "steps": [
- {
- "color": "red",
- "value": null
- },
- {
- "color": "rgb(66, 66, 66)",
- "value": 1
- }
- ]
- },
- "unit": "none"
- },
- "overrides": [
- {
- "matcher": {
- "id": "byFrameRefID",
- "options": "B"
- },
- "properties": [
- {
- "id": "thresholds",
- "value": {
- "mode": "absolute",
- "steps": [
- {
- "color": "rgb(71, 71, 71)",
- "value": null
- },
- {
- "color": "red",
- "value": 1
- }
- ]
- }
- }
- ]
- },
- {
- "matcher": {
- "id": "byFrameRefID",
- "options": "C"
- },
- "properties": [
- {
- "id": "thresholds",
- "value": {
- "mode": "absolute",
- "steps": [
- {
- "color": "rgb(71, 71, 71)",
- "value": null
- },
- {
- "color": "red",
- "value": 10
- }
- ]
- }
- }
- ]
- }
- ]
- },
- "gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
- "y": 17
- },
- "id": 55,
- "options": {
- "colorMode": "background",
- "graphMode": "none",
- "justifyMode": "auto",
- "orientation": "horizontal",
- "reduceOptions": {
- "calcs": [
- "lastNotNull"
- ],
- "fields": "",
- "values": true
- },
- "text": {},
- "textMode": "value_and_name"
- },
- "pluginVersion": "7.5.37",
- "targets": [
- {
- "exemplar": true,
- "expr": "cortex_decision_state{state=\"success\"}",
- "format": "time_series",
- "hide": false,
- "instant": true,
- "interval": "",
- "legendFormat": "{{state}}",
- "refId": "A"
- },
- {
- "exemplar": true,
- "expr": "cortex_decision_state{state!=\"success\",state!=\"waiting\"}",
- "hide": false,
- "instant": true,
- "interval": "",
- "legendFormat": "{{state}}",
- "refId": "B"
- },
- {
- "exemplar": true,
- "expr": "cortex_decision_state{state!=\"success\",state=\"waiting\"}",
- "hide": false,
- "instant": true,
- "interval": "",
- "legendFormat": "{{state}}",
- "refId": "C"
- }
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Decision status",
- "type": "stat"
- },
{
"collapsed": false,
"datasource": null,