diff --git a/PROJECT b/PROJECT index 11519f97..5afc755e 100644 --- a/PROJECT +++ b/PROJECT @@ -59,14 +59,6 @@ resources: kind: Descheduling path: github.com/cobaltcore-dev/cortex/api/v1alpha1 version: v1alpha1 -- api: - crdVersion: v1 - controller: true - domain: cortex - group: cortex.cloud - kind: Step - path: github.com/cobaltcore-dev/cortex/api/v1alpha1 - version: v1alpha1 - api: crdVersion: v1 controller: true diff --git a/Tiltfile b/Tiltfile index 654d079f..3777f574 100644 --- a/Tiltfile +++ b/Tiltfile @@ -31,6 +31,7 @@ helm_repo( ) ########### Dependency CRDs +# Make sure the local cluster is running if you are running into startup issues here. url = 'https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/hypervisor-crd.yaml' local('curl ' + url + ' | kubectl apply -f -') diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index 6a65f549..7d6176a8 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -43,7 +43,7 @@ type DecisionSpec struct { type StepResult struct { // object reference to the scheduler step. - StepRef corev1.ObjectReference `json:"stepRef"` + StepName string `json:"stepName"` // Activations of the step for each host. Activations map[string]float64 `json:"activations"` } diff --git a/api/v1alpha1/pipeline_types.go b/api/v1alpha1/pipeline_types.go index 3f1b9021..476c2677 100644 --- a/api/v1alpha1/pipeline_types.go +++ b/api/v1alpha1/pipeline_types.go @@ -6,11 +6,58 @@ package v1alpha1 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" ) -type StepInPipeline struct { - // Reference to the step. - Ref corev1.ObjectReference `json:"ref"` +type DisabledValidationsSpec struct { + // Whether to validate that no subjects are removed or added from the scheduler + // step. This should only be disabled for scheduler steps that remove subjects. 
+ // Thus, if no value is provided, the default is false. + SameSubjectNumberInOut bool `json:"sameSubjectNumberInOut,omitempty"` + // Whether to validate that, after running the step, there are remaining subjects. + // This should only be disabled for scheduler steps that are expected to + // remove all subjects. + SomeSubjectsRemain bool `json:"someSubjectsRemain,omitempty"` +} + +type StepType string + +const ( + // Step for assigning weights to hosts. + StepTypeWeigher StepType = "weigher" + // Step for filtering hosts. + StepTypeFilter StepType = "filter" + // Step for generating descheduling recommendations. + StepTypeDescheduler StepType = "descheduler" +) + +type WeigherSpec struct { + // The validations to disable for this step. If none are provided, all + // applied validations are enabled. + // +kubebuilder:validation:Optional + DisabledValidations DisabledValidationsSpec `json:"disabledValidations,omitempty"` +} + +type StepSpec struct { + // The type of the scheduler step. + Type StepType `json:"type"` + // If the type is "weigher", this contains additional configuration for it. + // +kubebuilder:validation:Optional + Weigher *WeigherSpec `json:"weigher,omitempty"` + + // The name of the scheduler step in the cortex implementation. + Impl string `json:"impl"` + // Additional configuration for the extractor that can be used + // +kubebuilder:validation:Optional + Opts runtime.RawExtension `json:"opts,omitempty"` + // Knowledges this step depends on to be ready. + // +kubebuilder:validation:Optional + Knowledges []corev1.ObjectReference `json:"knowledges,omitempty"` + // Additional description of the step which helps understand its purpose + // and decisions made by it. + // +kubebuilder:validation:Optional + Description string `json:"description,omitempty"` + // Whether this step is mandatory for the pipeline to be runnable. 
// +kubebuilder:default=true Mandatory bool `json:"mandatory"` @@ -41,7 +88,7 @@ type PipelineSpec struct { // The type of the pipeline. Type PipelineType `json:"type"` // The ordered list of steps that make up this pipeline. - Steps []StepInPipeline `json:"steps,omitempty"` + Steps []StepSpec `json:"steps,omitempty"` } const ( diff --git a/api/v1alpha1/step_types.go b/api/v1alpha1/step_types.go deleted file mode 100644 index 40e9d96c..00000000 --- a/api/v1alpha1/step_types.go +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package v1alpha1 - -import ( - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" -) - -type DisabledValidationsSpec struct { - // Whether to validate that no subjects are removed or added from the scheduler - // step. This should only be disabled for scheduler steps that remove subjects. - // Thus, if no value is provided, the default is false. - SameSubjectNumberInOut bool `json:"sameSubjectNumberInOut,omitempty"` - // Whether to validate that, after running the step, there are remaining subjects. - // This should only be disabled for scheduler steps that are expected to - // remove all subjects. - SomeSubjectsRemain bool `json:"someSubjectsRemain,omitempty"` -} - -type StepType string - -const ( - // Step for assigning weights to hosts. - StepTypeWeigher StepType = "weigher" - // Step for filtering hosts. - StepTypeFilter StepType = "filter" - // Step for generating descheduling recommendations. - StepTypeDescheduler StepType = "descheduler" -) - -type WeigherSpec struct { - // The validations to disable for this step. If none are provided, all - // applied validations are enabled. 
- // +kubebuilder:validation:Optional - DisabledValidations DisabledValidationsSpec `json:"disabledValidations,omitempty"` -} - -type StepSpec struct { - // SchedulingDomain defines in which scheduling domain this step - // is used (e.g., nova, cinder, manila). - SchedulingDomain SchedulingDomain `json:"schedulingDomain"` - - // The type of the scheduler step. - Type StepType `json:"type"` - // If the type is "weigher", this contains additional configuration for it. - // +kubebuilder:validation:Optional - Weigher *WeigherSpec `json:"weigher,omitempty"` - - // The name of the scheduler step in the cortex implementation. - Impl string `json:"impl"` - // Additional configuration for the extractor that can be used - // +kubebuilder:validation:Optional - Opts runtime.RawExtension `json:"opts,omitempty"` - // Knowledges this step depends on to be ready. - // +kubebuilder:validation:Optional - Knowledges []corev1.ObjectReference `json:"knowledges,omitempty"` - // Additional description of the step which helps understand its purpose - // and decisions made by it. - // +kubebuilder:validation:Optional - Description string `json:"description,omitempty"` -} - -const ( - // Something went wrong during the step reconciliation. - StepConditionError = "Error" -) - -type StepStatus struct { - // If the step is ready to be executed. - Ready bool `json:"ready"` - // How many knowledges have been extracted. - ReadyKnowledges int `json:"readyKnowledges"` - // Total number of knowledges configured. - TotalKnowledges int `json:"totalKnowledges"` - // "ReadyKnowledges / TotalKnowledges ready" as a human-readable string - // or "ready" if there are no knowledges configured. - KnowledgesReadyFrac string `json:"knowledgesReadyFrac,omitempty"` - // The current status conditions of the step. 
- // +kubebuilder:validation:Optional - Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` -} - -// +kubebuilder:object:root=true -// +kubebuilder:subresource:status -// +kubebuilder:resource:scope=Cluster -// +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" -// +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain" -// +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type" -// +kubebuilder:printcolumn:name="Ready",type="boolean",JSONPath=".status.ready" -// +kubebuilder:printcolumn:name="Knowledges",type="string",JSONPath=".status.knowledgesReadyFrac" - -// Step is the Schema for the deschedulings API -type Step struct { - metav1.TypeMeta `json:",inline"` - - // metadata is a standard object metadata - // +optional - metav1.ObjectMeta `json:"metadata,omitempty,omitzero"` - - // spec defines the desired state of Step - // +required - Spec StepSpec `json:"spec"` - - // status defines the observed state of Step - // +optional - Status StepStatus `json:"status,omitempty,omitzero"` -} - -// +kubebuilder:object:root=true - -// StepList contains a list of Step -type StepList struct { - metav1.TypeMeta `json:",inline"` - metav1.ListMeta `json:"metadata,omitempty"` - Items []Step `json:"items"` -} - -func (*Step) URI() string { return "steps.cortex.cloud/v1alpha1" } -func (*StepList) URI() string { return "steps.cortex.cloud/v1alpha1" } - -func init() { - SchemeBuilder.Register(&Step{}, &StepList{}) -} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index fa1602f4..5097d1e9 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -860,8 +860,10 @@ func (in *PipelineSpec) DeepCopyInto(out *PipelineSpec) { *out = *in if in.Steps != nil { in, out := &in.Steps, &out.Steps - *out = make([]StepInPipeline, len(*in)) - copy(*out, *in) + *out = 
make([]StepSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } } @@ -1077,85 +1079,9 @@ func (in *ReservationStatus) DeepCopy() *ReservationStatus { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Step) DeepCopyInto(out *Step) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) - in.Status.DeepCopyInto(&out.Status) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Step. -func (in *Step) DeepCopy() *Step { - if in == nil { - return nil - } - out := new(Step) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *Step) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *StepInPipeline) DeepCopyInto(out *StepInPipeline) { - *out = *in - out.Ref = in.Ref -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepInPipeline. -func (in *StepInPipeline) DeepCopy() *StepInPipeline { - if in == nil { - return nil - } - out := new(StepInPipeline) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *StepList) DeepCopyInto(out *StepList) { - *out = *in - out.TypeMeta = in.TypeMeta - in.ListMeta.DeepCopyInto(&out.ListMeta) - if in.Items != nil { - in, out := &in.Items, &out.Items - *out = make([]Step, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepList. 
-func (in *StepList) DeepCopy() *StepList { - if in == nil { - return nil - } - out := new(StepList) - in.DeepCopyInto(out) - return out -} - -// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. -func (in *StepList) DeepCopyObject() runtime.Object { - if c := in.DeepCopy(); c != nil { - return c - } - return nil -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *StepResult) DeepCopyInto(out *StepResult) { *out = *in - out.StepRef = in.StepRef if in.Activations != nil { in, out := &in.Activations, &out.Activations *out = make(map[string]float64, len(*in)) @@ -1201,28 +1127,6 @@ func (in *StepSpec) DeepCopy() *StepSpec { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *StepStatus) DeepCopyInto(out *StepStatus) { - *out = *in - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]metav1.Condition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepStatus. -func (in *StepStatus) DeepCopy() *StepStatus { - if in == nil { - return nil - } - out := new(StepStatus) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WeigherSpec) DeepCopyInto(out *WeigherSpec) { *out = *in diff --git a/config/crd/bases/cortex.cloud_decisions.yaml b/config/crd/bases/cortex.cloud_decisions.yaml index 6a8f7b20..c4dc8acb 100644 --- a/config/crd/bases/cortex.cloud_decisions.yaml +++ b/config/crd/bases/cortex.cloud_decisions.yaml @@ -379,52 +379,12 @@ spec: type: number description: Activations of the step for each host. 
type: object - stepRef: + stepName: description: object reference to the scheduler step. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic + type: string required: - activations - - stepRef + - stepName type: object type: array targetHost: diff --git a/config/crd/bases/cortex.cloud_pipelines.yaml b/config/crd/bases/cortex.cloud_pipelines.yaml index a1bad952..43c7dea1 100644 --- a/config/crd/bases/cortex.cloud_pipelines.yaml +++ b/config/crd/bases/cortex.cloud_pipelines.yaml @@ -73,57 +73,101 @@ spec: description: The ordered list of steps that make up this pipeline. items: properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + impl: + description: The name of the scheduler step in the cortex implementation. + type: string + knowledges: + description: Knowledges this step depends on to be ready. + items: + description: ObjectReference contains enough information to + let you inspect or modify the referred object. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + type: array mandatory: default: true description: Whether this step is mandatory for the pipeline to be runnable. type: boolean - ref: - description: Reference to the step. + opts: + description: Additional configuration for the extractor that + can be used + type: object + x-kubernetes-preserve-unknown-fields: true + type: + description: The type of the scheduler step. + type: string + weigher: + description: If the type is "weigher", this contains additional + configuration for it. properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: + disabledValidations: description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. 
- type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string + The validations to disable for this step. If none are provided, all + applied validations are enabled. + properties: + sameSubjectNumberInOut: + description: |- + Whether to validate that no subjects are removed or added from the scheduler + step. This should only be disabled for scheduler steps that remove subjects. + Thus, if no value is provided, the default is false. + type: boolean + someSubjectsRemain: + description: |- + Whether to validate that, after running the step, there are remaining subjects. + This should only be disabled for scheduler steps that are expected to + remove all subjects. 
+ type: boolean + type: object type: object - x-kubernetes-map-type: atomic required: + - impl - mandatory - - ref + - type type: object type: array type: diff --git a/config/crd/bases/cortex.cloud_steps.yaml b/config/crd/bases/cortex.cloud_steps.yaml deleted file mode 100644 index a0269716..00000000 --- a/config/crd/bases/cortex.cloud_steps.yaml +++ /dev/null @@ -1,239 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.17.2 - name: steps.cortex.cloud -spec: - group: cortex.cloud - names: - kind: Step - listKind: StepList - plural: steps - singular: step - scope: Cluster - versions: - - additionalPrinterColumns: - - jsonPath: .metadata.creationTimestamp - name: Created - type: date - - jsonPath: .spec.schedulingDomain - name: Domain - type: string - - jsonPath: .spec.type - name: Type - type: string - - jsonPath: .status.ready - name: Ready - type: boolean - - jsonPath: .status.knowledgesReadyFrac - name: Knowledges - type: string - name: v1alpha1 - schema: - openAPIV3Schema: - description: Step is the Schema for the deschedulings API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: spec defines the desired state of Step - properties: - description: - description: |- - Additional description of the step which helps understand its purpose - and decisions made by it. - type: string - impl: - description: The name of the scheduler step in the cortex implementation. - type: string - knowledges: - description: Knowledges this step depends on to be ready. - items: - description: ObjectReference contains enough information to let - you inspect or modify the referred object. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - type: array - opts: - description: Additional configuration for the extractor that can be - used - type: object - x-kubernetes-preserve-unknown-fields: true - schedulingDomain: - description: |- - SchedulingDomain defines in which scheduling domain this step - is used (e.g., nova, cinder, manila). - type: string - type: - description: The type of the scheduler step. - type: string - weigher: - description: If the type is "weigher", this contains additional configuration - for it. - properties: - disabledValidations: - description: |- - The validations to disable for this step. If none are provided, all - applied validations are enabled. - properties: - sameSubjectNumberInOut: - description: |- - Whether to validate that no subjects are removed or added from the scheduler - step. This should only be disabled for scheduler steps that remove subjects. - Thus, if no value is provided, the default is false. - type: boolean - someSubjectsRemain: - description: |- - Whether to validate that, after running the step, there are remaining subjects. - This should only be disabled for scheduler steps that are expected to - remove all subjects. - type: boolean - type: object - type: object - required: - - impl - - schedulingDomain - - type - type: object - status: - description: status defines the observed state of Step - properties: - conditions: - description: The current status conditions of the step. - items: - description: Condition contains details for one aspect of the current - state of this API Resource. 
- properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: type of condition in CamelCase or in foo.example.com/CamelCase. - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - knowledgesReadyFrac: - description: |- - "ReadyKnowledges / TotalKnowledges ready" as a human-readable string - or "ready" if there are no knowledges configured. 
- type: string - ready: - description: If the step is ready to be executed. - type: boolean - readyKnowledges: - description: How many knowledges have been extracted. - type: integer - totalKnowledges: - description: Total number of knowledges configured. - type: integer - required: - - ready - - readyKnowledges - - totalKnowledges - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} diff --git a/config/crd/cortex.cloud_decisions.yaml b/config/crd/cortex.cloud_decisions.yaml index 6a8f7b20..c4dc8acb 100644 --- a/config/crd/cortex.cloud_decisions.yaml +++ b/config/crd/cortex.cloud_decisions.yaml @@ -379,52 +379,12 @@ spec: type: number description: Activations of the step for each host. type: object - stepRef: + stepName: description: object reference to the scheduler step. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic + type: string required: - activations - - stepRef + - stepName type: object type: array targetHost: diff --git a/config/crd/cortex.cloud_pipelines.yaml b/config/crd/cortex.cloud_pipelines.yaml index a1bad952..43c7dea1 100644 --- a/config/crd/cortex.cloud_pipelines.yaml +++ b/config/crd/cortex.cloud_pipelines.yaml @@ -73,57 +73,101 @@ spec: description: The ordered list of steps that make up this pipeline. items: properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + impl: + description: The name of the scheduler step in the cortex implementation. + type: string + knowledges: + description: Knowledges this step depends on to be ready. + items: + description: ObjectReference contains enough information to + let you inspect or modify the referred object. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
+ For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + type: array mandatory: default: true description: Whether this step is mandatory for the pipeline to be runnable. type: boolean - ref: - description: Reference to the step. + opts: + description: Additional configuration for the extractor that + can be used + type: object + x-kubernetes-preserve-unknown-fields: true + type: + description: The type of the scheduler step. + type: string + weigher: + description: If the type is "weigher", this contains additional + configuration for it. properties: - apiVersion: - description: API version of the referent. 
- type: string - fieldPath: + disabledValidations: description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string + The validations to disable for this step. If none are provided, all + applied validations are enabled. + properties: + sameSubjectNumberInOut: + description: |- + Whether to validate that no subjects are removed or added from the scheduler + step. This should only be disabled for scheduler steps that remove subjects. + Thus, if no value is provided, the default is false. 
+ type: boolean + someSubjectsRemain: + description: |- + Whether to validate that, after running the step, there are remaining subjects. + This should only be disabled for scheduler steps that are expected to + remove all subjects. + type: boolean + type: object type: object - x-kubernetes-map-type: atomic required: + - impl - mandatory - - ref + - type type: object type: array type: diff --git a/config/crd/cortex.cloud_steps.yaml b/config/crd/cortex.cloud_steps.yaml deleted file mode 100644 index a0269716..00000000 --- a/config/crd/cortex.cloud_steps.yaml +++ /dev/null @@ -1,239 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.17.2 - name: steps.cortex.cloud -spec: - group: cortex.cloud - names: - kind: Step - listKind: StepList - plural: steps - singular: step - scope: Cluster - versions: - - additionalPrinterColumns: - - jsonPath: .metadata.creationTimestamp - name: Created - type: date - - jsonPath: .spec.schedulingDomain - name: Domain - type: string - - jsonPath: .spec.type - name: Type - type: string - - jsonPath: .status.ready - name: Ready - type: boolean - - jsonPath: .status.knowledgesReadyFrac - name: Knowledges - type: string - name: v1alpha1 - schema: - openAPIV3Schema: - description: Step is the Schema for the deschedulings API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: spec defines the desired state of Step - properties: - description: - description: |- - Additional description of the step which helps understand its purpose - and decisions made by it. - type: string - impl: - description: The name of the scheduler step in the cortex implementation. - type: string - knowledges: - description: Knowledges this step depends on to be ready. - items: - description: ObjectReference contains enough information to let - you inspect or modify the referred object. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - type: array - opts: - description: Additional configuration for the extractor that can be - used - type: object - x-kubernetes-preserve-unknown-fields: true - schedulingDomain: - description: |- - SchedulingDomain defines in which scheduling domain this step - is used (e.g., nova, cinder, manila). - type: string - type: - description: The type of the scheduler step. - type: string - weigher: - description: If the type is "weigher", this contains additional configuration - for it. - properties: - disabledValidations: - description: |- - The validations to disable for this step. If none are provided, all - applied validations are enabled. - properties: - sameSubjectNumberInOut: - description: |- - Whether to validate that no subjects are removed or added from the scheduler - step. This should only be disabled for scheduler steps that remove subjects. - Thus, if no value is provided, the default is false. - type: boolean - someSubjectsRemain: - description: |- - Whether to validate that, after running the step, there are remaining subjects. - This should only be disabled for scheduler steps that are expected to - remove all subjects. - type: boolean - type: object - type: object - required: - - impl - - schedulingDomain - - type - type: object - status: - description: status defines the observed state of Step - properties: - conditions: - description: The current status conditions of the step. - items: - description: Condition contains details for one aspect of the current - state of this API Resource. 
- properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: type of condition in CamelCase or in foo.example.com/CamelCase. - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - knowledgesReadyFrac: - description: |- - "ReadyKnowledges / TotalKnowledges ready" as a human-readable string - or "ready" if there are no knowledges configured. 
- type: string - ready: - description: If the step is ready to be executed. - type: boolean - readyKnowledges: - description: How many knowledges have been extracted. - type: integer - totalKnowledges: - description: Total number of knowledges configured. - type: integer - required: - - ready - - readyKnowledges - - totalKnowledges - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} diff --git a/dist/chart/templates/crd/cortex.cloud_decisions.yaml b/dist/chart/templates/crd/cortex.cloud_decisions.yaml index c7d4c07f..52d2f07f 100644 --- a/dist/chart/templates/crd/cortex.cloud_decisions.yaml +++ b/dist/chart/templates/crd/cortex.cloud_decisions.yaml @@ -385,52 +385,12 @@ spec: type: number description: Activations of the step for each host. type: object - stepRef: + stepName: description: object reference to the scheduler step. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic + type: string required: - activations - - stepRef + - stepName type: object type: array targetHost: diff --git a/dist/chart/templates/crd/cortex.cloud_pipelines.yaml b/dist/chart/templates/crd/cortex.cloud_pipelines.yaml index 25536537..752b6e23 100644 --- a/dist/chart/templates/crd/cortex.cloud_pipelines.yaml +++ b/dist/chart/templates/crd/cortex.cloud_pipelines.yaml @@ -79,57 +79,101 @@ spec: description: The ordered list of steps that make up this pipeline. items: properties: + description: + description: |- + Additional description of the step which helps understand its purpose + and decisions made by it. + type: string + impl: + description: The name of the scheduler step in the cortex implementation. + type: string + knowledges: + description: Knowledges this step depends on to be ready. + items: + description: ObjectReference contains enough information to + let you inspect or modify the referred object. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
+ For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + type: array mandatory: default: true description: Whether this step is mandatory for the pipeline to be runnable. type: boolean - ref: - description: Reference to the step. + opts: + description: Additional configuration for the extractor that + can be used + type: object + x-kubernetes-preserve-unknown-fields: true + type: + description: The type of the scheduler step. + type: string + weigher: + description: If the type is "weigher", this contains additional + configuration for it. properties: - apiVersion: - description: API version of the referent. 
- type: string - fieldPath: + disabledValidations: description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string + The validations to disable for this step. If none are provided, all + applied validations are enabled. + properties: + sameSubjectNumberInOut: + description: |- + Whether to validate that no subjects are removed or added from the scheduler + step. This should only be disabled for scheduler steps that remove subjects. + Thus, if no value is provided, the default is false. 
+ type: boolean + someSubjectsRemain: + description: |- + Whether to validate that, after running the step, there are remaining subjects. + This should only be disabled for scheduler steps that are expected to + remove all subjects. + type: boolean + type: object type: object - x-kubernetes-map-type: atomic required: + - impl - mandatory - - ref + - type type: object type: array type: diff --git a/dist/chart/templates/crd/cortex.cloud_steps.yaml b/dist/chart/templates/crd/cortex.cloud_steps.yaml deleted file mode 100644 index 15722f7c..00000000 --- a/dist/chart/templates/crd/cortex.cloud_steps.yaml +++ /dev/null @@ -1,246 +0,0 @@ -{{- if .Values.crd.enable }} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - labels: - {{- include "chart.labels" . | nindent 4 }} - annotations: - {{- if .Values.crd.keep }} - "helm.sh/resource-policy": keep - {{- end }} - controller-gen.kubebuilder.io/version: v0.17.2 - name: steps.cortex.cloud -spec: - group: cortex.cloud - names: - kind: Step - listKind: StepList - plural: steps - singular: step - scope: Cluster - versions: - - additionalPrinterColumns: - - jsonPath: .metadata.creationTimestamp - name: Created - type: date - - jsonPath: .spec.schedulingDomain - name: Domain - type: string - - jsonPath: .spec.type - name: Type - type: string - - jsonPath: .status.ready - name: Ready - type: boolean - - jsonPath: .status.knowledgesReadyFrac - name: Knowledges - type: string - name: v1alpha1 - schema: - openAPIV3Schema: - description: Step is the Schema for the deschedulings API - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: spec defines the desired state of Step - properties: - description: - description: |- - Additional description of the step which helps understand its purpose - and decisions made by it. - type: string - impl: - description: The name of the scheduler step in the cortex implementation. - type: string - knowledges: - description: Knowledges this step depends on to be ready. - items: - description: ObjectReference contains enough information to let - you inspect or modify the referred object. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - type: array - opts: - description: Additional configuration for the extractor that can be - used - type: object - x-kubernetes-preserve-unknown-fields: true - schedulingDomain: - description: |- - SchedulingDomain defines in which scheduling domain this step - is used (e.g., nova, cinder, manila). - type: string - type: - description: The type of the scheduler step. - type: string - weigher: - description: If the type is "weigher", this contains additional configuration - for it. - properties: - disabledValidations: - description: |- - The validations to disable for this step. If none are provided, all - applied validations are enabled. - properties: - sameSubjectNumberInOut: - description: |- - Whether to validate that no subjects are removed or added from the scheduler - step. This should only be disabled for scheduler steps that remove subjects. - Thus, if no value is provided, the default is false. - type: boolean - someSubjectsRemain: - description: |- - Whether to validate that, after running the step, there are remaining subjects. - This should only be disabled for scheduler steps that are expected to - remove all subjects. 
- type: boolean - type: object - type: object - required: - - impl - - schedulingDomain - - type - type: object - status: - description: status defines the observed state of Step - properties: - conditions: - description: The current status conditions of the step. - items: - description: Condition contains details for one aspect of the current - state of this API Resource. - properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: type of condition in CamelCase or in foo.example.com/CamelCase. 
- maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - knowledgesReadyFrac: - description: |- - "ReadyKnowledges / TotalKnowledges ready" as a human-readable string - or "ready" if there are no knowledges configured. - type: string - ready: - description: If the step is ready to be executed. - type: boolean - readyKnowledges: - description: How many knowledges have been extracted. - type: integer - totalKnowledges: - description: Total number of knowledges configured. - type: integer - required: - - ready - - readyKnowledges - - totalKnowledges - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} -{{- end -}} diff --git a/helm/bundles/cortex-cinder/templates/steps.yaml b/helm/bundles/cortex-cinder/templates/steps.yaml deleted file mode 100644 index 73b314ff..00000000 --- a/helm/bundles/cortex-cinder/templates/steps.yaml +++ /dev/null @@ -1 +0,0 @@ ---- \ No newline at end of file diff --git a/helm/bundles/cortex-ironcore/templates/pipelines.yaml b/helm/bundles/cortex-ironcore/templates/pipelines.yaml index 60d7dae1..231e95e4 100644 --- a/helm/bundles/cortex-ironcore/templates/pipelines.yaml +++ b/helm/bundles/cortex-ironcore/templates/pipelines.yaml @@ -10,5 +10,11 @@ spec: type: filter-weigher createDecisions: true steps: - - ref: {name: machinepools-noop} + - type: weigher + impl: noop + description: | + This is only a passthrough step which assigns a zero-weight to all machinepool + candidates. It is used as a placeholder step in the ironcore machines scheduler + pipeline. 
+ knowledges: [] mandatory: false diff --git a/helm/bundles/cortex-ironcore/templates/steps.yaml b/helm/bundles/cortex-ironcore/templates/steps.yaml deleted file mode 100644 index f2a15990..00000000 --- a/helm/bundles/cortex-ironcore/templates/steps.yaml +++ /dev/null @@ -1,14 +0,0 @@ ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: machinepools-noop -spec: - schedulingDomain: machines - type: weigher - impl: noop - description: | - This is only a passthrough step which assigns a zero-weight to all machinepool - candidates. It is used as a placeholder step in the ironcore machines scheduler - pipeline. - knowledges: [] diff --git a/helm/bundles/cortex-manila/templates/pipelines.yaml b/helm/bundles/cortex-manila/templates/pipelines.yaml index 60970fb1..aba1e531 100644 --- a/helm/bundles/cortex-manila/templates/pipelines.yaml +++ b/helm/bundles/cortex-manila/templates/pipelines.yaml @@ -12,5 +12,21 @@ spec: Cortex returns a ranked list of hosts back to manila for final selection. type: filter-weigher steps: - - ref: {name: netapp-cpu-usage-balancing-manila} + - type: weigher + impl: netapp_cpu_usage_balancing + description: | + This step uses netapp storage pool cpu metrics condensed into a feature + to balance manila share placements across available storage pools. + Its main purpose is to avoid cpu overutilization on a storage pool which + may lead to performance degradation for shares placed on that pool. 
+ opts: + # Min-max scaling for gap-fitting based on CPU usage (pct) + avgCPUUsageLowerBound: 0 # pct + avgCPUUsageUpperBound: 10 # pct + avgCPUUsageActivationLowerBound: 0.0 + avgCPUUsageActivationUpperBound: -0.75 + maxCPUUsageLowerBound: 0 # pct + maxCPUUsageUpperBound: 10 # pct + maxCPUUsageActivationLowerBound: 0.0 + maxCPUUsageActivationUpperBound: -0.25 mandatory: false diff --git a/helm/bundles/cortex-manila/templates/steps.yaml b/helm/bundles/cortex-manila/templates/steps.yaml deleted file mode 100644 index 5ec8882d..00000000 --- a/helm/bundles/cortex-manila/templates/steps.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: netapp-cpu-usage-balancing-manila -spec: - schedulingDomain: manila - type: weigher - impl: netapp_cpu_usage_balancing - description: | - This step uses netapp storage pool cpu metrics condensed into a feature - to balance manila share placements across available storage pools. - Its main purpose is to avoid cpu overutilization on a storage pool which - may lead to performance degradation for shares placed on that pool. 
- opts: - # Min-max scaling for gap-fitting based on CPU usage (pct) - avgCPUUsageLowerBound: 0 # pct - avgCPUUsageUpperBound: 10 # pct - avgCPUUsageActivationLowerBound: 0.0 - avgCPUUsageActivationUpperBound: -0.75 - maxCPUUsageLowerBound: 0 # pct - maxCPUUsageUpperBound: 10 # pct - maxCPUUsageActivationLowerBound: 0.0 - maxCPUUsageActivationUpperBound: -0.25 \ No newline at end of file diff --git a/helm/bundles/cortex-nova/templates/pipelines.yaml b/helm/bundles/cortex-nova/templates/pipelines.yaml index 981ab4b5..0009c58e 100644 --- a/helm/bundles/cortex-nova/templates/pipelines.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines.yaml @@ -15,13 +15,73 @@ spec: type: filter-weigher createDecisions: false steps: - - ref: {name: vmware-hana-binpacking} + - type: weigher + impl: vmware_hana_binpacking + description: | + This step pulls HANA VMs onto the smallest possible gaps on HANA-exclusive + VMware hosts. In this way hosts with much free space are held free for + larger HANA VMs, improving overall packing efficiency for HANA workloads. + knowledges: + - name: host-utilization + - name: host-capabilities + opts: + ramUtilizedAfterLowerBoundPct: 0 + ramUtilizedAfterUpperBoundPct: 100 + ramUtilizedAfterActivationLowerBound: 0.0 + ramUtilizedAfterActivationUpperBound: 1.0 mandatory: false - - ref: {name: vmware-general-purpose-balancing} + - type: weigher + impl: vmware_general_purpose_balancing + description: | + This step balances non-HANA VMs across non-HANA exclusive VMware hosts. It + pulls VMs onto the freest hosts possible to ensure an even distribution of + workloads across the available infrastructure.
+ knowledges: + - name: host-utilization + - name: host-capabilities + opts: + ramUtilizedLowerBoundPct: 0 + ramUtilizedUpperBoundPct: 100 + ramUtilizedActivationLowerBound: 1.0 + ramUtilizedActivationUpperBound: 0.0 mandatory: false - - ref: {name: vmware-avoid-long-term-contended-hosts} + - type: weigher + impl: vmware_avoid_long_term_contended_hosts + description: | + This step avoids placing vms on vmware hosts with a high CPU contention over + a longer period of time, based on vrops contention metrics. In particular, + this step looks at a longer time window of 4 weeks to identify hosts that + are consistently contended. + knowledges: + - name: vmware-long-term-contended-hosts + opts: + avgCPUContentionLowerBound: 0 # pct + avgCPUContentionUpperBound: 10 # pct + avgCPUContentionActivationLowerBound: 0.0 + avgCPUContentionActivationUpperBound: -0.75 + maxCPUContentionLowerBound: 0 # pct + maxCPUContentionUpperBound: 10 # pct + maxCPUContentionActivationLowerBound: 0.0 + maxCPUContentionActivationUpperBound: -0.25 mandatory: false - - ref: {name: vmware-avoid-short-term-contended-hosts} + - type: weigher + impl: vmware_avoid_short_term_contended_hosts + description: | + This step avoids placing vms on vmware hosts with a high CPU contention over + a shorter period of time, based on vrops contention metrics. In particular, + this step looks at a shorter time window of 20 minutes to identify hosts that + are currently contended. 
+ knowledges: + - name: vmware-short-term-contended-hosts + opts: + avgCPUContentionLowerBound: 0 # pct + avgCPUContentionUpperBound: 10 # pct + avgCPUContentionActivationLowerBound: 0.0 + avgCPUContentionActivationUpperBound: -0.75 + maxCPUContentionLowerBound: 0 # pct + maxCPUContentionUpperBound: 10 # pct + maxCPUContentionActivationLowerBound: 0.0 + maxCPUContentionActivationUpperBound: -0.25 mandatory: false --- apiVersion: cortex.cloud/v1alpha1 @@ -59,17 +119,85 @@ spec: createDecisions: true {{- end }} steps: - - ref: {name: filter-host-instructions} - - ref: {name: filter-has-enough-capacity} - - ref: {name: filter-has-requested-traits} - - ref: {name: filter-has-accelerators} - - ref: {name: filter-correct-az} - - ref: {name: filter-status-conditions} - - ref: {name: filter-maintenance} - - ref: {name: filter-external-customer} - - ref: {name: filter-packed-virtqueue} - - ref: {name: filter-allowed-projects} - - ref: {name: filter-capabilities} + - type: filter + impl: filter_host_instructions + description: | + This step will consider the `ignore_hosts` and `force_hosts` instructions + from the nova scheduler request spec to filter out or exclusively allow + certain hosts. + knowledges: [] + - type: filter + impl: filter_has_enough_capacity + description: | + This step will filter out hosts that do not have enough available capacity + to host the requested flavor. If enabled, this step will subtract the + current reservations residing on this host from the available capacity. + opts: + # If reserved space should be locked even for matching requests. + # For the reservations pipeline, we don't want to unlock + # reserved space, to avoid reservations for the same project + # and flavor to overlap. 
+ lockReserved: true + - type: filter + impl: filter_has_requested_traits + description: | + This step filters hosts that do not have the requested traits given by the + nova flavor extra spec: "trait:$TRAIT_NAME": "forbidden" means the host must + not have the specified trait. "trait:$TRAIT_NAME": "required" means the host + must have the specified trait. + - type: filter + impl: filter_has_accelerators + description: | + This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if + the nova flavor extra specs request accelerators via "accel:device_profile". + - type: filter + impl: filter_correct_az + description: | + This step will filter out hosts whose aggregate information indicates they + are not placed in the requested availability zone. + - type: filter + impl: filter_status_conditions + description: | + This step will filter out hosts for which the hypervisor status conditions + do not meet the expected values, for example, that the hypervisor is ready + and not disabled. + - type: filter + impl: filter_maintenance + description: | + This step will filter out hosts that are currently in maintenance mode that + prevents scheduling, for example, manual maintenance or termination. + - type: filter + impl: filter_external_customer + description: | + This step prefix-matches the domain name for external customer domains and + filters out hosts that are not intended for external customers. It considers + the `CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED` trait on hosts as well as the + `domain_name` scheduler hint from the nova request spec. + opts: + domainNamePrefixes: ["iaas-"] + - type: filter + impl: filter_packed_virtqueue + description: | + If the flavor extra specs contain the `hw:virtio_packed_ring` key, or the + image properties contain the `hw_virtio_packed_ring` key, this step will + filter out hosts that do not have the `COMPUTE_NET_VIRTIO_PACKED` trait. 
+ - type: filter + impl: filter_allowed_projects + description: | + This step filters hosts based on allowed projects defined in the + hypervisor resource. Note that hosts allowing all projects are still + accessible and will not be filtered out. In this way some hypervisors + are made accessible to some projects only. + - type: filter + impl: filter_capabilities + description: | + This step will filter out hosts that do not meet the compute capabilities + requested by the nova flavor extra specs, like `{"arch": "x86_64", + "maxphysaddr:bits": 46, ...}`. + + Note: currently, advanced boolean/numeric operators for the capabilities + like `>`, `!`, ... are not supported because they are not used by any of our + flavors in production. --- apiVersion: cortex.cloud/v1alpha1 kind: Pipeline @@ -86,5 +214,13 @@ spec: createDecisions: true {{- end }} steps: - - ref: {name: avoid-high-steal-pct} + - type: descheduler + impl: avoid_high_steal_pct + description: | + This step will deschedule VMs once they reach this CPU steal percentage over + the observed time span. + knowledges: + - name: kvm-libvirt-domain-cpu-steal-pct + opts: + maxStealPctOverObservedTimeSpan: 20.0 mandatory: false diff --git a/helm/bundles/cortex-nova/templates/steps.yaml b/helm/bundles/cortex-nova/templates/steps.yaml deleted file mode 100644 index c958f74e..00000000 --- a/helm/bundles/cortex-nova/templates/steps.yaml +++ /dev/null @@ -1,263 +0,0 @@ ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: vmware-hana-binpacking -spec: - schedulingDomain: nova - type: weigher - impl: vmware_hana_binpacking - description: | - This step pulls HANA VMs onto the smallest possible gaps on HANA-exclusive - VMware hosts. In this way hosts with much free space are held free for - larger HANA VMs, improving overall packing efficiency for HANA workloads. 
- knowledges: - - name: host-utilization - - name: host-capabilities - opts: - ramUtilizedAfterLowerBoundPct: 0 - ramUtilizedAfterUpperBoundPct: 100 - ramUtilizedAfterActivationLowerBound: 0.0 - ramUtilizedAfterActivationUpperBound: 1.0 ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: vmware-general-purpose-balancing -spec: - schedulingDomain: nova - type: weigher - impl: vmware_general_purpose_balancing - description: | - This step balances non-HANA VMs across non-HANA exclusive VMware hosts. It - pulls vms onto the freeest hosts possible to ensure an even distribution of - workloads across the available infrastructure. - knowledges: - - name: host-utilization - - name: host-capabilities - opts: - ramUtilizedLowerBoundPct: 0 - ramUtilizedUpperBoundPct: 100 - ramUtilizedActivationLowerBound: 1.0 - ramUtilizedActivationUpperBound: 0.0 ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: vmware-avoid-long-term-contended-hosts -spec: - schedulingDomain: nova - type: weigher - impl: vmware_avoid_long_term_contended_hosts - description: | - This step avoids placing vms on vmware hosts with a high CPU contention over - a longer period of time, based on vrops contention metrics. In particular, - this step looks at a longer time window of 4 weeks to identify hosts that - are consistently contended. 
- knowledges: - - name: vmware-long-term-contended-hosts - opts: - avgCPUContentionLowerBound: 0 # pct - avgCPUContentionUpperBound: 10 # pct - avgCPUContentionActivationLowerBound: 0.0 - avgCPUContentionActivationUpperBound: -0.75 - maxCPUContentionLowerBound: 0 # pct - maxCPUContentionUpperBound: 10 # pct - maxCPUContentionActivationLowerBound: 0.0 - maxCPUContentionActivationUpperBound: -0.25 ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: vmware-avoid-short-term-contended-hosts -spec: - schedulingDomain: nova - type: weigher - impl: vmware_avoid_short_term_contended_hosts - description: | - This step avoids placing vms on vmware hosts with a high CPU contention over - a shorter period of time, based on vrops contention metrics. In particular, - this step looks at a shorter time window of 20 minutes to identify hosts that - are currently contended. - knowledges: - - name: vmware-short-term-contended-hosts - opts: - avgCPUContentionLowerBound: 0 # pct - avgCPUContentionUpperBound: 10 # pct - avgCPUContentionActivationLowerBound: 0.0 - avgCPUContentionActivationUpperBound: -0.75 - maxCPUContentionLowerBound: 0 # pct - maxCPUContentionUpperBound: 10 # pct - maxCPUContentionActivationLowerBound: 0.0 - maxCPUContentionActivationUpperBound: -0.25 ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-host-instructions -spec: - schedulingDomain: nova - type: filter - impl: filter_host_instructions - description: | - This step will consider the `ignore_hosts` and `force_hosts` instructions - from the nova scheduler request spec to filter out or exclusively allow - certain hosts. - knowledges: [] ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-has-enough-capacity -spec: - schedulingDomain: nova - type: filter - impl: filter_has_enough_capacity - description: | - This step will filter out hosts that do not have enough available capacity - to host the requested flavor. 
If enabled, this step will subtract the - current reservations residing on this host from the available capacity. - opts: - # If reserved space should be locked even for matching requests. - # For the reservations pipeline, we don't want to unlock - # reserved space, to avoid reservations for the same project - # and flavor to overlap. - lockReserved: true ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-has-requested-traits -spec: - schedulingDomain: nova - type: filter - impl: filter_has_requested_traits - description: | - This step filters hosts that do not have the requested traits given by the - nova flavor extra spec: "trait:": "forbidden" means the host must - not have the specified trait. "trait:": "required" means the host - must have the specified trait. ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-has-accelerators -spec: - schedulingDomain: nova - type: filter - impl: filter_has_accelerators - description: | - This step will filter out hosts without the trait `COMPUTE_ACCELERATORS` if - the nova flavor extra specs request accelerators via "accel:device_profile". ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-correct-az -spec: - schedulingDomain: nova - type: filter - impl: filter_correct_az - description: | - This step will filter out hosts whose aggregate information indicates they - are not placed in the requested availability zone. ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-status-conditions -spec: - schedulingDomain: nova - type: filter - impl: filter_status_conditions - description: | - This step will filter out hosts for which the hypervisor status conditions - do not meet the expected values, for example, that the hypervisor is ready - and not disabled. 
---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-maintenance -spec: - schedulingDomain: nova - type: filter - impl: filter_maintenance - description: | - This step will filter out hosts that are currently in maintenance mode that - prevents scheduling, for example, manual maintenance or termination. ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-external-customer -spec: - schedulingDomain: nova - type: filter - impl: filter_external_customer - description: | - This step prefix-matches the domain name for external customer domains and - filters out hosts that are not intended for external customers. It considers - the `CUSTOM_EXTERNAL_CUSTOMER_SUPPORTED` trait on hosts as well as the - `domain_name` scheduler hint from the nova request spec. - opts: - domainNamePrefixes: ["iaas-"] ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-packed-virtqueue -spec: - schedulingDomain: nova - type: filter - impl: filter_packed_virtqueue - description: | - If the flavor extra specs contain the `hw:virtio_packed_ring` key, or the - image properties contain the `hw_virtio_packed_ring` key, this step will - filter out hosts that do not have the `COMPUTE_NET_VIRTIO_PACKED` trait. ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-allowed-projects -spec: - schedulingDomain: nova - type: filter - impl: filter_allowed_projects - description: | - This step filters hosts based on allowed projects defined in the - hypervisor resource. Note that hosts allowing all projects are still - accessible and will not be filtered out. In this way some hypervisors - are made accessible to some projects only. 
---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: filter-capabilities -spec: - schedulingDomain: nova - type: filter - impl: filter_capabilities - description: | - This step will filter out hosts that do not meet the compute capabilities - requested by the nova flavor extra specs, like `{"arch": "x86_64", - "maxphysaddr:bits": 46, ...}`. - - Note: currently, advanced boolean/numeric operators for the capabilities - like `>`, `!`, ... are not supported because they are not used by any of our - flavors in production. ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: avoid-high-steal-pct -spec: - schedulingDomain: nova - type: descheduler - impl: avoid_high_steal_pct - description: | - This step will deschedule VMs once they reach this CPU steal percentage over - the observed time span. - knowledges: [] - opts: - maxStealPctOverObservedTimeSpan: 20.0 diff --git a/helm/bundles/cortex-pods/templates/pipelines.yaml b/helm/bundles/cortex-pods/templates/pipelines.yaml index 6059547e..aec8db63 100644 --- a/helm/bundles/cortex-pods/templates/pipelines.yaml +++ b/helm/bundles/cortex-pods/templates/pipelines.yaml @@ -10,5 +10,10 @@ spec: type: filter-weigher createDecisions: true steps: - - ref: { name: pods-noop } + - type: filter + impl: noop + description: | + This is only a passthrough step which lets all pod candidates through. + It is used as a placeholder step in the pods scheduler pipeline. + knowledges: [] mandatory: false diff --git a/helm/bundles/cortex-pods/templates/steps.yaml b/helm/bundles/cortex-pods/templates/steps.yaml deleted file mode 100644 index ad926bf0..00000000 --- a/helm/bundles/cortex-pods/templates/steps.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -apiVersion: cortex.cloud/v1alpha1 -kind: Step -metadata: - name: pods-noop -spec: - schedulingDomain: pods - type: filter - impl: noop - description: | - This is only a passthrough step which lets all pod candidates through. 
- It is used as a placeholder step in the pods scheduler pipeline. - knowledges: [] diff --git a/internal/knowledge/kpis/plugins/deployment/step_state.go b/internal/knowledge/kpis/plugins/deployment/step_state.go deleted file mode 100644 index f39cffe8..00000000 --- a/internal/knowledge/kpis/plugins/deployment/step_state.go +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package deployment - -import ( - "context" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis/plugins" - "github.com/cobaltcore-dev/cortex/pkg/conf" - "github.com/cobaltcore-dev/cortex/pkg/db" - "github.com/prometheus/client_golang/prometheus" - "k8s.io/apimachinery/pkg/api/meta" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -type StepStateKPIOpts struct { - // The scheduling domain to filter steps by. - StepSchedulingDomain v1alpha1.SchedulingDomain `json:"stepSchedulingDomain"` -} - -// KPI observing the state of step resources managed by cortex. -type StepStateKPI struct { - // Common base for all KPIs that provides standard functionality. - plugins.BaseKPI[StepStateKPIOpts] - - // Prometheus descriptor for the step state metric. - counter *prometheus.Desc -} - -func (StepStateKPI) GetName() string { return "step_state_kpi" } - -// Initialize the KPI. -func (k *StepStateKPI) Init(db *db.DB, client client.Client, opts conf.RawOpts) error { - if err := k.BaseKPI.Init(db, client, opts); err != nil { - return err - } - k.counter = prometheus.NewDesc( - "cortex_step_state", - "State of cortex managed steps", - []string{"operator", "step", "state"}, - nil, - ) - return nil -} - -// Conform to the prometheus collector interface by providing the descriptor. -func (k *StepStateKPI) Describe(ch chan<- *prometheus.Desc) { ch <- k.counter } - -// Collect the step state metrics. -func (k *StepStateKPI) Collect(ch chan<- prometheus.Metric) { - // Get all steps with the specified step operator. 
- stepList := &v1alpha1.StepList{} - if err := k.Client.List(context.Background(), stepList); err != nil { - return - } - var steps []v1alpha1.Step - for _, step := range stepList.Items { - if step.Spec.SchedulingDomain != k.Options.StepSchedulingDomain { - continue - } - steps = append(steps, step) - } - // For each step, emit a metric with its state. - for _, step := range steps { - var state string - switch { - case meta.IsStatusConditionTrue(step.Status.Conditions, v1alpha1.StepConditionError): - state = "error" - case step.Status.Ready: - state = "ready" - default: - state = "unknown" - } - ch <- prometheus.MustNewConstMetric( - k.counter, prometheus.GaugeValue, 1, - string(k.Options.StepSchedulingDomain), step.Name, state, - ) - } -} diff --git a/internal/knowledge/kpis/plugins/deployment/step_state_test.go b/internal/knowledge/kpis/plugins/deployment/step_state_test.go deleted file mode 100644 index b5f96e85..00000000 --- a/internal/knowledge/kpis/plugins/deployment/step_state_test.go +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package deployment - -import ( - "testing" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/pkg/conf" - "github.com/prometheus/client_golang/prometheus" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestStepStateKPI_Init(t *testing.T) { - kpi := &StepStateKPI{} - if err := kpi.Init(nil, nil, conf.NewRawOpts(`{"stepSchedulingDomain": "test-operator"}`)); err != nil { - t.Fatalf("expected no error, got %v", err) - } -} - -func TestStepStateKPI_Collect(t *testing.T) { - scheme, err := v1alpha1.SchemeBuilder.Build() - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - tests := []struct { - name string - steps []v1alpha1.Step - operator string - expectedCount int - description string - }{ - { - name: "no steps", - steps: []v1alpha1.Step{}, - operator: "test-operator", - 
expectedCount: 0, - description: "should not collect metrics when no steps exist", - }, - { - name: "single ready step", - steps: []v1alpha1.Step{ - { - ObjectMeta: v1.ObjectMeta{Name: "step1"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.StepStatus{ - Ready: true, - Conditions: []v1.Condition{}, - }, - }, - }, - operator: "test-operator", - expectedCount: 1, - description: "should collect metric for ready step", - }, - { - name: "step in error state", - steps: []v1alpha1.Step{ - { - ObjectMeta: v1.ObjectMeta{Name: "step2"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.StepStatus{ - Ready: false, - Conditions: []v1.Condition{ - { - Type: v1alpha1.StepConditionError, - Status: v1.ConditionTrue, - }, - }, - }, - }, - }, - operator: "test-operator", - expectedCount: 1, - description: "should collect metric for error step", - }, - { - name: "multiple steps different states", - steps: []v1alpha1.Step{ - { - ObjectMeta: v1.ObjectMeta{Name: "step-ready"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.StepStatus{ - Ready: true, - Conditions: []v1.Condition{}, - }, - }, - { - ObjectMeta: v1.ObjectMeta{Name: "step-error"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.StepStatus{ - Ready: false, - Conditions: []v1.Condition{ - { - Type: v1alpha1.StepConditionError, - Status: v1.ConditionTrue, - }, - }, - }, - }, - }, - operator: "test-operator", - expectedCount: 2, - description: "should collect metrics for all steps with different states", - }, - { - name: "filter by operator", - steps: []v1alpha1.Step{ - { - ObjectMeta: v1.ObjectMeta{Name: "step-correct-operator"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.StepStatus{ - Ready: true, - Conditions: []v1.Condition{}, - }, - }, - { - ObjectMeta: v1.ObjectMeta{Name: "step-wrong-operator"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: 
"other-operator"}, - Status: v1alpha1.StepStatus{ - Ready: true, - Conditions: []v1.Condition{}, - }, - }, - }, - operator: "test-operator", - expectedCount: 1, - description: "should only collect metrics for steps with matching operator", - }, - { - name: "step with unknown state", - steps: []v1alpha1.Step{ - { - ObjectMeta: v1.ObjectMeta{Name: "step-unknown"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.StepStatus{ - Ready: false, - Conditions: []v1.Condition{}, - }, - }, - }, - operator: "test-operator", - expectedCount: 1, - description: "should collect metric with unknown state for step without ready status or error condition", - }, - { - name: "error condition takes precedence over ready status", - steps: []v1alpha1.Step{ - { - ObjectMeta: v1.ObjectMeta{Name: "step-error-priority"}, - Spec: v1alpha1.StepSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.StepStatus{ - Ready: true, - Conditions: []v1.Condition{ - { - Type: v1alpha1.StepConditionError, - Status: v1.ConditionTrue, - }, - }, - }, - }, - }, - operator: "test-operator", - expectedCount: 1, - description: "should report error state even if ready status is true", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := make([]v1alpha1.Step, len(tt.steps)) - copy(objects, tt.steps) - - clientBuilder := fake.NewClientBuilder().WithScheme(scheme) - for i := range objects { - clientBuilder = clientBuilder.WithObjects(&objects[i]) - } - client := clientBuilder.Build() - - kpi := &StepStateKPI{} - if err := kpi.Init(nil, client, conf.NewRawOpts(`{"stepSchedulingDomain": "`+tt.operator+`"}`)); err != nil { - t.Fatalf("expected no error, got %v", err) - } - - ch := make(chan prometheus.Metric, 10) - kpi.Collect(ch) - close(ch) - - metricsCount := 0 - for range ch { - metricsCount++ - } - - if metricsCount != tt.expectedCount { - t.Errorf("%s: expected %d metrics, got %d", tt.description, tt.expectedCount, metricsCount) - } - }) - 
} -} - -func TestStepStateKPI_GetName(t *testing.T) { - kpi := &StepStateKPI{} - expectedName := "step_state_kpi" - if name := kpi.GetName(); name != expectedName { - t.Errorf("expected name %q, got %q", expectedName, name) - } -} - -func TestStepStateKPI_Describe(t *testing.T) { - kpi := &StepStateKPI{} - if err := kpi.Init(nil, nil, conf.NewRawOpts(`{"stepSchedulingDomain": "test-operator"}`)); err != nil { - t.Fatalf("expected no error, got %v", err) - } - - ch := make(chan *prometheus.Desc, 1) - kpi.Describe(ch) - close(ch) - - descCount := 0 - for range ch { - descCount++ - } - - if descCount != 1 { - t.Errorf("expected 1 descriptor, got %d", descCount) - } -} diff --git a/internal/knowledge/kpis/supported_kpis.go b/internal/knowledge/kpis/supported_kpis.go index e9f83810..f98c8683 100644 --- a/internal/knowledge/kpis/supported_kpis.go +++ b/internal/knowledge/kpis/supported_kpis.go @@ -30,5 +30,4 @@ var supportedKPIs = map[string]plugins.KPI{ "decision_state_kpi": &deployment.DecisionStateKPI{}, "kpi_state_kpi": &deployment.KPIStateKPI{}, "pipeline_state_kpi": &deployment.PipelineStateKPI{}, - "step_state_kpi": &deployment.StepStateKPI{}, } diff --git a/internal/scheduling/decisions/cinder/pipeline_controller.go b/internal/scheduling/decisions/cinder/pipeline_controller.go index 7de976f7..66f4f5cc 100644 --- a/internal/scheduling/decisions/cinder/pipeline_controller.go +++ b/internal/scheduling/decisions/cinder/pipeline_controller.go @@ -8,7 +8,6 @@ import ( "encoding/json" "errors" "fmt" - "slices" "sync" "time" @@ -131,11 +130,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al // The base controller will delegate the pipeline creation down to this method. 
func (c *DecisionPipelineController) InitPipeline( ctx context.Context, - name string, - steps []v1alpha1.Step, + p v1alpha1.Pipeline, ) (lib.Pipeline[api.ExternalSchedulerRequest], error) { - return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor) + return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) } func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { @@ -162,29 +160,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl * return pipeline.Spec.Type == c.PipelineType() }), ). - // Watch step changes so that we can turn on/off pipelines depending on - // unready steps. - WatchesMulticluster( - &v1alpha1.Step{}, - handler.Funcs{ - CreateFunc: c.HandleStepCreated, - UpdateFunc: c.HandleStepUpdated, - DeleteFunc: c.HandleStepDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - step := obj.(*v1alpha1.Step) - // Only react to steps matching the scheduling domain. - if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { - return false - } - // Only react to filter and weigher steps. - supportedTypes := []v1alpha1.StepType{ - v1alpha1.StepTypeFilter, - v1alpha1.StepTypeWeigher, - } - return slices.Contains(supportedTypes, step.Spec.Type) - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. 
WatchesMulticluster( &v1alpha1.Knowledge{}, diff --git a/internal/scheduling/decisions/cinder/pipeline_controller_test.go b/internal/scheduling/decisions/cinder/pipeline_controller_test.go index 0acbe0d3..2355687b 100644 --- a/internal/scheduling/decisions/cinder/pipeline_controller_test.go +++ b/internal/scheduling/decisions/cinder/pipeline_controller_test.go @@ -85,7 +85,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, expectError: false, @@ -114,7 +114,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, expectError: true, @@ -170,7 +170,16 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(t.Context(), tt.pipeline.Name, []v1alpha1.Step{}) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + Type: v1alpha1.PipelineTypeFilterWeigher, + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + Steps: []v1alpha1.StepSpec{}, + }, + }) if err != nil { t.Fatalf("Failed to init pipeline: %v", err) } @@ -284,7 +293,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: true, @@ -318,7 +327,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, 
SchedulingDomain: v1alpha1.SchedulingDomainCinder, CreateDecisions: false, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: false, @@ -373,7 +382,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: true, @@ -411,7 +420,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - pipeline, err := controller.InitPipeline(t.Context(), tt.pipelineConfig.Name, []v1alpha1.Step{}) + pipeline, err := controller.InitPipeline(t.Context(), *tt.pipelineConfig) if err != nil { t.Fatalf("Failed to init pipeline: %v", err) } @@ -480,25 +489,20 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { tests := []struct { name string - steps []v1alpha1.Step + steps []v1alpha1.StepSpec expectError bool }{ { name: "empty steps", - steps: []v1alpha1.Step{}, + steps: []v1alpha1.StepSpec{}, expectError: false, }, { name: "unsupported step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "test-plugin", - }, + Type: v1alpha1.StepTypeFilter, + Impl: "test-plugin", }, }, expectError: true, // Expected because test-plugin is not in supportedSteps @@ -507,7 +511,16 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + Type: 
v1alpha1.PipelineTypeFilterWeigher, + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + Steps: tt.steps, + }, + }) if tt.expectError && err == nil { t.Error("Expected error but got none") diff --git a/internal/scheduling/decisions/explanation/explainer.go b/internal/scheduling/decisions/explanation/explainer.go index 1dc44b74..a5f199fa 100644 --- a/internal/scheduling/decisions/explanation/explainer.go +++ b/internal/scheduling/decisions/explanation/explainer.go @@ -231,7 +231,7 @@ func (e *Explainer) calculateScoresFromSteps(inputWeights map[string]float64, st for hostName := range currentScores { if _, exists := stepResult.Activations[hostName]; !exists { // Host not in this step's activations - will be deleted - deletedHosts[hostName] = append(deletedHosts[hostName], stepResult.StepRef.Name) + deletedHosts[hostName] = append(deletedHosts[hostName], stepResult.StepName) } } @@ -305,7 +305,7 @@ func (e *Explainer) findCriticalSteps(decision *v1alpha1.Decision) []string { // If removing this step changes the winner, it's critical if reducedWinner != baselineWinner { - criticalSteps = append(criticalSteps, stepResult.StepRef.Name) + criticalSteps = append(criticalSteps, stepResult.StepName) } } @@ -374,7 +374,7 @@ func (e *Explainer) calculateStepImpacts(inputWeights map[string]float64, stepRe promotedToFirst := !wasFirst && isFirstAfter impacts = append(impacts, StepImpact{ - Step: stepResult.StepRef.Name, + Step: stepResult.StepName, ScoreBefore: scoreBefore, ScoreAfter: scoreAfter, ScoreDelta: scoreAfter - scoreBefore, diff --git a/internal/scheduling/decisions/explanation/explainer_test.go b/internal/scheduling/decisions/explanation/explainer_test.go index a483152f..ed1d52e1 100644 --- a/internal/scheduling/decisions/explanation/explainer_test.go +++ b/internal/scheduling/decisions/explanation/explainer_test.go @@ -278,7 +278,7 @@ func WithHistoryRef(decision, historyDecision *v1alpha1.Decision) *v1alpha1.Deci // Generic step creator func Step(name string, 
activations map[string]float64) v1alpha1.StepResult { return v1alpha1.StepResult{ - StepRef: corev1.ObjectReference{Name: name}, + StepName: name, Activations: activations, } } @@ -446,28 +446,28 @@ func DecisionWithHistory(name, winner string) *DecisionBuilder { // Step result builders for common pipeline steps func ResourceWeigherStep(activations map[string]float64) v1alpha1.StepResult { return v1alpha1.StepResult{ - StepRef: corev1.ObjectReference{Name: "resource-weigher"}, + StepName: "resource-weigher", Activations: activations, } } func AvailabilityFilterStep(activations map[string]float64) v1alpha1.StepResult { return v1alpha1.StepResult{ - StepRef: corev1.ObjectReference{Name: "availability-filter"}, + StepName: "availability-filter", Activations: activations, } } func PlacementPolicyStep(activations map[string]float64) v1alpha1.StepResult { return v1alpha1.StepResult{ - StepRef: corev1.ObjectReference{Name: "placement-policy"}, + StepName: "placement-policy", Activations: activations, } } func WeigherStep(name string, activations map[string]float64) v1alpha1.StepResult { return v1alpha1.StepResult{ - StepRef: corev1.ObjectReference{Name: name}, + StepName: name, Activations: activations, } } diff --git a/internal/scheduling/decisions/machines/noop.go b/internal/scheduling/decisions/machines/noop.go index 88caebbc..3b0104aa 100644 --- a/internal/scheduling/decisions/machines/noop.go +++ b/internal/scheduling/decisions/machines/noop.go @@ -15,7 +15,7 @@ type NoopFilter struct { Alias string } -func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { return nil } diff --git a/internal/scheduling/decisions/machines/pipeline_controller.go b/internal/scheduling/decisions/machines/pipeline_controller.go index 14ddc35f..d90f9c39 100644 --- a/internal/scheduling/decisions/machines/pipeline_controller.go +++ 
b/internal/scheduling/decisions/machines/pipeline_controller.go @@ -7,7 +7,6 @@ import ( "context" "errors" "fmt" - "slices" "sync" "time" @@ -173,11 +172,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al // The base controller will delegate the pipeline creation down to this method. func (c *DecisionPipelineController) InitPipeline( ctx context.Context, - name string, - steps []v1alpha1.Step, + p v1alpha1.Pipeline, ) (lib.Pipeline[ironcore.MachinePipelineRequest], error) { - return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor) + return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) } func (c *DecisionPipelineController) handleMachine() handler.EventHandler { @@ -261,29 +259,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl * return pipeline.Spec.Type == c.PipelineType() }), ). - // Watch step changes so that we can turn on/off pipelines depending on - // unready steps. - WatchesMulticluster( - &v1alpha1.Step{}, - handler.Funcs{ - CreateFunc: c.HandleStepCreated, - UpdateFunc: c.HandleStepUpdated, - DeleteFunc: c.HandleStepDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - step := obj.(*v1alpha1.Step) - // Only react to steps matching the scheduling domain. - if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { - return false - } - // Only react to filter and weigher steps. - supportedTypes := []v1alpha1.StepType{ - v1alpha1.StepTypeFilter, - v1alpha1.StepTypeWeigher, - } - return slices.Contains(supportedTypes, step.Spec.Type) - }), - ). Named("cortex-machine-scheduler"). 
For( &v1alpha1.Decision{}, diff --git a/internal/scheduling/decisions/machines/pipeline_controller_test.go b/internal/scheduling/decisions/machines/pipeline_controller_test.go index 64596149..f7eb8bf3 100644 --- a/internal/scheduling/decisions/machines/pipeline_controller_test.go +++ b/internal/scheduling/decisions/machines/pipeline_controller_test.go @@ -215,34 +215,30 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { tests := []struct { name string - steps []v1alpha1.Step + steps []v1alpha1.StepSpec expectError bool }{ { name: "empty steps", - steps: []v1alpha1.Step{}, + steps: []v1alpha1.StepSpec{}, expectError: false, }, { name: "noop step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - Spec: v1alpha1.StepSpec{ - Impl: "noop", - Type: v1alpha1.StepTypeFilter, - }, + Impl: "noop", + Type: v1alpha1.StepTypeFilter, }, }, expectError: false, }, { name: "unsupported step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - Spec: v1alpha1.StepSpec{ - Impl: "unsupported", - Type: v1alpha1.StepTypeFilter, - }, + Impl: "unsupported", + Type: v1alpha1.StepTypeFilter, }, }, expectError: true, @@ -251,7 +247,16 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + Type: v1alpha1.PipelineTypeFilterWeigher, + SchedulingDomain: v1alpha1.SchedulingDomainMachines, + Steps: tt.steps, + }, + }) if tt.expectError && err == nil { t.Error("expected error but got none") @@ -317,7 +322,7 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: 
[]v1alpha1.StepSpec{}, }, }, createDecisions: true, @@ -350,7 +355,7 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, CreateDecisions: false, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: false, @@ -396,7 +401,7 @@ func TestDecisionPipelineController_ProcessNewMachine(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: true, diff --git a/internal/scheduling/decisions/manila/pipeline_controller.go b/internal/scheduling/decisions/manila/pipeline_controller.go index 06451586..89f7b25e 100644 --- a/internal/scheduling/decisions/manila/pipeline_controller.go +++ b/internal/scheduling/decisions/manila/pipeline_controller.go @@ -8,7 +8,6 @@ import ( "encoding/json" "errors" "fmt" - "slices" "sync" "time" @@ -131,11 +130,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al // The base controller will delegate the pipeline creation down to this method. func (c *DecisionPipelineController) InitPipeline( ctx context.Context, - name string, - steps []v1alpha1.Step, + p v1alpha1.Pipeline, ) (lib.Pipeline[api.ExternalSchedulerRequest], error) { - return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor) + return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) } func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { @@ -162,29 +160,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl * return pipeline.Spec.Type == c.PipelineType() }), ). - // Watch step changes so that we can turn on/off pipelines depending on - // unready steps. 
- WatchesMulticluster( - &v1alpha1.Step{}, - handler.Funcs{ - CreateFunc: c.HandleStepCreated, - UpdateFunc: c.HandleStepUpdated, - DeleteFunc: c.HandleStepDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - step := obj.(*v1alpha1.Step) - // Only react to steps matching the scheduling domain. - if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { - return false - } - // Only react to filter and weigher steps. - supportedTypes := []v1alpha1.StepType{ - v1alpha1.StepTypeFilter, - v1alpha1.StepTypeWeigher, - } - return slices.Contains(supportedTypes, step.Spec.Type) - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. WatchesMulticluster( &v1alpha1.Knowledge{}, diff --git a/internal/scheduling/decisions/manila/pipeline_controller_test.go b/internal/scheduling/decisions/manila/pipeline_controller_test.go index 9ab5c0b2..837fca8a 100644 --- a/internal/scheduling/decisions/manila/pipeline_controller_test.go +++ b/internal/scheduling/decisions/manila/pipeline_controller_test.go @@ -85,7 +85,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, expectError: false, @@ -114,7 +114,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, expectError: true, @@ -170,7 +170,12 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(t.Context(), tt.pipeline.Name, []v1alpha1.Step{}) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: tt.pipeline.Name, + }, + 
Spec: tt.pipeline.Spec, + }) if err != nil { t.Fatalf("Failed to init pipeline: %v", err) } @@ -284,7 +289,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: true, @@ -318,7 +323,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, CreateDecisions: false, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: false, @@ -373,7 +378,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: true, @@ -411,7 +416,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - pipeline, err := controller.InitPipeline(t.Context(), tt.pipelineConfig.Name, []v1alpha1.Step{}) + pipeline, err := controller.InitPipeline(t.Context(), *tt.pipelineConfig) if err != nil { t.Fatalf("Failed to init pipeline: %v", err) } @@ -480,27 +485,22 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { tests := []struct { name string - steps []v1alpha1.Step + steps []v1alpha1.StepSpec expectError bool }{ { name: "empty steps", - steps: []v1alpha1.Step{}, + steps: []v1alpha1.StepSpec{}, expectError: false, }, { name: "supported netapp step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeWeigher, - Impl: 
"netapp_cpu_usage_balancing", - Opts: runtime.RawExtension{ - Raw: []byte(`{"AvgCPUUsageLowerBound": 0, "AvgCPUUsageUpperBound": 90, "MaxCPUUsageLowerBound": 0, "MaxCPUUsageUpperBound": 100}`), - }, + Type: v1alpha1.StepTypeWeigher, + Impl: "netapp_cpu_usage_balancing", + Opts: runtime.RawExtension{ + Raw: []byte(`{"AvgCPUUsageLowerBound": 0, "AvgCPUUsageUpperBound": 90, "MaxCPUUsageLowerBound": 0, "MaxCPUUsageUpperBound": 100}`), }, }, }, @@ -508,15 +508,10 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { }, { name: "unsupported step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "unsupported-plugin", - }, + Type: v1alpha1.StepTypeFilter, + Impl: "unsupported-plugin", }, }, expectError: true, @@ -525,7 +520,16 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + Type: v1alpha1.PipelineTypeFilterWeigher, + SchedulingDomain: v1alpha1.SchedulingDomainManila, + Steps: tt.steps, + }, + }) if tt.expectError && err == nil { t.Error("Expected error but got none") diff --git a/internal/scheduling/decisions/nova/pipeline_controller.go b/internal/scheduling/decisions/nova/pipeline_controller.go index 68bdd6c7..d47aa7df 100644 --- a/internal/scheduling/decisions/nova/pipeline_controller.go +++ b/internal/scheduling/decisions/nova/pipeline_controller.go @@ -8,7 +8,6 @@ import ( "encoding/json" "errors" "fmt" - "slices" "sync" "time" @@ -132,11 +131,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al // The base controller will delegate the pipeline creation down to this 
method. func (c *DecisionPipelineController) InitPipeline( ctx context.Context, - name string, - steps []v1alpha1.Step, + p v1alpha1.Pipeline, ) (lib.Pipeline[api.ExternalSchedulerRequest], error) { - return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor) + return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) } func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { @@ -163,29 +161,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl * return pipeline.Spec.Type == c.PipelineType() }), ). - // Watch step changes so that we can turn on/off pipelines depending on - // unready steps. - WatchesMulticluster( - &v1alpha1.Step{}, - handler.Funcs{ - CreateFunc: c.HandleStepCreated, - UpdateFunc: c.HandleStepUpdated, - DeleteFunc: c.HandleStepDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - step := obj.(*v1alpha1.Step) - // Only react to steps matching the scheduling domain. - if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false - } - // Only react to filter and weigher steps. - supportedTypes := []v1alpha1.StepType{ - v1alpha1.StepTypeFilter, - v1alpha1.StepTypeWeigher, - } - return slices.Contains(supportedTypes, step.Spec.Type) - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. 
WatchesMulticluster( &v1alpha1.Knowledge{}, diff --git a/internal/scheduling/decisions/nova/pipeline_controller_test.go b/internal/scheduling/decisions/nova/pipeline_controller_test.go index 2b6c637d..7a53e274 100644 --- a/internal/scheduling/decisions/nova/pipeline_controller_test.go +++ b/internal/scheduling/decisions/nova/pipeline_controller_test.go @@ -93,7 +93,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, expectError: false, @@ -122,7 +122,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, expectError: true, @@ -175,7 +175,7 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, expectError: true, @@ -209,7 +209,12 @@ func TestDecisionPipelineController_Reconcile(t *testing.T) { } if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(t.Context(), tt.pipeline.Name, []v1alpha1.Step{}) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: tt.pipeline.Name, + }, + Spec: tt.pipeline.Spec, + }) if err != nil { t.Fatalf("Failed to init pipeline: %v", err) } @@ -268,57 +273,42 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { tests := []struct { name string - steps []v1alpha1.Step + steps []v1alpha1.StepSpec expectError bool }{ { name: "empty steps", - steps: []v1alpha1.Step{}, + steps: []v1alpha1.StepSpec{}, expectError: false, }, { name: "supported 
step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "filter_status_conditions", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "filter_status_conditions", - }, + Type: v1alpha1.StepTypeFilter, + Impl: "filter_status_conditions", }, }, expectError: false, }, { name: "unsupported step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "unsupported-plugin", - }, + Type: v1alpha1.StepTypeFilter, + Impl: "unsupported-plugin", }, }, expectError: true, }, { name: "step with scoping options", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "scoped-filter", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "filter_status_conditions", - Opts: runtime.RawExtension{ - Raw: []byte(`{"scope":{"host_capabilities":{"any_of_trait_infixes":["TEST_TRAIT"]}}}`), - }, + Type: v1alpha1.StepTypeFilter, + Impl: "filter_status_conditions", + Opts: runtime.RawExtension{ + Raw: []byte(`{"scope":{"host_capabilities":{"any_of_trait_infixes":["TEST_TRAIT"]}}}`), }, }, }, @@ -326,17 +316,12 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { }, { name: "step with invalid scoping options", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "invalid-scoped-filter", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeFilter, - Impl: "filter_status_conditions", - Opts: runtime.RawExtension{ - Raw: []byte(`invalid json`), - }, + Type: v1alpha1.StepTypeFilter, + Impl: "filter_status_conditions", + Opts: runtime.RawExtension{ + Raw: []byte(`invalid json`), }, }, }, @@ -346,7 +331,14 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := 
controller.InitPipeline(t.Context(), "test-pipeline", tt.steps) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + Steps: tt.steps, + }, + }) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -437,7 +429,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, pipelineConf: &v1alpha1.Pipeline{ @@ -448,7 +440,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, setupPipelineConfigs: true, @@ -484,7 +476,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: false, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, pipelineConf: &v1alpha1.Pipeline{ @@ -495,7 +487,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: false, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, setupPipelineConfigs: true, @@ -556,7 +548,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, pipelineConf: &v1alpha1.Pipeline{ @@ -567,7 +559,7 @@ func 
TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, setupPipelineConfigs: true, @@ -605,7 +597,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, setupPipelineConfigs: true, @@ -643,7 +635,7 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainNova, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, setupPipelineConfigs: true, @@ -689,7 +681,12 @@ func TestDecisionPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { // Setup runtime pipeline if needed if tt.pipeline != nil { - pipeline, err := controller.InitPipeline(context.Background(), tt.pipeline.Name, []v1alpha1.Step{}) + pipeline, err := controller.InitPipeline(context.Background(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: tt.pipeline.Name, + }, + Spec: tt.pipeline.Spec, + }) if err != nil { t.Fatalf("Failed to init pipeline: %v", err) } diff --git a/internal/scheduling/decisions/pods/noop.go b/internal/scheduling/decisions/pods/noop.go index ec7eece1..55f04174 100644 --- a/internal/scheduling/decisions/pods/noop.go +++ b/internal/scheduling/decisions/pods/noop.go @@ -15,7 +15,7 @@ type NoopFilter struct { Alias string } -func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (f *NoopFilter) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { return nil } diff --git a/internal/scheduling/decisions/pods/pipeline_controller.go 
b/internal/scheduling/decisions/pods/pipeline_controller.go index a969a36c..dc9312c0 100644 --- a/internal/scheduling/decisions/pods/pipeline_controller.go +++ b/internal/scheduling/decisions/pods/pipeline_controller.go @@ -7,7 +7,6 @@ import ( "context" "errors" "fmt" - "slices" "sync" "time" @@ -184,11 +183,10 @@ func (c *DecisionPipelineController) process(ctx context.Context, decision *v1al // The base controller will delegate the pipeline creation down to this method. func (c *DecisionPipelineController) InitPipeline( ctx context.Context, - name string, - steps []v1alpha1.Step, + p v1alpha1.Pipeline, ) (lib.Pipeline[pods.PodPipelineRequest], error) { - return lib.NewPipeline(ctx, c.Client, name, supportedSteps, steps, c.Monitor) + return lib.NewPipeline(ctx, c.Client, p.Name, supportedSteps, p.Spec.Steps, c.Monitor) } func (c *DecisionPipelineController) handlePod() handler.EventHandler { @@ -268,29 +266,6 @@ func (c *DecisionPipelineController) SetupWithManager(mgr manager.Manager, mcl * return pipeline.Spec.Type == v1alpha1.PipelineTypeFilterWeigher }), ). - // Watch step changes so that we can turn on/off pipelines depending on - // unready steps. - WatchesMulticluster( - &v1alpha1.Step{}, - handler.Funcs{ - CreateFunc: c.HandleStepCreated, - UpdateFunc: c.HandleStepUpdated, - DeleteFunc: c.HandleStepDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - step := obj.(*v1alpha1.Step) - // Only react to steps matching the scheduling domain. - if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - return false - } - // Only react to filter and weigher steps. - supportedTypes := []v1alpha1.StepType{ - v1alpha1.StepTypeFilter, - v1alpha1.StepTypeWeigher, - } - return slices.Contains(supportedTypes, step.Spec.Type) - }), - ). Named("cortex-pod-scheduler"). 
For( &v1alpha1.Decision{}, diff --git a/internal/scheduling/decisions/pods/pipeline_controller_test.go b/internal/scheduling/decisions/pods/pipeline_controller_test.go index 2521952d..63642b26 100644 --- a/internal/scheduling/decisions/pods/pipeline_controller_test.go +++ b/internal/scheduling/decisions/pods/pipeline_controller_test.go @@ -190,34 +190,30 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { tests := []struct { name string - steps []v1alpha1.Step + steps []v1alpha1.StepSpec expectError bool }{ { name: "empty steps", - steps: []v1alpha1.Step{}, + steps: []v1alpha1.StepSpec{}, expectError: false, }, { name: "noop step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - Spec: v1alpha1.StepSpec{ - Impl: "noop", - Type: v1alpha1.StepTypeFilter, - }, + Impl: "noop", + Type: v1alpha1.StepTypeFilter, }, }, expectError: false, }, { name: "unsupported step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - Spec: v1alpha1.StepSpec{ - Impl: "unsupported", - Type: v1alpha1.StepTypeFilter, - }, + Impl: "unsupported", + Type: v1alpha1.StepTypeFilter, }, }, expectError: true, @@ -226,7 +222,14 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline, err := controller.InitPipeline(t.Context(), "test", tt.steps) + pipeline, err := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + Steps: tt.steps, + }, + }) if tt.expectError && err == nil { t.Error("expected error but got none") @@ -292,7 +295,7 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: true, @@ -325,7 +328,7 @@ func TestDecisionPipelineController_ProcessNewPod(t 
*testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, CreateDecisions: false, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: false, @@ -371,7 +374,7 @@ func TestDecisionPipelineController_ProcessNewPod(t *testing.T) { Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainPods, CreateDecisions: true, - Steps: []v1alpha1.StepInPipeline{}, + Steps: []v1alpha1.StepSpec{}, }, }, createDecisions: true, diff --git a/internal/scheduling/descheduling/nova/monitor.go b/internal/scheduling/descheduling/nova/monitor.go index 998a73f6..239c2f92 100644 --- a/internal/scheduling/descheduling/nova/monitor.go +++ b/internal/scheduling/descheduling/nova/monitor.go @@ -83,8 +83,8 @@ type StepMonitor struct { } // Monitor a step by wrapping it with a StepMonitor. -func monitorStep(step Step, conf v1alpha1.Step, monitor Monitor) StepMonitor { - name := conf.Namespace + "/" + conf.Name +func monitorStep(step Step, conf v1alpha1.StepSpec, monitor Monitor) StepMonitor { + name := conf.Impl var runTimer prometheus.Observer if monitor.stepRunTimer != nil { runTimer = monitor.stepRunTimer.WithLabelValues(name) @@ -102,7 +102,7 @@ func monitorStep(step Step, conf v1alpha1.Step, monitor Monitor) StepMonitor { } // Initialize the step with the database and options. 
-func (m StepMonitor) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (m StepMonitor) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { return m.step.Init(ctx, client, step) } diff --git a/internal/scheduling/descheduling/nova/monitor_test.go b/internal/scheduling/descheduling/nova/monitor_test.go index 4ee16c1a..7c665af0 100644 --- a/internal/scheduling/descheduling/nova/monitor_test.go +++ b/internal/scheduling/descheduling/nova/monitor_test.go @@ -12,7 +12,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -81,7 +80,7 @@ type mockMonitorStep struct { runCalled bool } -func (m *mockMonitorStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (m *mockMonitorStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { m.initCalled = true return m.initError } @@ -98,7 +97,7 @@ func TestMonitorStep(t *testing.T) { {VMID: "vm1", Reason: "test"}, }, } - conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}} + conf := v1alpha1.StepSpec{Impl: "test-step"} monitoredStep := monitorStep(step, conf, monitor) @@ -118,12 +117,12 @@ func TestMonitorStep(t *testing.T) { func TestStepMonitor_Init(t *testing.T) { monitor := NewPipelineMonitor() step := &mockMonitorStep{} - conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}} + conf := v1alpha1.StepSpec{Impl: "test-step"} monitoredStep := monitorStep(step, conf, monitor) client := fake.NewClientBuilder().Build() - err := monitoredStep.Init(t.Context(), client, conf) + err := monitoredStep.Init(context.Background(), client, conf) if err != nil { t.Errorf("unexpected error: %v", err) @@ -140,11 
+139,11 @@ func TestStepMonitor_Init_WithError(t *testing.T) { step := &mockMonitorStep{ initError: expectedErr, } - conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}} + conf := v1alpha1.StepSpec{Impl: "test-step"} monitoredStep := monitorStep(step, conf, monitor) client := fake.NewClientBuilder().Build() - err := monitoredStep.Init(t.Context(), client, conf) + err := monitoredStep.Init(context.Background(), client, conf) if !errors.Is(err, expectedErr) { t.Errorf("expected error %v, got %v", expectedErr, err) @@ -160,7 +159,7 @@ func TestStepMonitor_Run(t *testing.T) { step := &mockMonitorStep{ decisions: decisions, } - conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}} + conf := v1alpha1.StepSpec{Impl: "test-step"} monitoredStep := monitorStep(step, conf, monitor) result, err := monitoredStep.Run() @@ -178,7 +177,7 @@ func TestStepMonitor_Run(t *testing.T) { } // Verify that the counter was incremented - counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("/test-step")) + counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("test-step")) if counterValue != 2.0 { t.Errorf("expected counter value 2.0, got %f", counterValue) } @@ -190,7 +189,7 @@ func TestStepMonitor_Run_WithError(t *testing.T) { step := &mockMonitorStep{ runError: expectedErr, } - conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}} + conf := v1alpha1.StepSpec{Impl: "test-step"} monitoredStep := monitorStep(step, conf, monitor) result, err := monitoredStep.Run() @@ -204,7 +203,7 @@ func TestStepMonitor_Run_WithError(t *testing.T) { } // Counter should not be incremented on error - counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("/test-step")) + counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("test-step")) if counterValue != 0.0 { t.Errorf("expected counter value 0.0, got %f", counterValue) } @@ -215,7 +214,7 @@ func 
TestStepMonitor_Run_EmptyResult(t *testing.T) { step := &mockMonitorStep{ decisions: []plugins.Decision{}, // Empty slice } - conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}} + conf := v1alpha1.StepSpec{Impl: "test-step"} monitoredStep := monitorStep(step, conf, monitor) result, err := monitoredStep.Run() @@ -229,7 +228,7 @@ func TestStepMonitor_Run_EmptyResult(t *testing.T) { } // Counter should be 0 for empty results - counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("/test-step")) + counterValue := testutil.ToFloat64(monitor.stepDeschedulingCounter.WithLabelValues("test-step")) if counterValue != 0.0 { t.Errorf("expected counter value 0.0, got %f", counterValue) } @@ -243,7 +242,7 @@ func TestMonitorStep_WithNilMonitor(t *testing.T) { {VMID: "vm1", Reason: "test"}, }, } - conf := v1alpha1.Step{ObjectMeta: v1.ObjectMeta{Name: "test-step"}} + conf := v1alpha1.StepSpec{Impl: "test-step"} monitoredStep := monitorStep(step, conf, monitor) // Should not panic with nil timers/counters diff --git a/internal/scheduling/descheduling/nova/pipeline.go b/internal/scheduling/descheduling/nova/pipeline.go index 107dc0d3..8bf60b14 100644 --- a/internal/scheduling/descheduling/nova/pipeline.go +++ b/internal/scheduling/descheduling/nova/pipeline.go @@ -33,7 +33,7 @@ type Pipeline struct { func (p *Pipeline) Init( ctx context.Context, - confedSteps []v1alpha1.Step, + confedSteps []v1alpha1.StepSpec, supportedSteps map[string]Step, ) error { @@ -41,18 +41,17 @@ func (p *Pipeline) Init( // Load all steps from the configuration. 
p.steps = make(map[string]Step, len(confedSteps)) for _, stepConf := range confedSteps { - step, ok := supportedSteps[stepConf.Spec.Impl] + step, ok := supportedSteps[stepConf.Impl] if !ok { - return errors.New("descheduler: unsupported step: " + stepConf.Spec.Impl) + return errors.New("descheduler: unsupported step: " + stepConf.Impl) } step = monitorStep(step, stepConf, p.Monitor) if err := step.Init(ctx, p.Client, stepConf); err != nil { return err } - namespacedName := stepConf.Namespace + "/" + stepConf.Name - p.steps[namespacedName] = step - p.order = append(p.order, namespacedName) - slog.Info("descheduler: added step", "name", namespacedName) + p.steps[stepConf.Impl] = step + p.order = append(p.order, stepConf.Impl) + slog.Info("descheduler: added step", "name", stepConf.Impl) } return nil } @@ -67,7 +66,7 @@ func (p *Pipeline) run() map[string][]plugins.Decision { var lock sync.Mutex decisionsByStep := map[string][]plugins.Decision{} var wg sync.WaitGroup - for namespacedName, step := range p.steps { + for stepName, step := range p.steps { wg.Go(func() { slog.Info("descheduler: running step") decisions, err := step.Run() @@ -82,7 +81,7 @@ func (p *Pipeline) run() map[string][]plugins.Decision { slog.Info("descheduler: finished step") lock.Lock() defer lock.Unlock() - decisionsByStep[namespacedName] = decisions + decisionsByStep[stepName] = decisions }) } wg.Wait() diff --git a/internal/scheduling/descheduling/nova/pipeline_controller.go b/internal/scheduling/descheduling/nova/pipeline_controller.go index a39c9f34..4a357a5e 100644 --- a/internal/scheduling/descheduling/nova/pipeline_controller.go +++ b/internal/scheduling/descheduling/nova/pipeline_controller.go @@ -6,7 +6,6 @@ package nova import ( "context" "log/slog" - "slices" "time" "github.com/cobaltcore-dev/cortex/api/v1alpha1" @@ -47,13 +46,13 @@ func (c *DeschedulingsPipelineController) PipelineType() v1alpha1.PipelineType { } // The base controller will delegate the pipeline creation down to this 
method. -func (c *DeschedulingsPipelineController) InitPipeline(ctx context.Context, name string, steps []v1alpha1.Step) (*Pipeline, error) { +func (c *DeschedulingsPipelineController) InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (*Pipeline, error) { pipeline := &Pipeline{ Client: c.Client, CycleDetector: c.CycleDetector, - Monitor: c.Monitor.SubPipeline(name), + Monitor: c.Monitor.SubPipeline(p.Name), } - err := pipeline.Init(ctx, steps, supportedSteps) + err := pipeline.Init(ctx, p.Spec.Steps, supportedSteps) return pipeline, err } @@ -114,28 +113,6 @@ func (c *DeschedulingsPipelineController) SetupWithManager(mgr ctrl.Manager, mcl return pipeline.Spec.Type == c.PipelineType() }), ). - // Watch step changes so that we can turn on/off pipelines depending on - // unready steps. - WatchesMulticluster( - &v1alpha1.Step{}, - handler.Funcs{ - CreateFunc: c.HandleStepCreated, - UpdateFunc: c.HandleStepUpdated, - DeleteFunc: c.HandleStepDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - step := obj.(*v1alpha1.Step) - // Only react to steps matching the scheduling domain. - if step.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false - } - // Only react to filter and weigher steps. - supportedTypes := []v1alpha1.StepType{ - v1alpha1.StepTypeDescheduler, - } - return slices.Contains(supportedTypes, step.Spec.Type) - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. 
WatchesMulticluster( &v1alpha1.Knowledge{}, diff --git a/internal/scheduling/descheduling/nova/pipeline_controller_test.go b/internal/scheduling/descheduling/nova/pipeline_controller_test.go index 16783e9a..78369e5a 100644 --- a/internal/scheduling/descheduling/nova/pipeline_controller_test.go +++ b/internal/scheduling/descheduling/nova/pipeline_controller_test.go @@ -12,7 +12,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/pkg/conf" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -34,43 +33,34 @@ type mockControllerStep struct{} func (m *mockControllerStep) Run() ([]plugins.Decision, error) { return nil, nil } -func (m *mockControllerStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (m *mockControllerStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { return nil } func TestDeschedulingsPipelineController_InitPipeline(t *testing.T) { tests := []struct { name string - steps []v1alpha1.Step + steps []v1alpha1.StepSpec expectError bool expectedError string }{ { name: "successful pipeline initialization", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeDescheduler, - Impl: "mock-step", - }, + Type: v1alpha1.StepTypeDescheduler, + Impl: "mock-step", }, }, expectError: false, }, { name: "unsupported step", - steps: []v1alpha1.Step{ + steps: []v1alpha1.StepSpec{ { - ObjectMeta: metav1.ObjectMeta{ - Name: "unsupported-step", - }, - Spec: v1alpha1.StepSpec{ - Type: v1alpha1.StepTypeDescheduler, - Impl: "unsupported", - }, + + Type: v1alpha1.StepTypeDescheduler, + Impl: "unsupported", }, }, expectError: true, @@ -78,7 +68,7 @@ func 
TestDeschedulingsPipelineController_InitPipeline(t *testing.T) { }, { name: "empty steps", - steps: []v1alpha1.Step{}, + steps: []v1alpha1.StepSpec{}, expectError: false, }, } diff --git a/internal/scheduling/descheduling/nova/pipeline_test.go b/internal/scheduling/descheduling/nova/pipeline_test.go index d83d278b..c5e00b9f 100644 --- a/internal/scheduling/descheduling/nova/pipeline_test.go +++ b/internal/scheduling/descheduling/nova/pipeline_test.go @@ -11,7 +11,6 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/descheduling/nova/plugins" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -31,7 +30,7 @@ func (m *mockPipelineStep) Run() ([]plugins.Decision, error) { return m.decisions, nil } -func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { if m.initError != nil { return m.initError } @@ -43,7 +42,7 @@ func TestPipeline_Init(t *testing.T) { tests := []struct { name string supportedSteps map[string]Step - confedSteps []v1alpha1.Step + confedSteps []v1alpha1.StepSpec expectedSteps int expectedError bool }{ @@ -52,12 +51,10 @@ func TestPipeline_Init(t *testing.T) { supportedSteps: map[string]Step{ "test-step": &mockPipelineStep{}, }, - confedSteps: []v1alpha1.Step{ - {ObjectMeta: v1.ObjectMeta{Name: "step1"}, Spec: v1alpha1.StepSpec{ - Impl: "test-step", - Type: v1alpha1.StepTypeDescheduler, - }}, - }, + confedSteps: []v1alpha1.StepSpec{{ + Impl: "test-step", + Type: v1alpha1.StepTypeDescheduler, + }}, expectedSteps: 1, }, { @@ -65,12 +62,10 @@ func TestPipeline_Init(t *testing.T) { supportedSteps: map[string]Step{ "test-step": &mockPipelineStep{}, }, - confedSteps: []v1alpha1.Step{ - {ObjectMeta: v1.ObjectMeta{Name: "step2"}, Spec: v1alpha1.StepSpec{ - Impl: "unsupported-step", - Type: 
v1alpha1.StepTypeDescheduler, - }}, - }, + confedSteps: []v1alpha1.StepSpec{{ + Impl: "unsupported-step", + Type: v1alpha1.StepTypeDescheduler, + }}, expectedError: true, }, { @@ -78,12 +73,10 @@ func TestPipeline_Init(t *testing.T) { supportedSteps: map[string]Step{ "failing-step": &mockPipelineStep{initError: errors.New("init failed")}, }, - confedSteps: []v1alpha1.Step{ - {ObjectMeta: v1.ObjectMeta{Name: "step3"}, Spec: v1alpha1.StepSpec{ - Impl: "failing-step", - Type: v1alpha1.StepTypeDescheduler, - }}, - }, + confedSteps: []v1alpha1.StepSpec{{ + Impl: "failing-step", + Type: v1alpha1.StepTypeDescheduler, + }}, expectedError: true, }, { @@ -92,15 +85,15 @@ func TestPipeline_Init(t *testing.T) { "step1": &mockPipelineStep{}, "step2": &mockPipelineStep{}, }, - confedSteps: []v1alpha1.Step{ - {ObjectMeta: v1.ObjectMeta{Name: "step1"}, Spec: v1alpha1.StepSpec{ + confedSteps: []v1alpha1.StepSpec{ + { Impl: "step1", Type: v1alpha1.StepTypeDescheduler, - }}, - {ObjectMeta: v1.ObjectMeta{Name: "step2"}, Spec: v1alpha1.StepSpec{ + }, + { Impl: "step2", Type: v1alpha1.StepTypeDescheduler, - }}, + }, }, expectedSteps: 2, }, diff --git a/internal/scheduling/descheduling/nova/plugins/base.go b/internal/scheduling/descheduling/nova/plugins/base.go index 1ba37d25..f312f402 100644 --- a/internal/scheduling/descheduling/nova/plugins/base.go +++ b/internal/scheduling/descheduling/nova/plugins/base.go @@ -21,8 +21,8 @@ type BaseStep[Opts any] struct { } // Init the step with the database and options. 
-func (s *BaseStep[Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { - opts := conf.NewRawOptsBytes(step.Spec.Opts.Raw) +func (s *BaseStep[Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { + opts := conf.NewRawOptsBytes(step.Opts.Raw) if err := s.Load(opts); err != nil { return err } diff --git a/internal/scheduling/descheduling/nova/plugins/base_test.go b/internal/scheduling/descheduling/nova/plugins/base_test.go index 60830cf7..f646523b 100644 --- a/internal/scheduling/descheduling/nova/plugins/base_test.go +++ b/internal/scheduling/descheduling/nova/plugins/base_test.go @@ -23,13 +23,11 @@ func (o MockOptions) Validate() error { func TestBaseStep_Init(t *testing.T) { step := BaseStep[MockOptions]{} cl := fake.NewClientBuilder().Build() - err := step.Init(t.Context(), cl, v1alpha1.Step{ - Spec: v1alpha1.StepSpec{ - Opts: runtime.RawExtension{Raw: []byte(`{ - "option1": "value1", - "option2": 2 - }`)}, - }, + err := step.Init(t.Context(), cl, v1alpha1.StepSpec{ + Opts: runtime.RawExtension{Raw: []byte(`{ + "option1": "value1", + "option2": 2 + }`)}, }) if err != nil { t.Fatalf("expected no error, got %v", err) diff --git a/internal/scheduling/descheduling/nova/step.go b/internal/scheduling/descheduling/nova/step.go index c9b16b76..7c53bc99 100644 --- a/internal/scheduling/descheduling/nova/step.go +++ b/internal/scheduling/descheduling/nova/step.go @@ -21,5 +21,5 @@ type Step interface { // Get the VMs on their current hosts that should be considered for descheduling. Run() ([]plugins.Decision, error) // Configure the step with a database and options. 
- Init(ctx context.Context, client client.Client, step v1alpha1.Step) error + Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error } diff --git a/internal/scheduling/lib/pipeline.go b/internal/scheduling/lib/pipeline.go index b1a43363..48b05b08 100644 --- a/internal/scheduling/lib/pipeline.go +++ b/internal/scheduling/lib/pipeline.go @@ -14,7 +14,6 @@ import ( "sync" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -39,7 +38,7 @@ type pipeline[RequestType PipelineRequest] struct { type StepWrapper[RequestType PipelineRequest] func( ctx context.Context, client client.Client, - step v1alpha1.Step, + step v1alpha1.StepSpec, impl Step[RequestType], ) (Step[RequestType], error) @@ -49,7 +48,7 @@ func NewPipeline[RequestType PipelineRequest]( client client.Client, name string, supportedSteps map[string]func() Step[RequestType], - confedSteps []v1alpha1.Step, + confedSteps []v1alpha1.StepSpec, monitor PipelineMonitor, ) (Pipeline[RequestType], error) { @@ -60,26 +59,25 @@ func NewPipeline[RequestType PipelineRequest]( pipelineMonitor := monitor.SubPipeline(name) for _, stepConfig := range confedSteps { - slog.Info("scheduler: configuring step", "name", stepConfig.Name, "impl", stepConfig.Spec.Impl) + slog.Info("scheduler: configuring step", "name", stepConfig.Impl) slog.Info("supported:", "steps", maps.Keys(supportedSteps)) - makeStep, ok := supportedSteps[stepConfig.Spec.Impl] + makeStep, ok := supportedSteps[stepConfig.Impl] if !ok { - return nil, errors.New("unsupported scheduler step impl: " + stepConfig.Spec.Impl) + return nil, errors.New("unsupported scheduler step impl: " + stepConfig.Impl) } step := makeStep() - if stepConfig.Spec.Type == v1alpha1.StepTypeWeigher && stepConfig.Spec.Weigher != nil { - step = validateStep(step, stepConfig.Spec.Weigher.DisabledValidations) + if stepConfig.Type == v1alpha1.StepTypeWeigher && stepConfig.Weigher != nil { + step = 
validateStep(step, stepConfig.Weigher.DisabledValidations) } step = monitorStep(ctx, client, stepConfig, step, pipelineMonitor) if err := step.Init(ctx, client, stepConfig); err != nil { return nil, errors.New("failed to initialize pipeline step: " + err.Error()) } - stepsByName[stepConfig.Name] = step - order = append(order, stepConfig.Name) + stepsByName[stepConfig.Impl] = step + order = append(order, stepConfig.Impl) slog.Info( "scheduler: added step", - "name", stepConfig.Name, - "impl", stepConfig.Spec.Impl, + "name", stepConfig.Impl, ) } return &pipeline[RequestType]{ @@ -205,7 +203,7 @@ func (p *pipeline[RequestType]) Run(request RequestType) (v1alpha1.DecisionResul for _, stepName := range p.order { if activations, ok := stepWeights[stepName]; ok { result.StepResults = append(result.StepResults, v1alpha1.StepResult{ - StepRef: corev1.ObjectReference{Name: stepName}, + StepName: stepName, Activations: activations, }) } diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index a96f2ee2..73d44b3a 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -26,7 +26,7 @@ type PipelineInitializer[PipelineType any] interface { // This method is delegated to the parent controller, when a pipeline needs // to be newly initialized or re-initialized to update it in the pipeline // map. - InitPipeline(ctx context.Context, name string, steps []v1alpha1.Step) (PipelineType, error) + InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (PipelineType, error) // Get the accepted pipeline type for this controller. // // This is used to filter pipelines when listing existing pipelines on @@ -87,49 +87,37 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange( } log := ctrl.LoggerFrom(ctx) old := obj.DeepCopy() - // Get all configured steps for the pipeline. 
- var steps []v1alpha1.Step - obj.Status.TotalSteps, obj.Status.ReadySteps = len(obj.Spec.Steps), 0 - var err error + + // Check if all steps are ready. If not, check if the step is mandatory. + obj.Status.TotalSteps = len(obj.Spec.Steps) + obj.Status.ReadySteps = 0 for _, step := range obj.Spec.Steps { - stepConf := &v1alpha1.Step{} - log.Info("checking step for pipeline", "pipelineName", obj.Name, "stepName", step.Ref.Name) - if err = c.Get(ctx, client.ObjectKey{ - Name: step.Ref.Name, - Namespace: step.Ref.Namespace, - }, stepConf); err != nil { - err = fmt.Errorf("failed to get step %s: %w", step.Ref.Name, err) + err := c.checkStepReady(ctx, &step) + if err == nil { + obj.Status.ReadySteps++ continue } - if !stepConf.Status.Ready { - if step.Mandatory { - err = fmt.Errorf("mandatory step %s not ready", step.Ref.Name) + if step.Mandatory { + obj.Status.Ready = false + meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ + Type: v1alpha1.PipelineConditionError, + Status: metav1.ConditionTrue, + Reason: "MandatoryStepNotReady", + Message: fmt.Sprintf("mandatory step %s not ready: %s", step.Impl, err.Error()), + }) + patch := client.MergeFrom(old) + if err := c.Status().Patch(ctx, obj, patch); err != nil { + log.Error(err, "failed to patch pipeline status", "pipelineName", obj.Name) } - log.Info("step not ready", "pipelineName", obj.Name, "stepName", step.Ref.Name) - continue + delete(c.Pipelines, obj.Name) + delete(c.PipelineConfigs, obj.Name) + return } - obj.Status.ReadySteps++ - steps = append(steps, *stepConf) } obj.Status.StepsReadyFrac = fmt.Sprintf("%d/%d", obj.Status.ReadySteps, obj.Status.TotalSteps) - if err != nil { - log.Error(err, "pipeline not ready due to step issues", "pipelineName", obj.Name) - obj.Status.Ready = false - meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ - Type: v1alpha1.StepConditionError, - Status: metav1.ConditionTrue, - Reason: "StepNotReady", - Message: err.Error(), - }) - patch := 
client.MergeFrom(old) - if err := c.Status().Patch(ctx, obj, patch); err != nil { - log.Error(err, "failed to patch pipeline status", "pipelineName", obj.Name) - } - delete(c.Pipelines, obj.Name) - delete(c.PipelineConfigs, obj.Name) - return - } - c.Pipelines[obj.Name], err = c.Initializer.InitPipeline(ctx, obj.Name, steps) + + var err error + c.Pipelines[obj.Name], err = c.Initializer.InitPipeline(ctx, *obj) c.PipelineConfigs[obj.Name] = *obj if err != nil { log.Error(err, "failed to create pipeline", "pipelineName", obj.Name) @@ -200,22 +188,17 @@ func (c *BasePipelineController[PipelineType]) HandlePipelineDeleted( delete(c.PipelineConfigs, pipelineConf.Name) } -// Handle a step creation or update event from watching step resources. -func (c *BasePipelineController[PipelineType]) handleStepChange( +// Check if a step is ready, and if not, return an error indicating why not. +func (c *BasePipelineController[PipelineType]) checkStepReady( ctx context.Context, - obj *v1alpha1.Step, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { + obj *v1alpha1.StepSpec, +) error { - if obj.Spec.SchedulingDomain != c.SchedulingDomain { - return - } log := ctrl.LoggerFrom(ctx) // Check the status of all knowledges depending on this step. 
- old := obj.DeepCopy() - obj.Status.ReadyKnowledges = 0 - obj.Status.TotalKnowledges = len(obj.Spec.Knowledges) - for _, knowledgeRef := range obj.Spec.Knowledges { + readyKnowledges := 0 + totalKnowledges := len(obj.Knowledges) + for _, knowledgeRef := range obj.Knowledges { knowledge := &v1alpha1.Knowledge{} if err := c.Get(ctx, client.ObjectKey{ Name: knowledgeRef.Name, @@ -233,108 +216,15 @@ func (c *BasePipelineController[PipelineType]) handleStepChange( log.Info("knowledge not ready, no data available", "knowledgeName", knowledgeRef.Name) continue } - obj.Status.ReadyKnowledges++ - } - obj.Status.KnowledgesReadyFrac = fmt.Sprintf("%d/%d", obj.Status.ReadyKnowledges, obj.Status.TotalKnowledges) - if obj.Status.ReadyKnowledges != obj.Status.TotalKnowledges { - obj.Status.Ready = false - meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ - Type: v1alpha1.StepConditionError, - Status: metav1.ConditionTrue, - Reason: "KnowledgesNotReady", - Message: "not all knowledges are ready", - }) - log.Info("step not ready, not all knowledges are ready", "stepName", obj.Name) - } else { - obj.Status.Ready = true - meta.RemoveStatusCondition(&obj.Status.Conditions, v1alpha1.StepConditionError) - log.Info("step is ready", "stepName", obj.Name) - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, obj, patch); err != nil { - log.Error(err, "failed to patch step status", "stepName", obj.Name) - return - } - // Find all pipelines depending on this step and re-evaluate them. 
- var pipelines v1alpha1.PipelineList - if err := c.List(ctx, &pipelines); err != nil { - log.Error(err, "failed to list pipelines for step", "stepName", obj.Name) - return - } - for _, pipeline := range pipelines.Items { - needsUpdate := false - for _, step := range pipeline.Spec.Steps { - if step.Ref.Name == obj.Name && step.Ref.Namespace == obj.Namespace { - needsUpdate = true - break - } - } - if needsUpdate { - c.handlePipelineChange(ctx, &pipeline, queue) - } - } -} - -// Handler bound to a step watch to handle created steps. -// -// This handler will look at the underlying resources of the step and check -// if they are ready. It will then re-evaluate all pipelines depending on the step. -func (c *BasePipelineController[PipelineType]) HandleStepCreated( - ctx context.Context, - evt event.CreateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - stepConf := evt.Object.(*v1alpha1.Step) - c.handleStepChange(ctx, stepConf, queue) -} - -// Handler bound to a step watch to handle updated steps. -// -// This handler will look at the underlying resources of the step and check -// if they are ready. It will then re-evaluate all pipelines depending on the step. -func (c *BasePipelineController[PipelineType]) HandleStepUpdated( - ctx context.Context, - evt event.UpdateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - stepConf := evt.ObjectNew.(*v1alpha1.Step) - c.handleStepChange(ctx, stepConf, queue) -} - -// Handler bound to a step watch to handle deleted steps. -// -// This handler will re-evaluate all pipelines depending on the step. 
-func (c *BasePipelineController[PipelineType]) HandleStepDeleted( - ctx context.Context, - evt event.DeleteEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - stepConf := evt.Object.(*v1alpha1.Step) - if stepConf.Spec.SchedulingDomain != c.SchedulingDomain { - return + readyKnowledges++ } - // When a step is deleted, we need to re-evaluate all pipelines depending on it. - var pipelines v1alpha1.PipelineList - log := ctrl.LoggerFrom(ctx) - if err := c.List(ctx, &pipelines); err != nil { - log.Error(err, "failed to list pipelines for deleted step", "stepName", stepConf.Name) - return - } - for _, pipeline := range pipelines.Items { - needsUpdate := false - for _, step := range pipeline.Spec.Steps { - if step.Ref.Name == stepConf.Name && step.Ref.Namespace == stepConf.Namespace { - needsUpdate = true - break - } - } - if needsUpdate { - c.handlePipelineChange(ctx, &pipeline, queue) - } + if readyKnowledges != totalKnowledges { + return fmt.Errorf( + "%d/%d knowledges ready", + readyKnowledges, totalKnowledges, + ) } + return nil } // Handle a knowledge creation, update, or delete event from watching knowledge resources. @@ -348,30 +238,33 @@ func (c *BasePipelineController[PipelineType]) handleKnowledgeChange( return } log := ctrl.LoggerFrom(ctx) - log.Info("knowledge changed, re-evaluating dependent steps", "knowledgeName", obj.Name) - // Find all steps depending on this knowledge and re-evaluate them. - var steps v1alpha1.StepList - if err := c.List(ctx, &steps); err != nil { - log.Error(err, "failed to list steps for knowledge", "knowledgeName", obj.Name) + log.Info("knowledge changed, re-evaluating dependent pipelines", "knowledgeName", obj.Name) + // Find all pipelines depending on this knowledge and re-evaluate them. 
+ var pipelines v1alpha1.PipelineList + if err := c.List(ctx, &pipelines); err != nil { + log.Error(err, "failed to list pipelines for knowledge", "knowledgeName", obj.Name) return } - for _, step := range steps.Items { + for _, pipeline := range pipelines.Items { needsUpdate := false - for _, knowledgeRef := range step.Spec.Knowledges { - if knowledgeRef.Name == obj.Name && knowledgeRef.Namespace == obj.Namespace { - needsUpdate = true - break + for _, step := range pipeline.Spec.Steps { + for _, knowledgeRef := range step.Knowledges { + if knowledgeRef.Name == obj.Name && knowledgeRef.Namespace == obj.Namespace { + needsUpdate = true + break + } } } if needsUpdate { - c.handleStepChange(ctx, &step, queue) + log.Info("re-evaluating pipeline due to knowledge change", "pipelineName", pipeline.Name) + c.handlePipelineChange(ctx, &pipeline, queue) } } } // Handler bound to a knowledge watch to handle created knowledges. // -// This handler will re-evaluate all steps depending on the knowledge. +// This handler will re-evaluate all pipelines depending on the knowledge. func (c *BasePipelineController[PipelineType]) HandleKnowledgeCreated( ctx context.Context, evt event.CreateEvent, @@ -384,7 +277,7 @@ func (c *BasePipelineController[PipelineType]) HandleKnowledgeCreated( // Handler bound to a knowledge watch to handle updated knowledges. // -// This handler will re-evaluate all steps depending on the knowledge. +// This handler will re-evaluate all pipelines depending on the knowledge. func (c *BasePipelineController[PipelineType]) HandleKnowledgeUpdated( ctx context.Context, evt event.UpdateEvent, @@ -406,7 +299,7 @@ func (c *BasePipelineController[PipelineType]) HandleKnowledgeUpdated( // Handler bound to a knowledge watch to handle deleted knowledges. // -// This handler will re-evaluate all steps depending on the knowledge. +// This handler will re-evaluate all pipelines depending on the knowledge. 
func (c *BasePipelineController[PipelineType]) HandleKnowledgeDeleted( ctx context.Context, evt event.DeleteEvent, diff --git a/internal/scheduling/lib/pipeline_controller_test.go b/internal/scheduling/lib/pipeline_controller_test.go index 326f4861..d1642574 100644 --- a/internal/scheduling/lib/pipeline_controller_test.go +++ b/internal/scheduling/lib/pipeline_controller_test.go @@ -5,203 +5,155 @@ package lib import ( "context" - "errors" "testing" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/workqueue" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/event" - "sigs.k8s.io/controller-runtime/pkg/reconcile" "github.com/cobaltcore-dev/cortex/api/v1alpha1" ) // Mock pipeline type for testing type mockPipeline struct { - name string - steps []v1alpha1.Step + name string } -// Mock initializer implementation -type mockInitializer struct { - shouldFail bool - initPipeline func(steps []v1alpha1.Step) (mockPipeline, error) +// Mock PipelineInitializer for testing +type mockPipelineInitializer struct { + pipelineType v1alpha1.PipelineType + initPipelineFunc func(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) } -func (m *mockInitializer) PipelineType() v1alpha1.PipelineType { - return "" -} - -func (m *mockInitializer) InitPipeline(ctx context.Context, name string, steps []v1alpha1.Step) (mockPipeline, error) { - if m.shouldFail { - return mockPipeline{}, errors.New("mock initializer error") - } - if m.initPipeline != nil { - return m.initPipeline(steps) - } - return mockPipeline{name: name, steps: steps}, nil -} - -func setupTestScheme() *runtime.Scheme { - scheme := runtime.NewScheme() - err := v1alpha1.AddToScheme(scheme) - if err != nil { - return nil - } - err = 
v1alpha1.AddToScheme(scheme) - if err != nil { - return nil - } - return scheme -} - -func createTestPipeline(steps []v1alpha1.StepInPipeline) *v1alpha1.Pipeline { - return &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: "test", - Type: "", - Steps: steps, - }, - } -} - -func createTestStep(ready bool, knowledges []corev1.ObjectReference) *v1alpha1.Step { - return &v1alpha1.Step{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-step", - Namespace: "default", - }, - Spec: v1alpha1.StepSpec{ - SchedulingDomain: "test", - Type: v1alpha1.StepTypeFilter, - Impl: "test-impl", - Knowledges: knowledges, - }, - Status: v1alpha1.StepStatus{ - Ready: ready, - ReadyKnowledges: len(knowledges), - TotalKnowledges: len(knowledges), - KnowledgesReadyFrac: "ready", - }, +func (m *mockPipelineInitializer) InitPipeline(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) { + if m.initPipelineFunc != nil { + return m.initPipelineFunc(ctx, p) } + return mockPipeline{name: p.Name}, nil } -func createTestKnowledge(name string, hasError bool, rawLength int) *v1alpha1.Knowledge { - knowledge := &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: "test", - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: rawLength, - }, - } - if hasError { - meta.SetStatusCondition(&knowledge.Status.Conditions, metav1.Condition{ - Type: v1alpha1.KnowledgeConditionError, - Status: metav1.ConditionTrue, - Reason: "TestError", - Message: "This is a test error", - }) - } - return knowledge +func (m *mockPipelineInitializer) PipelineType() v1alpha1.PipelineType { + return m.pipelineType } func TestBasePipelineController_InitAllPipelines(t *testing.T) { - scheme := setupTestScheme() + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + 
} tests := []struct { name string existingPipelines []v1alpha1.Pipeline - existingSteps []v1alpha1.Step - initializerFails bool - expectedPipelines int + schedulingDomain v1alpha1.SchedulingDomain + pipelineType v1alpha1.PipelineType + expectedCount int expectError bool }{ { name: "no existing pipelines", existingPipelines: []v1alpha1.Pipeline{}, - expectedPipelines: 0, + schedulingDomain: v1alpha1.SchedulingDomainNova, + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + expectedCount: 0, expectError: false, }, { - name: "single pipeline with ready step", + name: "one matching pipeline", existingPipelines: []v1alpha1.Pipeline{ - *createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", - }, - Mandatory: true, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", }, - }), - }, - existingSteps: []v1alpha1.Step{ - *createTestStep(true, nil), + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + }, }, - expectedPipelines: 1, - expectError: false, + schedulingDomain: v1alpha1.SchedulingDomainNova, + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + expectedCount: 1, + expectError: false, }, { - name: "pipeline with non-ready mandatory step", + name: "multiple pipelines, only some matching", existingPipelines: []v1alpha1.Pipeline{ - *createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", - }, - Mandatory: true, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "matching-pipeline-1", }, - }), - }, - existingSteps: []v1alpha1.Step{ - *createTestStep(false, nil), + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "different-domain-pipeline", + }, + Spec: 
v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "different-type-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeDescheduler, + Steps: []v1alpha1.StepSpec{}, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "matching-pipeline-2", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + }, }, - expectedPipelines: 0, - expectError: false, + schedulingDomain: v1alpha1.SchedulingDomainNova, + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + expectedCount: 2, + expectError: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - objects := make([]client.Object, 0) + objects := make([]client.Object, len(tt.existingPipelines)) for i := range tt.existingPipelines { - objects = append(objects, &tt.existingPipelines[i]) - } - for i := range tt.existingSteps { - objects = append(objects, &tt.existingSteps[i]) + objects[i] = &tt.existingPipelines[i] } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}). + WithStatusSubresource(&v1alpha1.Pipeline{}). 
Build() - initializer := &mockInitializer{shouldFail: tt.initializerFails} controller := &BasePipelineController[mockPipeline]{ - Initializer: initializer, - Client: client, - SchedulingDomain: "test", + Client: fakeClient, + SchedulingDomain: tt.schedulingDomain, + Initializer: &mockPipelineInitializer{ + pipelineType: tt.pipelineType, + }, } - ctx := ctrl.LoggerInto(context.Background(), ctrl.Log) - err := controller.InitAllPipelines(ctx) + err := controller.InitAllPipelines(context.Background()) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -210,485 +162,1028 @@ func TestBasePipelineController_InitAllPipelines(t *testing.T) { t.Errorf("Expected no error but got: %v", err) } - if len(controller.Pipelines) != tt.expectedPipelines { - t.Errorf("Expected %d pipelines, got %d", tt.expectedPipelines, len(controller.Pipelines)) + if len(controller.Pipelines) != tt.expectedCount { + t.Errorf("Expected %d pipelines, got %d", tt.expectedCount, len(controller.Pipelines)) + } + + if len(controller.PipelineConfigs) != tt.expectedCount { + t.Errorf("Expected %d pipeline configs, got %d", tt.expectedCount, len(controller.PipelineConfigs)) } }) } } -func TestBasePipelineController_HandlePipelineCreated(t *testing.T) { - scheme := setupTestScheme() +func TestBasePipelineController_handlePipelineChange(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } tests := []struct { - name string - pipeline *v1alpha1.Pipeline - existingSteps []v1alpha1.Step - initializerFails bool - expectReady bool - expectError bool + name string + pipeline *v1alpha1.Pipeline + knowledges []v1alpha1.Knowledge + schedulingDomain v1alpha1.SchedulingDomain + initPipelineError bool + expectReady bool + expectInMap bool + expectCondition string }{ { - name: "pipeline with ready steps", - pipeline: createTestPipeline([]v1alpha1.StepInPipeline{ + name: "pipeline with all 
steps ready", + pipeline: &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Mandatory: true, + Knowledges: []corev1.ObjectReference{ + {Name: "knowledge-1", Namespace: "default"}, + }, + }, + }, + }, + }, + knowledges: []v1alpha1.Knowledge{ { - Ref: corev1.ObjectReference{ - Name: "test-step", + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge-1", Namespace: "default", }, - Mandatory: true, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, }, - }), - existingSteps: []v1alpha1.Step{ - *createTestStep(true, nil), }, - expectReady: true, - expectError: false, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectReady: true, + expectInMap: true, }, { - name: "pipeline with non-ready mandatory step", - pipeline: createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", + name: "pipeline with mandatory step not ready", + pipeline: &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline-not-ready", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Mandatory: true, + Knowledges: []corev1.ObjectReference{ + {Name: "missing-knowledge", Namespace: "default"}, + }, + }, }, - Mandatory: true, }, - }), - existingSteps: []v1alpha1.Step{ - *createTestStep(false, nil), }, - expectReady: false, - expectError: true, + knowledges: []v1alpha1.Knowledge{}, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectReady: false, + expectInMap: false, + expectCondition: 
v1alpha1.PipelineConditionError, }, { - name: "pipeline with non-ready optional step", - pipeline: createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", + name: "pipeline with optional step not ready", + pipeline: &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline-optional", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Mandatory: false, + Knowledges: []corev1.ObjectReference{ + {Name: "missing-knowledge", Namespace: "default"}, + }, + }, }, - Mandatory: false, }, - }), - existingSteps: []v1alpha1.Step{ - *createTestStep(false, nil), }, - expectReady: true, - expectError: false, + knowledges: []v1alpha1.Knowledge{}, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectReady: true, + expectInMap: true, + }, + { + name: "pipeline init fails", + pipeline: &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline-init-fail", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + }, + knowledges: []v1alpha1.Knowledge{}, + schedulingDomain: v1alpha1.SchedulingDomainNova, + initPipelineError: true, + expectReady: false, + expectInMap: false, + expectCondition: v1alpha1.PipelineConditionError, + }, + { + name: "pipeline with different scheduling domain", + pipeline: &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline-different-domain", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + }, + knowledges: []v1alpha1.Knowledge{}, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectReady: false, + expectInMap: false, }, { - 
name: "initializer fails to initialize pipeline", - pipeline: createTestPipeline([]v1alpha1.StepInPipeline{ + name: "pipeline with knowledge in error state", + pipeline: &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline-knowledge-error", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Mandatory: true, + Knowledges: []corev1.ObjectReference{ + {Name: "error-knowledge", Namespace: "default"}, + }, + }, + }, + }, + }, + knowledges: []v1alpha1.Knowledge{ { - Ref: corev1.ObjectReference{ - Name: "test-step", + ObjectMeta: metav1.ObjectMeta{ + Name: "error-knowledge", Namespace: "default", }, - Mandatory: true, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionError, + Status: metav1.ConditionTrue, + }, + }, + }, }, - }), - existingSteps: []v1alpha1.Step{ - *createTestStep(true, nil), }, - initializerFails: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, expectReady: false, - expectError: true, + expectInMap: false, + expectCondition: v1alpha1.PipelineConditionError, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - objects := make([]client.Object, 0) - objects = append(objects, tt.pipeline) - for i := range tt.existingSteps { - objects = append(objects, &tt.existingSteps[i]) + objects := []client.Object{tt.pipeline} + for i := range tt.knowledges { + objects = append(objects, &tt.knowledges[i]) } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}). + WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). 
Build() - initializer := &mockInitializer{shouldFail: tt.initializerFails} + initializer := &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + } + + if tt.initPipelineError { + initializer.initPipelineFunc = func(ctx context.Context, p v1alpha1.Pipeline) (mockPipeline, error) { + return mockPipeline{}, context.Canceled + } + } + controller := &BasePipelineController[mockPipeline]{ - Pipelines: make(map[string]mockPipeline), + Client: fakeClient, + SchedulingDomain: tt.schedulingDomain, Initializer: initializer, - Client: client, - SchedulingDomain: "test", + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - controller.Pipelines = make(map[string]mockPipeline) - controller.PipelineConfigs = make(map[string]v1alpha1.Pipeline) - - ctx := ctrl.LoggerInto(context.Background(), ctrl.Log) - evt := event.CreateEvent{Object: tt.pipeline} - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - controller.HandlePipelineCreated(ctx, evt, queue) + controller.handlePipelineChange(context.Background(), tt.pipeline, nil) - // Check if pipeline was added to map - _, pipelineExists := controller.Pipelines[tt.pipeline.Name] - if tt.expectReady && !pipelineExists { - t.Error("Expected pipeline to be in map but it wasn't") - } - if !tt.expectReady && pipelineExists { - t.Error("Expected pipeline not to be in map but it was") + // Check if pipeline is in map + _, inMap := controller.Pipelines[tt.pipeline.Name] + if inMap != tt.expectInMap { + t.Errorf("Expected pipeline in map: %v, got: %v", tt.expectInMap, inMap) } - // Verify pipeline status was updated + // Get updated pipeline status var updatedPipeline v1alpha1.Pipeline - err := client.Get(ctx, types.NamespacedName{Name: tt.pipeline.Name}, &updatedPipeline) + err := fakeClient.Get(context.Background(), client.ObjectKey{Name: tt.pipeline.Name}, &updatedPipeline) if err != nil { t.Fatalf("Failed 
to get updated pipeline: %v", err) } + // Check ready status if updatedPipeline.Status.Ready != tt.expectReady { - t.Errorf("Expected Ready=%v, got %v", tt.expectReady, updatedPipeline.Status.Ready) + t.Errorf("Expected ready status: %v, got: %v", tt.expectReady, updatedPipeline.Status.Ready) } - hasError := meta.IsStatusConditionTrue(updatedPipeline.Status.Conditions, v1alpha1.PipelineConditionError) - if hasError != tt.expectError { - t.Errorf("Expected Error condition=%v, got %v", tt.expectError, hasError) + // Check condition if specified + if tt.expectCondition != "" { + hasCondition := meta.IsStatusConditionTrue(updatedPipeline.Status.Conditions, tt.expectCondition) + if !hasCondition { + t.Errorf("Expected condition %s to be true", tt.expectCondition) + } } }) } } -func TestBasePipelineController_HandlePipelineDeleted(t *testing.T) { - scheme := setupTestScheme() +func TestBasePipelineController_HandlePipelineCreated(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } + + pipeline := &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + } - pipeline := createTestPipeline(nil) - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(pipeline). + WithStatusSubresource(&v1alpha1.Pipeline{}). 
Build() - initializer := &mockInitializer{} + controller := &BasePipelineController[mockPipeline]{ + Client: fakeClient, + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Initializer: &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + }, + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), + } + + evt := event.CreateEvent{ + Object: pipeline, + } + + controller.HandlePipelineCreated(context.Background(), evt, nil) + + if _, exists := controller.Pipelines[pipeline.Name]; !exists { + t.Error("Expected pipeline to be in map after creation") + } +} + +func TestBasePipelineController_HandlePipelineUpdated(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } + + oldPipeline := &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{}, + }, + } + + newPipeline := oldPipeline.DeepCopy() + newPipeline.Spec.Description = "Updated description" + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(newPipeline). + WithStatusSubresource(&v1alpha1.Pipeline{}). 
+ Build() + + controller := &BasePipelineController[mockPipeline]{ + Client: fakeClient, + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Initializer: &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + }, + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), + } + + evt := event.UpdateEvent{ + ObjectOld: oldPipeline, + ObjectNew: newPipeline, + } + + controller.HandlePipelineUpdated(context.Background(), evt, nil) + + if _, exists := controller.Pipelines[newPipeline.Name]; !exists { + t.Error("Expected pipeline to be in map after update") + } +} + +func TestBasePipelineController_HandlePipelineDeleted(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } + + pipeline := &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + }, + } + controller := &BasePipelineController[mockPipeline]{ Pipelines: map[string]mockPipeline{ "test-pipeline": {name: "test-pipeline"}, }, - Initializer: initializer, - Client: client, - SchedulingDomain: "test", + PipelineConfigs: map[string]v1alpha1.Pipeline{ + "test-pipeline": *pipeline, + }, } - ctx := ctrl.LoggerInto(context.Background(), ctrl.Log) - evt := event.DeleteEvent{Object: pipeline} - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) + evt := event.DeleteEvent{ + Object: pipeline, + } - controller.HandlePipelineDeleted(ctx, evt, queue) + controller.HandlePipelineDeleted(context.Background(), evt, nil) - if _, exists := controller.Pipelines["test-pipeline"]; exists { - t.Error("Expected pipeline to be removed from map") + if _, exists := controller.Pipelines[pipeline.Name]; exists { + t.Error("Expected pipeline to be removed from map 
after deletion") + } + if _, exists := controller.PipelineConfigs[pipeline.Name]; exists { + t.Error("Expected pipeline config to be removed from map after deletion") } } -func TestBasePipelineController_HandleStepCreated(t *testing.T) { - scheme := setupTestScheme() +func TestBasePipelineController_checkStepReady(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } tests := []struct { - name string - step *v1alpha1.Step - knowledges []v1alpha1.Knowledge - pipelines []v1alpha1.Pipeline - expectedReady bool - expectedPipelines int + name string + step v1alpha1.StepSpec + knowledges []v1alpha1.Knowledge + expectError bool }{ { - name: "step with ready knowledges", - step: createTestStep(false, []corev1.ObjectReference{ - {Name: "knowledge1", Namespace: "default"}, - }), + name: "step with no knowledge dependencies", + step: v1alpha1.StepSpec{ + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{}, + }, + knowledges: []v1alpha1.Knowledge{}, + expectError: false, + }, + { + name: "step with ready knowledge", + step: v1alpha1.StepSpec{ + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "ready-knowledge", Namespace: "default"}, + }, + }, knowledges: []v1alpha1.Knowledge{ - *createTestKnowledge("knowledge1", false, 10), + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ready-knowledge", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + }, }, - pipelines: []v1alpha1.Pipeline{ - *createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", + expectError: false, + }, + { + name: "step with knowledge in error state", + step: v1alpha1.StepSpec{ + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "error-knowledge", Namespace: 
"default"}, + }, + }, + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "error-knowledge", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionError, + Status: metav1.ConditionTrue, + }, }, - Mandatory: true, }, - }), + }, + }, + expectError: true, + }, + { + name: "step with knowledge with no data", + step: v1alpha1.StepSpec{ + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "no-data-knowledge", Namespace: "default"}, + }, + }, + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "no-data-knowledge", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 0, + }, + }, + }, + expectError: true, + }, + { + name: "step with missing knowledge", + step: v1alpha1.StepSpec{ + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "missing-knowledge", Namespace: "default"}, + }, }, - expectedReady: true, - expectedPipelines: 1, + knowledges: []v1alpha1.Knowledge{}, + expectError: true, }, { - name: "step with knowledge error", - step: createTestStep(false, []corev1.ObjectReference{ - {Name: "knowledge1", Namespace: "default"}, - }), + name: "step with multiple knowledges, all ready", + step: v1alpha1.StepSpec{ + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "knowledge-1", Namespace: "default"}, + {Name: "knowledge-2", Namespace: "default"}, + }, + }, knowledges: []v1alpha1.Knowledge{ - *createTestKnowledge("knowledge1", true, 0), + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge-1", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "knowledge-2", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 5, + }, + }, }, - pipelines: 
[]v1alpha1.Pipeline{ - *createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", - }, - Mandatory: true, + expectError: false, + }, + { + name: "step with multiple knowledges, some not ready", + step: v1alpha1.StepSpec{ + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "ready-knowledge", Namespace: "default"}, + {Name: "not-ready-knowledge", Namespace: "default"}, + }, + }, + knowledges: []v1alpha1.Knowledge{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "ready-knowledge", + Namespace: "default", + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "not-ready-knowledge", + Namespace: "default", }, - }), + Status: v1alpha1.KnowledgeStatus{ + RawLength: 0, + }, + }, }, - expectedReady: false, - expectedPipelines: 0, + expectError: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - objects := make([]client.Object, 0) - objects = append(objects, tt.step) + objects := make([]client.Object, len(tt.knowledges)) for i := range tt.knowledges { - objects = append(objects, &tt.knowledges[i]) - } - for i := range tt.pipelines { - objects = append(objects, &tt.pipelines[i]) + objects[i] = &tt.knowledges[i] } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}, &v1alpha1.Knowledge{}). 
Build() - initializer := &mockInitializer{} controller := &BasePipelineController[mockPipeline]{ - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - Initializer: initializer, - Client: client, - SchedulingDomain: "test", + Client: fakeClient, } - ctx := ctrl.LoggerInto(context.Background(), ctrl.Log) - evt := event.CreateEvent{Object: tt.step} - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - - controller.HandleStepCreated(ctx, evt, queue) - - // Verify step status was updated - var updatedStep v1alpha1.Step - err := client.Get(ctx, types.NamespacedName{Name: tt.step.Name, Namespace: tt.step.Namespace}, &updatedStep) - if err != nil { - t.Fatalf("Failed to get updated step: %v", err) - } + err := controller.checkStepReady(context.Background(), &tt.step) - if updatedStep.Status.Ready != tt.expectedReady { - t.Errorf("Expected step Ready=%v, got %v", tt.expectedReady, updatedStep.Status.Ready) + if tt.expectError && err == nil { + t.Error("Expected error but got none") } - - // Check if pipelines were updated correctly - if len(controller.Pipelines) != tt.expectedPipelines { - t.Errorf("Expected %d pipelines in map, got %d", tt.expectedPipelines, len(controller.Pipelines)) + if !tt.expectError && err != nil { + t.Errorf("Expected no error but got: %v", err) } }) } } -func TestBasePipelineController_HandleKnowledgeUpdated(t *testing.T) { - scheme := setupTestScheme() +func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } tests := []struct { - name string - oldKnowledge *v1alpha1.Knowledge - newKnowledge *v1alpha1.Knowledge - shouldTrigger bool + name string + knowledge *v1alpha1.Knowledge + pipelines []v1alpha1.Pipeline + schedulingDomain v1alpha1.SchedulingDomain + expectReEvaluated []string }{ { - 
name: "error status changed", - oldKnowledge: createTestKnowledge("test-knowledge", false, 10), - newKnowledge: createTestKnowledge("test-knowledge", true, 10), - shouldTrigger: true, - }, - { - name: "data became available", - oldKnowledge: createTestKnowledge("test-knowledge", false, 0), - newKnowledge: createTestKnowledge("test-knowledge", false, 10), - shouldTrigger: true, + name: "knowledge change triggers dependent pipeline re-evaluation", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + }, + pipelines: []v1alpha1.Pipeline{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "dependent-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "test-knowledge", Namespace: "default"}, + }, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "independent-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "other-knowledge", Namespace: "default"}, + }, + }, + }, + }, + }, + }, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectReEvaluated: []string{"dependent-pipeline"}, }, { - name: "no relevant change", - oldKnowledge: createTestKnowledge("test-knowledge", false, 10), - newKnowledge: createTestKnowledge("test-knowledge", false, 15), - shouldTrigger: false, + name: "knowledge change in different scheduling domain", + knowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
"test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + }, + }, + pipelines: []v1alpha1.Pipeline{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "nova-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "test-knowledge", Namespace: "default"}, + }, + }, + }, + }, + }, + }, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectReEvaluated: []string{}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - step := createTestStep(false, []corev1.ObjectReference{ - {Name: "test-knowledge", Namespace: "default"}, - }) - pipeline := createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", - }, - Mandatory: true, - }, - }) - - objects := []client.Object{tt.newKnowledge, step, pipeline} + objects := []client.Object{tt.knowledge} + for i := range tt.pipelines { + objects = append(objects, &tt.pipelines[i]) + } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}, &v1alpha1.Knowledge{}). + WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). 
Build() - initializer := &mockInitializer{} controller := &BasePipelineController[mockPipeline]{ - Pipelines: make(map[string]mockPipeline), - Initializer: initializer, - Client: client, - SchedulingDomain: "test", - } - controller.Pipelines = make(map[string]mockPipeline) - controller.PipelineConfigs = make(map[string]v1alpha1.Pipeline) - - ctx := ctrl.LoggerInto(context.Background(), ctrl.Log) - evt := event.UpdateEvent{ - ObjectOld: tt.oldKnowledge, - ObjectNew: tt.newKnowledge, + Client: fakeClient, + SchedulingDomain: tt.schedulingDomain, + Initializer: &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + }, + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - controller.HandleKnowledgeUpdated(ctx, evt, queue) + controller.handleKnowledgeChange(context.Background(), tt.knowledge, nil) - // If should trigger, verify step status was updated - if tt.shouldTrigger { - var updatedStep v1alpha1.Step - err := client.Get(ctx, types.NamespacedName{Name: step.Name, Namespace: step.Namespace}, &updatedStep) - if err != nil { - t.Fatalf("Failed to get updated step: %v", err) + // Verify expected pipelines were re-evaluated by checking if they're in the map + for _, expectedName := range tt.expectReEvaluated { + if _, exists := controller.Pipelines[expectedName]; !exists { + t.Errorf("Expected pipeline %s to be re-evaluated", expectedName) } - // Status should have been recalculated } }) } } -func TestBasePipelineController_HandleStepDeleted(t *testing.T) { - scheme := setupTestScheme() +func TestBasePipelineController_HandleKnowledgeCreated(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } - step := createTestStep(true, nil) - pipeline := 
createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", - }, - Mandatory: true, + knowledge := &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, }, - }) + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + } - // Only include the pipeline in the fake client, not the step (simulating step deletion) - objects := []client.Object{pipeline} + pipeline := &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "test-knowledge", Namespace: "default"}, + }, + }, + }, + }, + } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}). + WithObjects(knowledge, pipeline). + WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). 
Build() - initializer := &mockInitializer{} controller := &BasePipelineController[mockPipeline]{ - Pipelines: map[string]mockPipeline{ - "test-pipeline": {name: "test-pipeline"}, + Client: fakeClient, + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Initializer: &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, }, - Initializer: initializer, - Client: client, + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - ctx := ctrl.LoggerInto(context.Background(), ctrl.Log) - evt := event.DeleteEvent{Object: step} - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - - // Initially pipeline should be in map - if _, exists := controller.Pipelines["test-pipeline"]; !exists { - t.Fatal("Expected pipeline to be in map initially") + evt := event.CreateEvent{ + Object: knowledge, } - controller.HandleStepDeleted(ctx, evt, queue) + controller.HandleKnowledgeCreated(context.Background(), evt, nil) - // The main requirement is that HandleStepDeleted successfully processes the event - // without crashing. The exact behavior depends on implementation details, but - // it should handle the case where a dependent step is deleted gracefully. 
+ // Pipeline should be re-evaluated and added to map + if _, exists := controller.Pipelines[pipeline.Name]; !exists { + t.Error("Expected pipeline to be re-evaluated after knowledge creation") + } +} - // The pipeline may or may not be removed from map depending on the implementation - // but the method should not panic or error +func TestBasePipelineController_HandleKnowledgeUpdated(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } - // Get the pipeline status to verify it was processed - var updatedPipeline v1alpha1.Pipeline - err := client.Get(ctx, types.NamespacedName{Name: pipeline.Name}, &updatedPipeline) - if err != nil { - t.Errorf("Failed to get pipeline after step deletion: %v", err) + tests := []struct { + name string + oldKnowledge *v1alpha1.Knowledge + newKnowledge *v1alpha1.Knowledge + expectReEvaluate bool + }{ + { + name: "error state changed", + oldKnowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.KnowledgeConditionError, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + newKnowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + }, + expectReEvaluate: true, + }, + { + name: "data became available", + oldKnowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 0, + }, + }, + newKnowledge: 
&v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + }, + expectReEvaluate: true, + }, + { + name: "no relevant change", + oldKnowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 10, + }, + }, + newKnowledge: &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + }, + Status: v1alpha1.KnowledgeStatus{ + RawLength: 15, + }, + }, + expectReEvaluate: false, + }, } - // The status should reflect the current state - either ready with no steps, or not ready with error - // Both are valid depending on how the implementation handles missing mandatory steps + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + pipeline := &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Knowledges: []corev1.ObjectReference{ + {Name: "test-knowledge", Namespace: "default"}, + }, + }, + }, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tt.newKnowledge, pipeline). + WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). 
+ Build() + + controller := &BasePipelineController[mockPipeline]{ + Client: fakeClient, + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Initializer: &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + }, + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), + } + + evt := event.UpdateEvent{ + ObjectOld: tt.oldKnowledge, + ObjectNew: tt.newKnowledge, + } + + controller.HandleKnowledgeUpdated(context.Background(), evt, nil) + + _, exists := controller.Pipelines[pipeline.Name] + if tt.expectReEvaluate && !exists { + t.Error("Expected pipeline to be re-evaluated") + } + if !tt.expectReEvaluate && exists { + t.Error("Expected pipeline not to be re-evaluated") + } + }) + } } func TestBasePipelineController_HandleKnowledgeDeleted(t *testing.T) { - scheme := setupTestScheme() + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("Failed to add v1alpha1 scheme: %v", err) + } - knowledge := createTestKnowledge("test-knowledge", false, 10) - step := createTestStep(true, []corev1.ObjectReference{ - {Name: "test-knowledge", Namespace: "default"}, - }) - pipeline := createTestPipeline([]v1alpha1.StepInPipeline{ - { - Ref: corev1.ObjectReference{ - Name: "test-step", - Namespace: "default", - }, - Mandatory: true, + knowledge := &v1alpha1.Knowledge{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-knowledge", + Namespace: "default", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, }, - }) + } - objects := []client.Object{step, pipeline} + pipeline := &v1alpha1.Pipeline{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pipeline", + }, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Steps: []v1alpha1.StepSpec{ + { + Type: v1alpha1.StepTypeFilter, + Impl: "test-filter", + Mandatory: true, + Knowledges: []corev1.ObjectReference{ + {Name: 
"test-knowledge", Namespace: "default"}, + }, + }, + }, + }, + } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Step{}). + WithObjects(pipeline). + WithStatusSubresource(&v1alpha1.Pipeline{}). Build() - initializer := &mockInitializer{} controller := &BasePipelineController[mockPipeline]{ + Client: fakeClient, + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Initializer: &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + }, Pipelines: map[string]mockPipeline{ "test-pipeline": {name: "test-pipeline"}, }, - Initializer: initializer, - Client: client, - SchedulingDomain: "test", + PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - ctx := ctrl.LoggerInto(context.Background(), ctrl.Log) - evt := event.DeleteEvent{Object: knowledge} - queue := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[reconcile.Request]()) - - controller.HandleKnowledgeDeleted(ctx, evt, queue) - - // Verify step status was updated (should now be not ready due to missing knowledge) - var updatedStep v1alpha1.Step - err := client.Get(ctx, types.NamespacedName{Name: step.Name, Namespace: step.Namespace}, &updatedStep) - if err != nil { - t.Fatalf("Failed to get updated step: %v", err) + evt := event.DeleteEvent{ + Object: knowledge, } - if updatedStep.Status.Ready { - t.Error("Expected step to be not ready after knowledge deletion") + controller.HandleKnowledgeDeleted(context.Background(), evt, nil) + + // When knowledge is deleted, the pipeline is re-evaluated. + // Since the knowledge is now missing and the step is mandatory, + // the pipeline should be removed from the map. 
+ if _, exists := controller.Pipelines[pipeline.Name]; exists { + t.Error("Expected pipeline to be removed after knowledge deletion due to mandatory step") } } diff --git a/internal/scheduling/lib/pipeline_test.go b/internal/scheduling/lib/pipeline_test.go index de86cc8c..9a865138 100644 --- a/internal/scheduling/lib/pipeline_test.go +++ b/internal/scheduling/lib/pipeline_test.go @@ -18,7 +18,7 @@ type mockPipelineStep struct { name string } -func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (m *mockPipelineStep) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { return nil } diff --git a/internal/scheduling/lib/step.go b/internal/scheduling/lib/step.go index 845b3306..a25c55a2 100644 --- a/internal/scheduling/lib/step.go +++ b/internal/scheduling/lib/step.go @@ -32,7 +32,7 @@ func (EmptyStepOpts) Validate() error { return nil } // Interface for a scheduler step. type Step[RequestType PipelineRequest] interface { // Configure the step and initialize things like a database connection. - Init(ctx context.Context, client client.Client, step v1alpha1.Step) error + Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error // Run this step of the scheduling pipeline. // Return a map of keys to activation values. Important: keys that are // not in the map are considered as filtered out. @@ -53,8 +53,8 @@ type BaseStep[RequestType PipelineRequest, Opts StepOpts] struct { } // Init the step with the database and options. 
-func (s *BaseStep[RequestType, Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { - opts := conf.NewRawOptsBytes(step.Spec.Opts.Raw) +func (s *BaseStep[RequestType, Opts]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { + opts := conf.NewRawOptsBytes(step.Opts.Raw) if err := s.Load(opts); err != nil { return err } diff --git a/internal/scheduling/lib/step_monitor.go b/internal/scheduling/lib/step_monitor.go index ea6c8f84..ed6a79bd 100644 --- a/internal/scheduling/lib/step_monitor.go +++ b/internal/scheduling/lib/step_monitor.go @@ -44,7 +44,7 @@ type StepMonitor[RequestType PipelineRequest] struct { } // Initialize the wrapped step with the database and options. -func (s *StepMonitor[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (s *StepMonitor[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { return s.Step.Init(ctx, client, step) } @@ -52,12 +52,12 @@ func (s *StepMonitor[RequestType]) Init(ctx context.Context, client client.Clien func monitorStep[RequestType PipelineRequest]( _ context.Context, _ client.Client, - step v1alpha1.Step, + step v1alpha1.StepSpec, impl Step[RequestType], m PipelineMonitor, ) *StepMonitor[RequestType] { - stepName := step.Namespace + "/" + step.Name + stepName := step.Impl var runTimer prometheus.Observer if m.stepRunTimer != nil { runTimer = m.stepRunTimer. 
diff --git a/internal/scheduling/lib/step_test.go b/internal/scheduling/lib/step_test.go index 275d2a54..31d335cd 100644 --- a/internal/scheduling/lib/step_test.go +++ b/internal/scheduling/lib/step_test.go @@ -12,11 +12,11 @@ import ( ) type mockStep[RequestType PipelineRequest] struct { - InitFunc func(ctx context.Context, client client.Client, step v1alpha1.Step) error + InitFunc func(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error RunFunc func(traceLog *slog.Logger, request RequestType) (*StepResult, error) } -func (m *mockStep[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (m *mockStep[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { return m.InitFunc(ctx, client, step) } func (m *mockStep[RequestType]) Run(traceLog *slog.Logger, request RequestType) (*StepResult, error) { diff --git a/internal/scheduling/lib/step_validation.go b/internal/scheduling/lib/step_validation.go index 7ede29e5..638bf6e8 100644 --- a/internal/scheduling/lib/step_validation.go +++ b/internal/scheduling/lib/step_validation.go @@ -22,9 +22,9 @@ type StepValidator[RequestType PipelineRequest] struct { } // Initialize the wrapped step with the database and options. 
-func (s *StepValidator[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.Step) error { +func (s *StepValidator[RequestType]) Init(ctx context.Context, client client.Client, step v1alpha1.StepSpec) error { slog.Info( - "scheduler: init validation for step", "name", step.Name, + "scheduler: init validation for step", "name", step.Impl, "disabled", s.DisabledValidations, ) return s.Step.Init(ctx, client, step) diff --git a/tools/plutono/provisioning/dashboards/cortex-status.json b/tools/plutono/provisioning/dashboards/cortex-status.json index d3061ecc..fee633ef 100644 --- a/tools/plutono/provisioning/dashboards/cortex-status.json +++ b/tools/plutono/provisioning/dashboards/cortex-status.json @@ -276,7 +276,56 @@ }, "unit": "none" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(71, 71, 71)", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(71, 71, 71)", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + } + } + ] + } + ] }, "gridPos": { "h": 8, @@ -284,12 +333,12 @@ "x": 12, "y": 9 }, - "id": 53, + "id": 55, "options": { "colorMode": "background", "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", + "justifyMode": "auto", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" @@ -298,33 +347,42 @@ "values": true }, "text": {}, - "textMode": "name" + "textMode": "value_and_name" }, "pluginVersion": "7.5.37", "targets": [ { "exemplar": true, - "expr": "max by(step) (cortex_step_state{state!=\"ready\"}) * 0", + "expr": "cortex_decision_state{state=\"success\"}", "format": "time_series", "hide": false, "instant": true, 
"interval": "", - "legendFormat": "{{step}}", + "legendFormat": "{{state}}", "refId": "A" }, { "exemplar": true, - "expr": "max by(step) (cortex_step_state{state=\"ready\"}) * 1", + "expr": "cortex_decision_state{state!=\"success\",state!=\"waiting\"}", "hide": false, "instant": true, "interval": "", - "legendFormat": "{{step}}", + "legendFormat": "{{state}}", "refId": "B" + }, + { + "exemplar": true, + "expr": "cortex_decision_state{state!=\"success\",state=\"waiting\"}", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{state}}", + "refId": "C" } ], "timeFrom": null, "timeShift": null, - "title": "Step status", + "title": "Decision status", "type": "stat" }, { @@ -401,138 +459,6 @@ "title": "KPI status", "type": "stat" }, - { - "datasource": "prometheus-openstack", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "rgb(66, 66, 66)", - "value": 1 - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byFrameRefID", - "options": "B" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(71, 71, 71)", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - } - ] - }, - { - "matcher": { - "id": "byFrameRefID", - "options": "C" - }, - "properties": [ - { - "id": "thresholds", - "value": { - "mode": "absolute", - "steps": [ - { - "color": "rgb(71, 71, 71)", - "value": null - }, - { - "color": "red", - "value": 10 - } - ] - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 55, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": true - }, - "text": {}, - "textMode": "value_and_name" - }, - 
"pluginVersion": "7.5.37", - "targets": [ - { - "exemplar": true, - "expr": "cortex_decision_state{state=\"success\"}", - "format": "time_series", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{state}}", - "refId": "A" - }, - { - "exemplar": true, - "expr": "cortex_decision_state{state!=\"success\",state!=\"waiting\"}", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{state}}", - "refId": "B" - }, - { - "exemplar": true, - "expr": "cortex_decision_state{state!=\"success\",state=\"waiting\"}", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "{{state}}", - "refId": "C" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Decision status", - "type": "stat" - }, { "collapsed": false, "datasource": null,