Skip to content

Commit 1ba662b

Browse files
Filter disabled -> filter maintenance + filter status conditions
1 parent eb4fa88 commit 1ba662b

File tree

10 files changed

+699
-251
lines changed

10 files changed

+699
-251
lines changed

helm/bundles/cortex-nova/templates/pipelines.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ spec:
6464
- ref: {name: filter-has-requested-traits}
6565
- ref: {name: filter-has-accelerators}
6666
- ref: {name: filter-correct-az}
67-
- ref: {name: filter-disabled}
67+
- ref: {name: filter-status-conditions}
68+
- ref: {name: filter-maintenance}
6869
- ref: {name: filter-external-customer}
6970
- ref: {name: filter-packed-virtqueue}
7071
- ref: {name: filter-project-aggregates}

helm/bundles/cortex-nova/templates/steps.yaml

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -209,42 +209,36 @@ metadata:
209209
name: filter-correct-az
210210
spec:
211211
operator: cortex-nova
212-
# TODO: Remove this database reference once the scheduler
213-
# step doesn't need it anymore.
214-
databaseSecretRef:
215-
name: cortex-nova-postgres
216-
namespace: {{ .Release.Namespace }}
217212
type: filter
218213
impl: filter_correct_az
219214
description: |
220215
This step will filter out hosts whose aggregate information indicates they
221216
are not placed in the requested availability zone.
222-
knowledges:
223-
# TODO: Remove this dependency since this is a filter which should
224-
# not depend on a potentially non-recent knowledge.
225-
# This should be changed to use the hypervisor CRD.
226-
- name: host-az
227217
---
228218
apiVersion: cortex.cloud/v1alpha1
229219
kind: Step
230220
metadata:
231-
name: filter-disabled
221+
name: filter-status-conditions
232222
spec:
233223
operator: cortex-nova
234-
# TODO: Remove this database reference once the scheduler
235-
# step doesn't need it anymore.
236-
databaseSecretRef:
237-
name: cortex-nova-postgres
238-
namespace: {{ .Release.Namespace }}
239224
type: filter
240-
impl: filter_disabled
225+
impl: filter_status_conditions
241226
description: |
242-
This step will filter out hosts for which the hypervisor status is
243-
`disabled`, the hypervisor state is `down`, or the trait
244-
`COMPUTE_STATUS_DISABLED` is assigned.
245-
# TODO: This step currently depends on traits directly from the datasources.
246-
# This should be changed to use the hypervisor CRD.
247-
knowledges: []
227+
This step will filter out hosts for which the hypervisor status conditions
228+
do not meet the expected values, for example, that the hypervisor is ready
229+
and not disabled.
230+
---
231+
apiVersion: cortex.cloud/v1alpha1
232+
kind: Step
233+
metadata:
234+
name: filter-maintenance
235+
spec:
236+
operator: cortex-nova
237+
type: filter
238+
impl: filter_maintenance
239+
description: |
240+
This step will filter out hosts that are currently in maintenance mode that
241+
prevents scheduling, for example, manual maintenance or termination.
248242
---
249243
apiVersion: cortex.cloud/v1alpha1
250244
kind: Step

internal/scheduling/decisions/nova/pipeline_controller_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,11 +285,11 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
285285
steps: []v1alpha1.Step{
286286
{
287287
ObjectMeta: metav1.ObjectMeta{
288-
Name: "filter_disabled",
288+
Name: "filter_status_conditions",
289289
},
290290
Spec: v1alpha1.StepSpec{
291291
Type: v1alpha1.StepTypeFilter,
292-
Impl: "filter_disabled",
292+
Impl: "filter_status_conditions",
293293
},
294294
},
295295
},
@@ -319,7 +319,7 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
319319
},
320320
Spec: v1alpha1.StepSpec{
321321
Type: v1alpha1.StepTypeFilter,
322-
Impl: "filter_disabled",
322+
Impl: "filter_status_conditions",
323323
Opts: runtime.RawExtension{
324324
Raw: []byte(`{"scope":{"host_capabilities":{"any_of_trait_infixes":["TEST_TRAIT"]}}}`),
325325
},
@@ -337,7 +337,7 @@ func TestDecisionPipelineController_InitPipeline(t *testing.T) {
337337
},
338338
Spec: v1alpha1.StepSpec{
339339
Type: v1alpha1.StepTypeFilter,
340-
Impl: "filter_disabled",
340+
Impl: "filter_status_conditions",
341341
Opts: runtime.RawExtension{
342342
Raw: []byte(`invalid json`),
343343
},

internal/scheduling/decisions/nova/plugins/filters/filter_disabled.go

Lines changed: 0 additions & 47 deletions
This file was deleted.

internal/scheduling/decisions/nova/plugins/filters/filter_disabled_test.go

Lines changed: 0 additions & 175 deletions
This file was deleted.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// Copyright SAP SE
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package filters
5+
6+
import (
7+
"context"
8+
"log/slog"
9+
10+
api "github.com/cobaltcore-dev/cortex/api/delegation/nova"
11+
"github.com/cobaltcore-dev/cortex/internal/scheduling/lib"
12+
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
13+
)
14+
15+
type FilterMaintenanceStep struct {
16+
lib.BaseStep[api.ExternalSchedulerRequest, lib.EmptyStepOpts]
17+
}
18+
19+
// Check that the maintenance spec of the hypervisor doesn't prevent scheduling.
20+
func (s *FilterMaintenanceStep) Run(traceLog *slog.Logger, request api.ExternalSchedulerRequest) (*lib.StepResult, error) {
21+
result := s.PrepareResult(request)
22+
23+
hvs := &hv1.HypervisorList{}
24+
if err := s.Client.List(context.Background(), hvs); err != nil {
25+
traceLog.Error("failed to list hypervisors", "error", err)
26+
return nil, err
27+
}
28+
29+
flagsPreventingScheduling := map[string]bool{
30+
hv1.MaintenanceUnset: false,
31+
hv1.MaintenanceManual: true,
32+
hv1.MaintenanceAuto: false,
33+
hv1.MaintenanceHA: false,
34+
hv1.MaintenanceTermination: true,
35+
}
36+
37+
var hostsReady = make(map[string]struct{})
38+
for _, hv := range hvs.Items {
39+
preventScheduling, ok := flagsPreventingScheduling[hv.Spec.Maintenance]
40+
if !ok {
41+
traceLog.Info(
42+
"hypervisor has unknown maintenance flag, filtering host",
43+
"host", hv.Name, "maintenance", hv.Spec.Maintenance,
44+
)
45+
continue
46+
}
47+
if preventScheduling {
48+
traceLog.Info(
49+
"hypervisor maintenance flag prevents scheduling, filtering host",
50+
"host", hv.Name, "maintenance", hv.Spec.Maintenance,
51+
)
52+
continue
53+
}
54+
hostsReady[hv.Name] = struct{}{}
55+
}
56+
57+
traceLog.Info("hosts passing maintenance filter", "hosts", hostsReady)
58+
for host := range result.Activations {
59+
if _, ok := hostsReady[host]; ok {
60+
continue
61+
}
62+
delete(result.Activations, host)
63+
}
64+
return result, nil
65+
}

0 commit comments

Comments
 (0)