Skip to content

Commit 307018d

Browse files
authored
Merge pull request #584 from ingvagabund/release-4.19
Backporting 4.20/master fixes to 4.19/KDO 5.2.0 release
2 parents 7afc111 + 9b69f7d commit 307018d

13 files changed

+351
-25
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ The profile exposes the following customization:
189189
- `devActualUtilizationProfile`: Enable load-aware descheduling.
190190
- `devDeviationThresholds`: Have the thresholds be based on the average utilization.
191191

192-
By default, this profile will enable load-aware descheduling based on the `PrometheusCPUCombinedProfile` Prometheus query.
192+
By default, this profile will enable load-aware descheduling based on the `PrometheusCPUCombined` Prometheus query.
193193
By default, the thresholds will be dynamic (based on the distance from the average utilization) and asymmetric (all the nodes below the average will be considered as underutilized to help rebalancing overutilized outliers) tolerating low deviations (10%).
194194

195195
### EvictPodsWithPVC
@@ -227,7 +227,7 @@ The operator provides the following profiles:
227227
- `PrometheusCPUPSIPressureByUtilization`: `avg by (instance) ( rate(node_pressure_cpu_waiting_seconds_total[1m])) and (1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[1m]))) > 0.7 or avg by (instance) ( rate(node_pressure_cpu_waiting_seconds_total[1m])) * 0` (`node_pressure_cpu_waiting_seconds_total` is reported in OpenShift only for nodes configured with psi=1 kernel argument; the query filters out PSI pressure on nodes with average CPU utilization < 0.7 to avoid false-positive pressure spikes due to self-imposed CPU throttling)
228228
- `PrometheusMemoryPSIPressure`: `rate(node_pressure_memory_waiting_seconds_total[1m])` (`node_pressure_memory_waiting_seconds_total` is reported in OpenShift only for nodes configured with psi=1 kernel argument)
229229
- `PrometheusIOPSIPressure`: `rate(node_pressure_io_waiting_seconds_total[1m])` (`node_pressure_io_waiting_seconds_total` is reported in OpenShift only for nodes configured with psi=1 kernel argument)
230-
- `PrometheusCPUCombinedProfile`: `descheduler:combined_utilization_and_pressure:avg1m` (`descheduler:combined_utilization_and_pressure:avg1m` uses a combination of CPU utilization and CPU PSI pressure based on a recording rule; CPU PSI pressure is reported in OpenShift only for nodes configured with psi=1 kernel argument)
230+
- `PrometheusCPUCombined`: `descheduler:combined_utilization_and_pressure:avg1m` (`descheduler:combined_utilization_and_pressure:avg1m` uses a combination of CPU utilization and CPU PSI pressure based on a recording rule; CPU PSI pressure is reported in OpenShift only for nodes configured with psi=1 kernel argument)
231231

232232
```yaml
233233
apiVersion: operator.openshift.io/v1

pkg/operator/target_config_reconciler.go

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,8 @@ func (c TargetConfigReconciler) sync() error {
155155
return err
156156
}
157157

158-
if descheduler.Spec.DeschedulingIntervalSeconds == nil {
159-
return fmt.Errorf("descheduler should have an interval set")
158+
if descheduler.Spec.DeschedulingIntervalSeconds == nil || *descheduler.Spec.DeschedulingIntervalSeconds <= 0 {
159+
return fmt.Errorf("descheduler should have an interval set and it should be greater than 0")
160160
}
161161

162162
specAnnotations := map[string]string{
@@ -1301,19 +1301,19 @@ func (c *TargetConfigReconciler) manageConfigMap(descheduler *deschedulerv1.Kube
13011301
}
13021302
}
13031303

1304-
if descheduler.Spec.ProfileCustomizations != nil && descheduler.Spec.ProfileCustomizations.DevActualUtilizationProfile != "" {
1304+
if c.isPrometheusAsMetricsProviderForProfiles(descheduler) {
13051305
// detect the prometheus server url
13061306
route, err := c.routeRouteLister.Routes("openshift-monitoring").Get("prometheus-k8s")
13071307
if err != nil {
1308-
return nil, false, fmt.Errorf("unable to get openshift-monitoring/prometheus-k8s route: %v", err)
1308+
return nil, true, fmt.Errorf("unable to get openshift-monitoring/prometheus-k8s route: %v", err)
13091309
}
13101310
if len(route.Status.Ingress) == 0 {
1311-
return nil, false, fmt.Errorf("No ingress found in openshift-monitoring/prometheus-k8s route")
1311+
return nil, true, fmt.Errorf("No ingress found in openshift-monitoring/prometheus-k8s route")
13121312
}
13131313
if route.Status.Ingress[0].Host == "" {
1314-
return nil, false, fmt.Errorf("Host for status.ingress[0] in openshift-monitoring/prometheus-k8s route is empty")
1314+
return nil, true, fmt.Errorf("Host for status.ingress[0] in openshift-monitoring/prometheus-k8s route is empty")
13151315
}
1316-
err = c.checkNamepsaceMonitoringLabel()
1316+
err = c.checkNamespaceMonitoringLabel()
13171317
if err != nil {
13181318
return nil, false, err
13191319
}
@@ -1640,7 +1640,7 @@ func (c *TargetConfigReconciler) eventHandler() cache.ResourceEventHandler {
16401640
}
16411641
}
16421642

1643-
func (c *TargetConfigReconciler) checkNamepsaceMonitoringLabel() error {
1643+
func (c *TargetConfigReconciler) checkNamespaceMonitoringLabel() error {
16441644
operatorNamespace, err := c.namespaceLister.Get(operatorclient.OperatorNamespace)
16451645
if err != nil {
16461646
klog.ErrorS(err, "error fetching operator namespace")
@@ -1714,3 +1714,14 @@ func (c *TargetConfigReconciler) isSoftTainterNeeded(descheduler *deschedulerv1.
17141714
}
17151715
return leftoverSoftTaints, nil
17161716
}
1717+
1718+
// isPrometheusAsMetricsProviderForProfiles returns true when at least a profile that by default relies on PrometheusMetrics is in use
1719+
// or the user is explicitly configuring DevActualUtilizationProfile profile customization
1720+
func (c *TargetConfigReconciler) isPrometheusAsMetricsProviderForProfiles(descheduler *deschedulerv1.KubeDescheduler) bool {
1721+
if descheduler != nil &&
1722+
(slices.Contains(descheduler.Spec.Profiles, deschedulerv1.RelieveAndMigrate) ||
1723+
(descheduler.Spec.ProfileCustomizations != nil && descheduler.Spec.ProfileCustomizations.DevActualUtilizationProfile != "")) {
1724+
return true
1725+
}
1726+
return false
1727+
}

0 commit comments

Comments
 (0)