Skip to content

Commit 46ea89e

Browse files
authored
Merge pull request #550 from nebius/fix-autohealing-2
fix false positive reboots
2 parents 4ab8e4d + 17650bc commit 46ea89e

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

internal/soperatorchecks/k8s_nodes_controller.go

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,14 @@ func (c *K8SNodesController) processRebootCondition(ctx context.Context, k8sNode
180 180
logger.Info("no action needed: no reboot reason")
181 181
return nil
182 182
}
183+
184+
if rebootCondition.Status == corev1.ConditionTrue && degradedCondition.Status == corev1.ConditionTrue &&
185+
rebootCondition.LastTransitionTime.Time.After(degradedCondition.LastTransitionTime.Time) {
186+
187+
logger.Info("no action needed: k8s node already was rebooted")
188+
return nil
189+
}
190+
183 191
logger.Info("setting SlurmNodeReboot: true")
184 192
return setK8SNodeCondition(ctx, c.Client, k8sNode.Name, newNodeCondition(
185 193
consts.SlurmNodeReboot,

0 commit comments

Comments
 (0)