104 changes: 104 additions & 0 deletions pkg/controller/node/node_controller.go
@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"maps"
"reflect"
"sort"
"time"
@@ -1401,6 +1402,105 @@ func (ctrl *Controller) filterControlPlaneCandidateNodes(pool *mcfgv1.MachineConfigPool,
return newCandidates, capacity, nil
}

// filterCustomPoolBootedNodes removes from the candidate list any node that booted
// directly into a custom pool, after labeling the node with that pool's node selector labels.
func (ctrl *Controller) filterCustomPoolBootedNodes(candidates []*corev1.Node) []*corev1.Node {
var newCandidates []*corev1.Node
for _, node := range candidates {
isCustomBootNode, poolName := ctrl.isCustomPoolBootedNode(node)
if isCustomBootNode {
if err := ctrl.applyCustomPoolLabels(node, poolName); err != nil {
// best effort, log on failure, keep in candidate list
klog.Errorf("Failed to apply custom pool labels to node %s: %v", node.Name, err)
} else {
// On a successful update of the custom pool label, remove it from the candidate list
klog.Infof("node %s booted on pool %s and has been moved, dropping from candidate list", node.Name, poolName)
continue
}
}
newCandidates = append(newCandidates, node)
}
return newCandidates
}

// isCustomPoolBootedNode checks if a node directly booted into a custom pool
// by checking if it has the FirstPivotMachineConfigAnnotation and if that
// MachineConfig belongs to a custom pool (not master/worker).
// Returns whether the node booted into a custom pool and, if so, that pool's name.
func (ctrl *Controller) isCustomPoolBootedNode(node *corev1.Node) (bool, string) {

// Check whether the custom pool labels have already been applied automatically; nothing to do in that case
_, customPoolApplied := node.Annotations[daemonconsts.CustomPoolLabelsAppliedAnnotationKey]
if customPoolApplied {
return false, ""
}

// Get first pivot machineConfig, nothing to do if it doesn't exist
mcName, isFirstBoot := node.Annotations[daemonconsts.FirstPivotMachineConfigAnnotationKey]
if !isFirstBoot {
return false, ""
}

// Get the MachineConfig to check its owner references
mc, err := ctrl.mcLister.Get(mcName)
if err != nil {
klog.V(4).Infof("Failed to get MachineConfig %s: %v", mcName, err)
return false, ""
}

// Check if the MachineConfig has an owner reference to a MachineConfigPool
ownerRefs := mc.GetOwnerReferences()
if len(ownerRefs) == 0 {
klog.V(4).Infof("MachineConfig %s has no owner references", mcName)
return false, ""
}

// Get the pool name from the first owner reference
poolName := ownerRefs[0].Name

// Return true only if this is NOT a standard master or worker pool, along with poolName
return poolName != ctrlcommon.MachineConfigPoolMaster && poolName != ctrlcommon.MachineConfigPoolWorker, poolName
}

// applyCustomPoolLabels applies the node selector labels from the named custom
// MachineConfigPool to the node and annotates the node to record that this was done.
func (ctrl *Controller) applyCustomPoolLabels(node *corev1.Node, poolName string) error {

// Get the MachineConfigPool
pool, err := ctrl.mcpLister.Get(poolName)
if err != nil {
return fmt.Errorf("failed to get MachineConfigPool %s: %w", poolName, err)
}

// Extract labels from the pool's node selector
if pool.Spec.NodeSelector == nil || pool.Spec.NodeSelector.MatchLabels == nil {
klog.V(4).Infof("MachineConfigPool %s has no node selector labels", poolName)
return nil
}

labelsToApply := pool.Spec.NodeSelector.MatchLabels
if len(labelsToApply) == 0 {
return nil
}

klog.Infof("Applying node selector labels from custom pool %s to node %s: %v", poolName, node.Name, labelsToApply)

// Apply the labels to the node and add annotation indicating custom pool labels were applied
_, err = internal.UpdateNodeRetry(ctrl.kubeClient.CoreV1().Nodes(), ctrl.nodeLister, node.Name, func(node *corev1.Node) {
// Apply the custom pool labels, guarding against nil maps on the fetched node
if node.Labels == nil {
node.Labels = map[string]string{}
}
maps.Copy(node.Labels, labelsToApply)

// Add annotation to signal that custom pool labels were automatically applied
if node.Annotations == nil {
node.Annotations = map[string]string{}
}
node.Annotations[daemonconsts.CustomPoolLabelsAppliedAnnotationKey] = ""
})
if err != nil {
return fmt.Errorf("failed to apply custom pool labels to node %s: %w", node.Name, err)
}

klog.Infof("Successfully applied custom pool labels to node %s", node.Name)
return nil
}

// SetDesiredStateFromPool in the old MCO explains how this works. This must NOT fail when the
// MachineOSBuild doesn't exist, so the logic is still keyed off pools, with isLayeredPool meaning the MachineOSBuild exists.
// updateCandidateMachines sets the desiredConfig annotation on the candidate machines
@@ -1414,6 +1514,10 @@ func (ctrl *Controller) updateCandidateMachines(layered bool, mosc *mcfgv1.MachineOSConfig
// In practice right now these counts will be 1 but let's stay general to support 5 etcd nodes in the future
ctrl.logPool(pool, "filtered to %d candidate nodes for update, capacity: %d", len(candidates), capacity)
}
candidates = ctrl.filterCustomPoolBootedNodes(candidates)
if len(candidates) == 0 {
return nil
}
if capacity < uint(len(candidates)) {
// when list is longer than maxUnavailable, rollout nodes in zone order, zones without zone label
// are done last from oldest to youngest. this reduces likelihood of randomly picking nodes
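For readers following along, here is a minimal, self-contained sketch of the detection flow these hunks implement. It is not the controller's code: fakeNode and renderedMCOwnerPool are illustrative stand-ins for the corev1.Node object and the mcLister/owner-reference lookup, and all pool and config names are made up.

package main

import "fmt"

const (
	firstPivotAnno    = "machineconfiguration.openshift.io/firstPivotConfig"
	labelsAppliedAnno = "machineconfiguration.openshift.io/customPoolLabelsApplied"
)

// fakeNode models only the fields the controller inspects.
type fakeNode struct {
	Name        string
	Annotations map[string]string
	Labels      map[string]string
}

// renderedMCOwnerPool stands in for looking up the MachineConfig's owner
// references: it maps a rendered MachineConfig name to its owning pool.
var renderedMCOwnerPool = map[string]string{
	"rendered-infra-abc123":  "infra",
	"rendered-worker-def456": "worker",
}

// customBootPool mirrors isCustomPoolBootedNode: it returns the custom pool a
// node first pivoted into, or false for standard pools and already-handled nodes.
func customBootPool(n *fakeNode) (string, bool) {
	if _, done := n.Annotations[labelsAppliedAnno]; done {
		return "", false // labels were already applied on an earlier sync
	}
	mcName, ok := n.Annotations[firstPivotAnno]
	if !ok {
		return "", false // node never recorded a first-pivot config
	}
	pool, ok := renderedMCOwnerPool[mcName]
	if !ok || pool == "master" || pool == "worker" {
		return "", false // unknown MC, or a standard pool: nothing to do
	}
	return pool, true
}

func main() {
	n := &fakeNode{
		Name:        "node-a",
		Annotations: map[string]string{firstPivotAnno: "rendered-infra-abc123"},
	}
	if pool, ok := customBootPool(n); ok {
		fmt.Printf("%s booted into custom pool %q; label it and drop it from the candidate list\n", n.Name, pool)
	}
}

In the real controller, a positive result leads to applyCustomPoolLabels copying the pool's NodeSelector.MatchLabels onto the node, after which the node is removed from the update candidates.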
4 changes: 4 additions & 0 deletions pkg/daemon/constants/constants.go
@@ -17,6 +17,10 @@ const (
CurrentMachineConfigAnnotationKey = "machineconfiguration.openshift.io/currentConfig"
// DesiredMachineConfigAnnotationKey is used to specify the desired MachineConfig for a machine
DesiredMachineConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig"
// FirstPivotMachineConfigAnnotationKey is used to specify the MachineConfig the node pivoted to after firstboot.
FirstPivotMachineConfigAnnotationKey = "machineconfiguration.openshift.io/firstPivotConfig"
// CustomPoolLabelsAppliedAnnotationKey is set by the node controller to indicate custom pool labels were automatically applied
CustomPoolLabelsAppliedAnnotationKey = "machineconfiguration.openshift.io/customPoolLabelsApplied"
// MachineConfigDaemonStateAnnotationKey is used to fetch the state of the daemon on the machine.
MachineConfigDaemonStateAnnotationKey = "machineconfiguration.openshift.io/state"
// DesiredDrainerAnnotationKey is set by the MCD to indicate drain/uncordon requests
10 changes: 5 additions & 5 deletions pkg/daemon/daemon.go
@@ -875,7 +875,7 @@ func (dn *Daemon) syncNode(key string) error {
klog.Errorf("Error making MCN for Updated: %v", err)
}
}
klog.V(2).Infof("Node %s is already synced", node.Name)
klog.V(4).Infof("Node %s is already synced", node.Name)
if !dn.booting && dn.initializeHealthServer {
// we want to wait until we are done booting AND we only want to do this once
// we also want to give ourselves a little extra buffer. The corner case here is sometimes we get thru the first sync, and then the errors
@@ -1577,7 +1577,7 @@ func (dn *Daemon) runKubeletHealthzMonitor(stopCh <-chan struct{}, exitCh chan<- error
}

func (dn *Daemon) getHealth() error {
klog.V(2).Info("Kubelet health running")
klog.V(4).Info("Kubelet health running")
ctx, cancel := context.WithTimeout(context.Background(), kubeletHealthzTimeout)
defer cancel()

@@ -1604,7 +1604,7 @@ func (dn *Daemon) getHealth() error {
return nil
}

klog.V(2).Info("Kubelet health ok")
klog.V(4).Info("Kubelet health ok")

return nil
}
@@ -2508,7 +2508,7 @@ func (dn *Daemon) prepUpdateFromCluster() (*updateFromCluster, error) {
if desiredConfigName == currentConfigName {
if state == constants.MachineConfigDaemonStateDone {
// No actual update to the config
klog.V(2).Info("No updating is required")
klog.V(4).Info("No updating is required")
return nil, nil
}
// This seems like it shouldn't happen...let's just warn for now.
@@ -2518,7 +2518,7 @@
if desiredImage == odc.currentImage && desiredConfigName == currentConfigName {
if state == constants.MachineConfigDaemonStateDone {
// No actual update to the config
klog.V(2).Info("No updating is required")
klog.V(4).Info("No updating is required")
return nil, nil
}
// This seems like it shouldn't happen...let's just warn for now.
1 change: 1 addition & 0 deletions pkg/server/server.go
@@ -169,6 +169,7 @@ func getNodeAnnotation(conf string) (string, error) {
nodeAnnotations := map[string]string{
daemonconsts.CurrentMachineConfigAnnotationKey: conf,
daemonconsts.DesiredMachineConfigAnnotationKey: conf,
daemonconsts.FirstPivotMachineConfigAnnotationKey: conf,
[Inline review comment from a contributor: "Oh, for some reason I forgot we did this, neat"]
daemonconsts.MachineConfigDaemonStateAnnotationKey: daemonconsts.MachineConfigDaemonStateDone,
}
contents, err := json.Marshal(nodeAnnotations)
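To make the server-side change concrete, here is a small sketch of the JSON this function would now serve to a first-booting node. The rendered-config name is illustrative, and "Done" is assumed to be the value of MachineConfigDaemonStateDone.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	conf := "rendered-infra-abc123" // illustrative rendered-config name
	nodeAnnotations := map[string]string{
		"machineconfiguration.openshift.io/currentConfig":    conf,
		"machineconfiguration.openshift.io/desiredConfig":    conf,
		"machineconfiguration.openshift.io/firstPivotConfig": conf,
		"machineconfiguration.openshift.io/state":            "Done", // assumed MachineConfigDaemonStateDone
	}
	out, err := json.MarshalIndent(nodeAnnotations, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
	// The node controller later reads firstPivotConfig back to decide whether
	// the node booted straight into a custom pool.
}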