@@ -36,74 +36,46 @@ import (
3636 "github.com/project-codeflare/appwrapper/pkg/config"
3737)
3838
39- // NodeHealthMonitor maintains the set of nodes that Autopilot has labelled as unhealthy
39+ // NodeHealthMonitor watches Nodes and maintains mappings of Nodes that have either
40+ // been marked as Unschedulable or that have been labeled to indicate that
41+ // they have resources that Autopilot has tainted as NoSchedule or NoExecute.
42+ // This information is used to automate the maintenance of the lendingLimit of
43+ // a designated slack ClusterQueue and to migrate running workloads away from NoExecute resources.
4044type NodeHealthMonitor struct {
4145 client.Client // embedded client; Reconcile uses its Get to fetch the Node and the slack ClusterQueue
4246 Config * config.AppWrapperConfig // supplies Autopilot.ResourceTaints and SlackQueueName
4347}
4448
4549var (
46- // unhealthyNodes is a mapping from Node names to a set of resources that Autopilot has labeled as unhealthy on that Node
47- unhealthyNodes = make (map [string ]sets.Set [string ])
48- unhealthyNodesMutex sync.RWMutex
49-
50- // unschedulableNodes is a mapping from Node names to resource quantities than Autopilot has labeled as unschedulable on that Node
51- unschedulableNodes = make (map [string ]map [string ]* resource.Quantity )
50+ // noExecuteNodes is a mapping from Node names to the set of resources with an Autopilot NoExecute taint
51+ noExecuteNodes = make (map [string ]sets.Set [string ])
52+ noExecuteNodesMutex sync.RWMutex // held by updateNoExecuteNodes while it mutates noExecuteNodes
53+
54+ // noScheduleNodes is a mapping from Node names to resource quantities that are unschedulable.
55+ // A resource may be unschedulable either because:
56+ // (a) the Node is cordoned (node.Spec.Unschedulable is true) or
57+ // (b) Autopilot has labeled the Node with either a NoExecute or NoSchedule taint.
58+ noScheduleNodes = make (map [string ]map [string ]* resource.Quantity )
5259)
5360
5461// permission to watch nodes
5562//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch
5663//+kubebuilder:rbac:groups=kueue.x-k8s.io,resources=clusterqueues,verbs=get;list;watch;update;patch
5764
58- //gocyclo:ignore
5965func (r * NodeHealthMonitor ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
6066 node := & v1.Node {}
6167 if err := r .Get (ctx , req .NamespacedName , node ); err != nil {
6268 return ctrl.Result {}, nil
6369 }
6470
65- flaggedResources := make (sets.Set [string ])
66- for key , value := range node .GetLabels () {
67- for resourceName , taints := range r .Config .Autopilot .ResourceTaints {
68- for _ , taint := range taints {
69- if key == taint .Key && value == taint .Value && taint .Effect == v1 .TaintEffectNoExecute {
70- flaggedResources .Insert (resourceName )
71- }
72- }
73- }
74- }
75-
76- nodeChanged := false
77- unhealthyNodesMutex .Lock () // BEGIN CRITICAL SECTION
78- if priorEntry , ok := unhealthyNodes [node .GetName ()]; ok {
79- if len (flaggedResources ) == 0 {
80- delete (unhealthyNodes , node .GetName ())
81- nodeChanged = true
82- } else if ! priorEntry .Equal (flaggedResources ) {
83- unhealthyNodes [node .GetName ()] = flaggedResources
84- nodeChanged = true
85- }
86- } else if len (flaggedResources ) > 0 {
87- unhealthyNodes [node .GetName ()] = flaggedResources
88- nodeChanged = true
89- }
90- unhealthyNodesMutex .Unlock () // END CRITICAL SECTION
71+ r .updateNoExecuteNodes (ctx , node )
9172
92- // Unsynchronized reads of unhealthyNodes below are safe because this method
93- // is the only writer to the map and the controller runtime is configured to
94- // not allow concurrent execution of this method.
95-
96- if nodeChanged {
97- log .FromContext (ctx ).Info ("Updated node health information" , "Number Unhealthy Nodes" , len (unhealthyNodes ), "Unhealthy Resource Details" , unhealthyNodes )
98- }
99-
100- // update lending limits on slack quota if configured
73+ // If there is a slack ClusterQueue, update its lending limits
10174
10275 if r .Config .SlackQueueName == "" {
10376 return ctrl.Result {}, nil
10477 }
10578
106- // get slack quota
10779 cq := & kueue.ClusterQueue {}
10880 if err := r .Get (ctx , types.NamespacedName {Name : r .Config .SlackQueueName }, cq ); err != nil {
10981 if errors .IsNotFound (err ) {
@@ -112,36 +84,80 @@ func (r *NodeHealthMonitor) Reconcile(ctx context.Context, req ctrl.Request) (ct
11284 return ctrl.Result {}, err
11385 }
11486
87+ r .updateNoScheduleNodes (ctx , cq , node )
88+
89+ return r .updateLendingLimits (ctx , cq )
90+ }
91+
92+ func (r * NodeHealthMonitor ) updateNoExecuteNodes (ctx context.Context , node * v1.Node ) {
93+ noExecuteResources := make (sets.Set [string ])
94+ for key , value := range node .GetLabels () {
95+ for resourceName , taints := range r .Config .Autopilot .ResourceTaints {
96+ for _ , taint := range taints {
97+ if key == taint .Key && value == taint .Value && taint .Effect == v1 .TaintEffectNoExecute {
98+ noExecuteResources .Insert (resourceName )
99+ }
100+ }
101+ }
102+ }
103+
104+ noExecuteNodesChanged := false
105+ noExecuteNodesMutex .Lock () // BEGIN CRITICAL SECTION
106+ if priorEntry , ok := noExecuteNodes [node .GetName ()]; ok {
107+ if len (noExecuteResources ) == 0 {
108+ delete (noExecuteNodes , node .GetName ())
109+ noExecuteNodesChanged = true
110+ } else if ! priorEntry .Equal (noExecuteResources ) {
111+ noExecuteNodes [node .GetName ()] = noExecuteResources
112+ noExecuteNodesChanged = true
113+ }
114+ } else if len (noExecuteResources ) > 0 {
115+ noExecuteNodes [node .GetName ()] = noExecuteResources
116+ noExecuteNodesChanged = true
117+ }
118+ noExecuteNodesMutex .Unlock () // END CRITICAL SECTION
119+
120+ // Safe to log outside the mutex because because this method is the only writer of noExecuteNodes
121+ // and the controller runtime is configured to not allow concurrent execution of this controller.
122+ if noExecuteNodesChanged {
123+ log .FromContext (ctx ).Info ("Updated node NoExecute information" , "Number NoExecute Nodes" , len (noExecuteNodes ), "NoExecute Resource Details" , noExecuteNodes )
124+ }
125+ }
126+
127+ func (r * NodeHealthMonitor ) updateNoScheduleNodes (_ context.Context , cq * kueue.ClusterQueue , node * v1.Node ) {
115128 // update unschedulable resource quantities for this node
116- flaggedQuantities := make (map [string ]* resource.Quantity )
129+ noScheduleQuantities := make (map [string ]* resource.Quantity )
117130 if node .Spec .Unschedulable {
118- // flag all non-pod resources covered by cq if the node is cordoned
131+ // add all non-pod resources covered by cq if the node is cordoned
119132 for _ , resourceName := range cq .Spec .ResourceGroups [0 ].Flavors [0 ].Resources {
120133 if string (resourceName .Name ) != "pods" {
121- flaggedQuantities [string (resourceName .Name )] = node .Status .Capacity .Name (resourceName .Name , resource .DecimalSI )
134+ noScheduleQuantities [string (resourceName .Name )] = node .Status .Capacity .Name (resourceName .Name , resource .DecimalSI )
122135 }
123136 }
124137 } else {
125138 for key , value := range node .GetLabels () {
126139 for resourceName , taints := range r .Config .Autopilot .ResourceTaints {
127140 for _ , taint := range taints {
128141 if key == taint .Key && value == taint .Value {
129- flaggedQuantities [resourceName ] = node .Status .Capacity .Name (v1 .ResourceName (resourceName ), resource .DecimalSI )
142+ noScheduleQuantities [resourceName ] = node .Status .Capacity .Name (v1 .ResourceName (resourceName ), resource .DecimalSI )
130143 }
131144 }
132145 }
133146 }
134147 }
135148
136- if len (flaggedQuantities ) > 0 {
137- unschedulableNodes [node .GetName ()] = flaggedQuantities
149+ if len (noScheduleQuantities ) > 0 {
150+ noScheduleNodes [node .GetName ()] = noScheduleQuantities
138151 } else {
139- delete (unschedulableNodes , node .GetName ())
152+ delete (noScheduleNodes , node .GetName ())
140153 }
154+ }
155+
156+ func (r * NodeHealthMonitor ) updateLendingLimits (ctx context.Context , cq * kueue.ClusterQueue ) (ctrl.Result , error ) {
141157
142158 // compute unschedulable resource totals
143159 unschedulableQuantities := map [string ]* resource.Quantity {}
144- for _ , quantities := range unschedulableNodes {
160+ for _ , quantities := range noScheduleNodes {
145161 for resourceName , quantity := range quantities {
146162 if ! quantity .IsZero () {
147163 if unschedulableQuantities [resourceName ] == nil {
0 commit comments