@@ -15,12 +15,11 @@ import (
1515 "time"
1616
1717 "github.com/bombsimon/logrusr/v2"
18+ workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
1819 "github.com/spf13/cobra"
1920 corev1 "k8s.io/api/core/v1"
2021 "k8s.io/apimachinery/pkg/api/errors"
2122 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22- "k8s.io/apimachinery/pkg/fields"
23- "k8s.io/apimachinery/pkg/labels"
2423 "k8s.io/apimachinery/pkg/runtime"
2524 "k8s.io/apimachinery/pkg/types"
2625 utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -113,25 +112,13 @@ var runCmd = &cobra.Command{
113112 log .WithError (err ).Fatal ("unable to bind controller watch event handler" )
114113 }
115114
116- // the pod count reconciler needs an index on spec.nodeName to be able to list pods by node
117- if err := mgr .GetFieldIndexer ().IndexField (
118- context .Background (),
119- & corev1.Pod {},
120- "spec.nodeName" ,
121- func (o client.Object ) []string {
122- pod := o .(* corev1.Pod )
123- return []string {pod .Spec .NodeName }
124- }); err != nil {
125- log .WithError (err ).Fatal ("unable to create index for pod nodeName" )
126- }
127-
128- pc , err := NewPodCountController (mgr .GetClient ())
115+ wc , err := NewWorkspaceCountController (mgr .GetClient ())
129116 if err != nil {
130- log .WithError (err ).Fatal ("unable to create pod count controller" )
117+ log .WithError (err ).Fatal ("unable to create workspace count controller" )
131118 }
132- err = pc .SetupWithManager (mgr )
119+ err = wc .SetupWithManager (mgr )
133120 if err != nil {
134- log .WithError (err ).Fatal ("unable to bind pod count controller" )
121+ log .WithError (err ).Fatal ("unable to bind workspace count controller" )
135122 }
136123
137124 metrics .Registry .MustRegister (NodeLabelerCounterVec )
@@ -159,6 +146,7 @@ var runCmd = &cobra.Command{
159146
160147func init () {
161148 utilruntime .Must (clientgoscheme .AddToScheme (scheme ))
149+ utilruntime .Must (workspacev1 .AddToScheme (scheme ))
162150
163151 rootCmd .AddCommand (runCmd )
164152}
@@ -274,101 +262,119 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
274262 return reconcile.Result {}, nil
275263}
276264
277- type PodCountController struct {
265+ type WorkspaceCountController struct {
278266 client.Client
279267}
280268
281- // NewPodCountController creates a controller that tracks workspace pod counts and updates node annotations
282- func NewPodCountController (client client.Client ) (* PodCountController , error ) {
283- return & PodCountController {
269+ func NewWorkspaceCountController (client client.Client ) (* WorkspaceCountController , error ) {
270+ return & WorkspaceCountController {
284271 Client : client ,
285272 }, nil
286273}
287274
288- func (pc * PodCountController ) SetupWithManager (mgr ctrl.Manager ) error {
275+ func (wc * WorkspaceCountController ) SetupWithManager (mgr ctrl.Manager ) error {
289276 return ctrl .NewControllerManagedBy (mgr ).
290- Named ("pod -count" ).
291- For (& corev1. Pod {}).
292- WithEventFilter (workspacePodFilter ()).
293- Complete (pc )
277+ Named ("workspace -count" ).
278+ For (& workspacev1. Workspace {}).
279+ WithEventFilter (workspaceFilter ()).
280+ Complete (wc )
294281}
295282
296- func workspacePodFilter () predicate.Predicate {
283+ func workspaceFilter () predicate.Predicate {
297284 return predicate.Funcs {
298285 CreateFunc : func (e event.CreateEvent ) bool {
299- pod := e .Object .(* corev1. Pod )
300- return pod . Labels [ "component" ] = = "workspace "
286+ ws := e .Object .(* workspacev1. Workspace )
287+ return ws . Status . Runtime != nil && ws . Status . Runtime . NodeName ! = ""
301288 },
302289 UpdateFunc : func (e event.UpdateEvent ) bool {
303290 return false
304291 },
305292 DeleteFunc : func (e event.DeleteEvent ) bool {
306- pod := e .Object .(* corev1. Pod )
307- return pod . Labels [ "component" ] = = "workspace "
293+ ws := e .Object .(* workspacev1. Workspace )
294+ return ws . Status . Runtime != nil && ws . Status . Runtime . NodeName ! = ""
308295 },
309296 }
310297}
311298
312- func (pc * PodCountController ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
313- log .WithField ("request" , req .NamespacedName .String ()).Info ("PodCountController reconciling" )
299+ func (wc * WorkspaceCountController ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
300+ log .WithField ("request" , req .NamespacedName .String ()).Info ("WorkspaceCountController reconciling" )
314301
315- var pod corev1. Pod
316- if err := pc .Get (ctx , req .NamespacedName , & pod ); err != nil {
302+ var ws workspacev1. Workspace
303+ if err := wc .Get (ctx , req .NamespacedName , & ws ); err != nil {
317304 if ! errors .IsNotFound (err ) {
318- log .WithError (err ).WithField ("pod " , req .NamespacedName ).Error ("unable to fetch Pod " )
305+ log .WithError (err ).WithField ("workspace " , req .NamespacedName ).Error ("unable to fetch Workspace " )
319306 return ctrl.Result {}, err
320307 }
308+ // If workspace not found, do a full reconciliation
309+ log .WithField ("workspace" , req .NamespacedName ).Info ("Workspace not found, reconciling all nodes" )
310+ return wc .reconcileAllNodes (ctx )
311+ }
321312
322- log .WithField ("pod" , req .NamespacedName ).Info ("Pod not found, assuming it was deleted, reconciling all nodes" )
313+ if ws .Status .Runtime != nil && ws .Status .Runtime .NodeName != "" {
314+ var workspaceList workspacev1.WorkspaceList
315+ if err := wc .List (ctx , & workspaceList ); err != nil {
316+ log .WithError (err ).Error ("failed to list workspaces" )
317+ return ctrl.Result {}, err
318+ }
323319
324- // Pod was deleted, reconcile all nodes
325- return pc .reconcileAllNodes (ctx )
326- }
320+ count := 0
321+ nodeName := ws .Status .Runtime .NodeName
322+ for _ , ws := range workspaceList .Items {
323+ if ws .Status .Runtime != nil &&
324+ ws .Status .Runtime .NodeName == nodeName &&
325+ ws .DeletionTimestamp .IsZero () {
326+ count ++
327+ }
328+ }
327329
328- if pod .Spec .NodeName == "" {
329- log .WithField ("pod" , req .NamespacedName ).Info ("Pod has no node, requesting reconciliation" )
330- return ctrl.Result {RequeueAfter : 5 * time .Second }, nil
330+ if err := wc .updateNodeAnnotation (ctx , nodeName , count ); err != nil {
331+ return ctrl.Result {}, err
332+ }
333+ log .WithField ("node" , nodeName ).WithField ("count" , count ).Info ("updated node annotation" )
331334 }
332335
333- return pc . reconcileNode ( ctx , pod . Spec . NodeName )
336+ return ctrl. Result {}, nil
334337}
335338
336- func (pc * PodCountController ) reconcileAllNodes (ctx context.Context ) (ctrl.Result , error ) {
339+ func (wc * WorkspaceCountController ) reconcileAllNodes (ctx context.Context ) (ctrl.Result , error ) {
340+ var workspaceList workspacev1.WorkspaceList
341+ if err := wc .List (ctx , & workspaceList ); err != nil {
342+ log .WithError (err ).Error ("failed to list workspaces" )
343+ return ctrl.Result {}, err
344+ }
345+
346+ workspaceCounts := make (map [string ]int )
347+ for _ , ws := range workspaceList .Items {
348+ if ws .Status .Runtime != nil &&
349+ ws .Status .Runtime .NodeName != "" &&
350+ ws .DeletionTimestamp .IsZero () {
351+ workspaceCounts [ws .Status .Runtime .NodeName ]++
352+ }
353+ }
354+
337355 var nodes corev1.NodeList
338- if err := pc .List (ctx , & nodes ); err != nil {
356+ if err := wc .List (ctx , & nodes ); err != nil {
339357 log .WithError (err ).Error ("failed to list nodes" )
340358 return ctrl.Result {}, err
341359 }
342360
361+ // Update each node's annotation based on its count
343362 for _ , node := range nodes .Items {
344- if _ , err := pc . reconcileNode ( ctx , node .Name ); err != nil {
345- log . WithError ( err ). WithField ( "node" , node .Name ). Error ( "failed to reconcile node" )
346- // Continue with other nodes even if one fails
363+ count := workspaceCounts [ node .Name ]
364+ if err := wc . updateNodeAnnotation ( ctx , node .Name , count ); err != nil {
365+ log . WithError ( err ). WithField ( "node" , node . Name ). Error ( "failed to update node" )
347366 continue
348367 }
349- log .WithField ("node" , node .Name ).Info ("reconciled node" )
368+ log .WithField ("node" , node .Name ).WithField ( "count" , count ). Info ("updated node annotation " )
350369 }
351370
352371 return ctrl.Result {}, nil
353372}
354373
355- func (pc * PodCountController ) reconcileNode (ctx context.Context , nodeName string ) (ctrl.Result , error ) {
356- var podList corev1.PodList
357- err := pc .List (ctx , & podList , & client.ListOptions {
358- FieldSelector : fields .SelectorFromSet (fields.Set {"spec.nodeName" : nodeName }),
359- LabelSelector : labels .SelectorFromSet (labels.Set {"component" : "workspace" }),
360- })
361- if err != nil {
362- log .WithError (err ).WithField ("nodeName" , nodeName ).Error ("failed to list pods" )
363- return ctrl.Result {}, fmt .Errorf ("failed to list pods: %w" , err )
364- }
365-
366- workspaceCount := len (podList .Items )
367- log .WithField ("nodeName" , nodeName ).WithField ("workspaceCount" , workspaceCount ).Info ("reconciling node" )
368-
369- err = retry .RetryOnConflict (retry .DefaultBackoff , func () error {
374+ func (wc * WorkspaceCountController ) updateNodeAnnotation (ctx context.Context , nodeName string , count int ) error {
375+ return retry .RetryOnConflict (retry .DefaultBackoff , func () error {
370376 var node corev1.Node
371- err := pc .Get (ctx , types.NamespacedName {Name : nodeName }, & node )
377+ err := wc .Get (ctx , types.NamespacedName {Name : nodeName }, & node )
372378 if err != nil {
373379 return fmt .Errorf ("obtaining node %s: %w" , nodeName , err )
374380 }
@@ -377,22 +383,16 @@ func (pc *PodCountController) reconcileNode(ctx context.Context, nodeName string
377383 node .Annotations = make (map [string ]string )
378384 }
379385
380- if workspaceCount > 0 {
386+ if count > 0 {
381387 node .Annotations ["cluster-autoscaler.kubernetes.io/scale-down-disabled" ] = "true"
382388 log .WithField ("nodeName" , nodeName ).Info ("disabling scale-down for node" )
383389 } else {
384390 delete (node .Annotations , "cluster-autoscaler.kubernetes.io/scale-down-disabled" )
385391 log .WithField ("nodeName" , nodeName ).Info ("enabling scale-down for node" )
386392 }
387393
388- return pc .Update (ctx , & node )
394+ return wc .Update (ctx , & node )
389395 })
390- if err != nil {
391- log .WithError (err ).WithField ("nodeName" , nodeName ).Error ("failed to update node" )
392- return ctrl.Result {}, fmt .Errorf ("failed to update node: %w" , err )
393- }
394-
395- return ctrl.Result {}, nil
396396}
397397
398398func updateLabel (label string , add bool , nodeName string , client client.Client ) error {
0 commit comments