Skip to content

Commit 3c9d0cd

Browse files
committed
fix: rename GPUNodePoolIdentifierLabelKey to GPUNodePoolIdentifierLabelFormat
1 parent 3fb98ff commit 3c9d0cd

File tree

5 files changed

+14
-6
lines changed

5 files changed

+14
-6
lines changed

internal/constants/constants.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ const (
1313
LabelKeyOwner = Domain + "/managed-by"
1414

1515
GPUNodePoolIdentifierLabelPrefix = Domain + "/pool/"
16-
GPUNodePoolIdentifierLabelKey = Domain + "/pool/%s"
16+
GPUNodePoolIdentifierLabelFormat = Domain + "/pool/%s"
1717
NodeDeletionMark = Domain + "/should-delete"
1818

1919
TensorFusionEnabledLabelKey = Domain + "/enabled"

internal/controller/gpunode_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func (r *GPUNodeReconciler) reconcileHypervisorPod(ctx context.Context, node *tf
130130
Name: hypervisorPodName,
131131
Namespace: namespace,
132132
Labels: map[string]string{
133-
fmt.Sprintf(constants.GPUNodePoolIdentifierLabelKey, poolName): "true",
133+
fmt.Sprintf(constants.GPUNodePoolIdentifierLabelFormat, poolName): "true",
134134
},
135135
},
136136
Spec: *spec,

internal/controller/gpupool_compaction_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func (r *GPUPoolCompactionReconciler) checkNodeCompaction(ctx context.Context, p
4949
// Strategy #1, terminate empty node
5050
allNodes := &tfv1.GPUNodeList{}
5151
if err := r.List(ctx, allNodes, client.MatchingLabels(map[string]string{
52-
fmt.Sprintf(constants.GPUNodePoolIdentifierLabelKey, pool.Name): "true",
52+
fmt.Sprintf(constants.GPUNodePoolIdentifierLabelFormat, pool.Name): "true",
5353
})); err != nil {
5454
return fmt.Errorf("failed to list nodes : %w", err)
5555
}

internal/controller/gpupool_controller.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ import (
3030
corev1 "k8s.io/api/core/v1"
3131
"k8s.io/apimachinery/pkg/api/errors"
3232
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
33+
"k8s.io/apimachinery/pkg/labels"
3334
"k8s.io/apimachinery/pkg/runtime"
35+
"k8s.io/apimachinery/pkg/selection"
3436
"k8s.io/apimachinery/pkg/types"
3537
"k8s.io/client-go/tools/record"
3638
schedulingcorev1 "k8s.io/component-helpers/scheduling/corev1"
@@ -123,9 +125,15 @@ func (r *GPUPoolReconciler) startNodeDiscoverys(
123125
return fmt.Errorf("unmarshal pod template: %w", err)
124126
}
125127
// pool.Spec.NodeManagerConfig.NodeSelector
128+
selector := labels.NewSelector()
129+
poolReq, err := labels.NewRequirement(fmt.Sprintf(constants.GPUNodePoolIdentifierLabelFormat, pool.Name), selection.DoubleEquals, []string{"false"})
130+
if err != nil {
131+
return fmt.Errorf("new GPUNodePoolIdentifier label seletor: %w", err)
132+
}
133+
selector = selector.Add(*poolReq)
126134
nodes := &tfv1.GPUNodeList{}
127-
if err := r.Client.List(ctx, nodes); err != nil {
128-
return fmt.Errorf("list nodes: %v", err)
135+
if err := r.Client.List(ctx, nodes, &client.ListOptions{LabelSelector: selector}); err != nil {
136+
return fmt.Errorf("list gpunodes: %v", err)
129137
}
130138

131139
for _, gpuNode := range nodes.Items {

internal/controller/node_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ func (r *NodeReconciler) generateGPUNode(ctx context.Context, node *corev1.Node,
9393
ObjectMeta: metav1.ObjectMeta{
9494
Name: node.Name,
9595
Labels: map[string]string{
96-
constants.GPUNodePoolIdentifierLabelKey: poolName,
96+
fmt.Sprint(constants.GPUNodePoolIdentifierLabelFormat, poolName): "true",
9797
},
9898
},
9999
Spec: tfv1.GPUNodeSpec{

0 commit comments

Comments
 (0)