Skip to content

Commit 030f890

Browse files
authored
fix: gpu pool controller (#44)
1 parent 9e94c9b commit 030f890

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

internal/controller/gpunode_controller.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ func (r *GPUNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
123123

124124
// Only reconcile if the node has a kubernetes node name, otherwise the DaemonSet like workloads can not be scheduled
125125
if node.Status.KubernetesNodeName == "" {
126-
return ctrl.Result{}, nil
126+
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
127127
}
128128
if poolName == "" {
129129
log.Error(nil, "failed to get pool name", "node", node.Name)

internal/controller/node_controller.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,13 +102,13 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
102102
// Skip creation if the GPUNode already exists
103103
gpuNode := &tfv1.GPUNode{}
104104
if err := r.Client.Get(ctx, client.ObjectKey{Name: node.Name}, gpuNode); err != nil {
105-
if errors.IsNotFound(err) {
105+
if errors.IsNotFound(err) || gpuNode.Status.KubernetesNodeName == "" {
106106
newGPUNode := r.generateGPUNode(node, pool)
107107
// Set owner reference to cascade delete after GPU node created
108108
if err := controllerutil.SetControllerReference(node, newGPUNode, r.Scheme); err != nil {
109109
return ctrl.Result{}, fmt.Errorf("failed to set controller reference: %w", err)
110110
}
111-
_, e := controllerutil.CreateOrPatch(ctx, r.Client, newGPUNode, nil)
111+
_, e := controllerutil.CreateOrUpdate(ctx, r.Client, newGPUNode, nil)
112112
if e != nil {
113113
return ctrl.Result{}, fmt.Errorf("failed to create or patch GPUNode: %w", e)
114114
}

0 commit comments

Comments
 (0)