Skip to content

Commit 7988ca8

Browse files
committed
fix: type issue
1 parent 854bf0d commit 7988ca8

File tree

4 files changed

+9
-11
lines changed

4 files changed

+9
-11
lines changed

api/v1/gpupool_types.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@ type NodeProvisioner struct {
124124
CPULabels map[string]string `json:"cpuNodeLabels,omitempty"`
125125

126126
// +optional
127-
// +kubebuilder:default="100"
128127
// NodeProvisioner will start a virtual billing based on public pricing or customized pricing. If the VM's costs exceed any budget constraint, the new VM will not be created and alerts will be generated.
129128
Budget *PeriodicalBudget `json:"budget,omitempty"`
130129
}
@@ -161,7 +160,6 @@ const (
161160
)
162161

163162
type Requirement struct {
164-
// +kubebuilder:default=""
165163
Key NodeRequirementKey `json:"key,omitempty"`
166164

167165
// +kubebuilder:default="In"

config/crd/bases/tensor-fusion.ai_gpupools.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ spec:
256256
NodeSelector is for existing GPUs, NodeProvisioner is for Karpenter-like auto management.
257257
properties:
258258
budget:
259-
default: "100"
260259
description: NodeProvisioner will start an virtual billing
261260
based on public pricing or customized pricing, if the VM's
262261
costs exceeded any budget constraints, the new VM will not
@@ -286,7 +285,6 @@ spec:
286285
items:
287286
properties:
288287
key:
289-
default: ""
290288
enum:
291289
- node.kubernetes.io/instance-type
292290
- kubernetes.io/arch
@@ -337,7 +335,6 @@ spec:
337335
items:
338336
properties:
339337
key:
340-
default: ""
341338
enum:
342339
- node.kubernetes.io/instance-type
343340
- kubernetes.io/arch

config/crd/bases/tensor-fusion.ai_tensorfusionclusters.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,6 @@ spec:
354354
NodeSelector is for existing GPUs, NodeProvisioner is for Karpenter-like auto management.
355355
properties:
356356
budget:
357-
default: "100"
358357
description: NodeProvisioner will start an virtual
359358
billing based on public pricing or customized
360359
pricing, if the VM's costs exceeded any budget
@@ -385,7 +384,6 @@ spec:
385384
items:
386385
properties:
387386
key:
388-
default: ""
389387
enum:
390388
- node.kubernetes.io/instance-type
391389
- kubernetes.io/arch
@@ -436,7 +434,6 @@ spec:
436434
items:
437435
properties:
438436
key:
439-
default: ""
440437
enum:
441438
- node.kubernetes.io/instance-type
442439
- kubernetes.io/arch

internal/controller/gpupool_controller.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ func (r *GPUPoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
8484
}
8585

8686
// For provisioning mode, check whether GPUNodes need to be scaled up when AvailableCapacity changes
87-
if pool.Spec.NodeManagerConfig.NodeProvisioner != nil && pool.Spec.NodeManagerConfig.NodeProvisioner.Mode == tfv1.NodeProvisionerModeNative {
87+
isProvisioningMode := pool.Spec.NodeManagerConfig.NodeProvisioner != nil && pool.Spec.NodeManagerConfig.NodeProvisioner.Mode == tfv1.NodeProvisionerModeNative
88+
if isProvisioningMode {
8889
if err := r.reconcilePoolCapacityWithProvisioner(ctx, pool); err != nil {
8990
return ctrl.Result{}, err
9091
}
@@ -95,8 +96,10 @@ func (r *GPUPoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
9596

9697
// TODO, any GPUNode changes trigger GPUPool reconcile, it should change the status, aggregate the total amount of resources, update current status
9798
// THIS NEEDS TO MOVE INTO GPU NODE CONTROLLER, rather than POOL CONTROLLER
98-
if err := r.startNodeDiscoverys(ctx, pool); err != nil {
99-
return ctrl.Result{}, err
99+
if !isProvisioningMode {
100+
if err := r.startNodeDiscoverys(ctx, pool); err != nil {
101+
return ctrl.Result{}, err
102+
}
100103
}
101104
// TODO, when componentConfig changed, it should notify corresponding resource to upgrade
102105
// eg. when hypervisor changed, should change all owned GPUNode's status.phase to Updating
@@ -111,6 +114,9 @@ func (r *GPUPoolReconciler) startNodeDiscoverys(
111114
log := log.FromContext(ctx)
112115
log.Info("Starting node node discovery job")
113116

117+
if pool.Spec.ComponentConfig == nil || pool.Spec.ComponentConfig.NodeDiscovery.PodTemplate == nil {
118+
return fmt.Errorf(`missing node discovery pod template in pool spec`)
119+
}
114120
podTmpl := &corev1.PodTemplate{}
115121
err := json.Unmarshal(pool.Spec.ComponentConfig.NodeDiscovery.PodTemplate.Raw, podTmpl)
116122
if err != nil {

0 commit comments

Comments
 (0)