Skip to content

Commit be3e7f3

Browse files
committed
KAI backend: skip creating PodGroups
Signed-off-by: kangclzjc <kangz@nvidia.com>
1 parent 8eff55f commit be3e7f3

File tree

1 file changed

+54
-45
lines changed

1 file changed

+54
-45
lines changed

operator/internal/schedulerbackend/kai/backend.go

Lines changed: 54 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -78,57 +78,66 @@ func (b *Backend) Init() error {
7878
}
7979

8080
// SyncPodGang converts PodGang to KAI PodGroup and synchronizes it
81+
// TODO: Currently a no-op (phase 1) - PodGroup synchronization will be re-enabled in phase 2
8182
func (b *Backend) SyncPodGang(ctx context.Context, podGang *groveschedulerv1alpha1.PodGang) error {
82-
83-
// Convert PodGang to PodGroup
84-
podGroup := b.convertPodGangToPodGroup(podGang)
85-
86-
// Create or update PodGroup
87-
existing := &unstructured.Unstructured{}
88-
existing.SetGroupVersionKind(podGroupGVK())
89-
90-
err := b.client.Get(ctx, client.ObjectKey{
91-
Namespace: podGroup.GetNamespace(),
92-
Name: podGroup.GetName(),
93-
}, existing)
94-
95-
if err != nil {
96-
if client.IgnoreNotFound(err) != nil {
97-
return fmt.Errorf("failed to get existing PodGroup: %w", err)
98-
}
99-
100-
// Create new PodGroup
101-
if err := b.client.Create(ctx, podGroup); err != nil {
102-
return fmt.Errorf("failed to create PodGroup: %w", err)
103-
}
104-
return nil
105-
}
106-
107-
// Update existing PodGroup
108-
podGroup.SetResourceVersion(existing.GetResourceVersion())
109-
if err := b.client.Update(ctx, podGroup); err != nil {
110-
return fmt.Errorf("failed to update PodGroup: %w", err)
111-
}
112-
83+
// Phase 1: Skip PodGroup creation/update
84+
// Phase 2: Will convert PodGang to PodGroup and synchronize
11385
return nil
86+
87+
// Convert PodGang to PodGroup (disabled)
88+
// podGroup := b.convertPodGangToPodGroup(podGang)
89+
//
90+
// // Create or update PodGroup
91+
// existing := &unstructured.Unstructured{}
92+
// existing.SetGroupVersionKind(podGroupGVK())
93+
//
94+
// err := b.client.Get(ctx, client.ObjectKey{
95+
// Namespace: podGroup.GetNamespace(),
96+
// Name: podGroup.GetName(),
97+
// }, existing)
98+
//
99+
// if err != nil {
100+
// if client.IgnoreNotFound(err) != nil {
101+
// return fmt.Errorf("failed to get existing PodGroup: %w", err)
102+
// }
103+
//
104+
// // Create new PodGroup
105+
// if err := b.client.Create(ctx, podGroup); err != nil {
106+
// return fmt.Errorf("failed to create PodGroup: %w", err)
107+
// }
108+
// return nil
109+
// }
110+
//
111+
// // Update existing PodGroup
112+
// podGroup.SetResourceVersion(existing.GetResourceVersion())
113+
// if err := b.client.Update(ctx, podGroup); err != nil {
114+
// return fmt.Errorf("failed to update PodGroup: %w", err)
115+
// }
116+
//
117+
// return nil
114118
}
115119

116120
// OnPodGangDelete removes the PodGroup owned by this PodGang
121+
// TODO: Currently a no-op (phase 1) - PodGroup deletion will be re-enabled in phase 2
117122
func (b *Backend) OnPodGangDelete(ctx context.Context, podGang *groveschedulerv1alpha1.PodGang) error {
118-
119-
podGroup := &unstructured.Unstructured{}
120-
podGroup.SetGroupVersionKind(podGroupGVK())
121-
podGroup.SetName(b.getPodGroupName(podGang))
122-
podGroup.SetNamespace(podGang.Namespace)
123-
124-
if err := b.client.Delete(ctx, podGroup); err != nil {
125-
if client.IgnoreNotFound(err) == nil {
126-
return nil // Already deleted
127-
}
128-
return fmt.Errorf("failed to delete PodGroup: %w", err)
129-
}
130-
123+
// Phase 1: Skip PodGroup deletion
124+
// Phase 2: Will delete PodGroup when PodGang is deleted
131125
return nil
126+
127+
// Delete PodGroup (disabled)
128+
// podGroup := &unstructured.Unstructured{}
129+
// podGroup.SetGroupVersionKind(podGroupGVK())
130+
// podGroup.SetName(b.getPodGroupName(podGang))
131+
// podGroup.SetNamespace(podGang.Namespace)
132+
//
133+
// if err := b.client.Delete(ctx, podGroup); err != nil {
134+
// if client.IgnoreNotFound(err) == nil {
135+
// return nil // Already deleted
136+
// }
137+
// return fmt.Errorf("failed to delete PodGroup: %w", err)
138+
// }
139+
//
140+
// return nil
132141
}
133142

134143
// PreparePod adds KAI scheduler-specific configuration to the Pod
@@ -149,7 +158,7 @@ func (b *Backend) PreparePod(pod *corev1.Pod) {
149158
if pod.Annotations == nil {
150159
pod.Annotations = make(map[string]string)
151160
}
152-
161+
153162
// Get PodGang and PodGroup names from labels
154163
if podGangName, ok := pod.Labels[common.LabelPodGang]; ok {
155164
pod.Annotations["kai.scheduler/podgang"] = podGangName

0 commit comments

Comments
 (0)