Skip to content

Commit 40e5040

Browse files
authored
fix: only one workload is generated for Deployment (#379)
* fix: only one workload is generated for the Deployment, with its name adjusted based on the root controller * fix: implement specific logic for the Deployment * fix: check return value when calling addToScheme * fix: align the Workload owner with the Pod controller
1 parent 648dc18 commit 40e5040

File tree

6 files changed

+619
-45
lines changed

6 files changed

+619
-45
lines changed

internal/utils/compose.go

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/samber/lo"
1414
v1 "k8s.io/api/core/v1"
1515
"k8s.io/apimachinery/pkg/api/resource"
16+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1617
"k8s.io/apimachinery/pkg/util/intstr"
1718
"k8s.io/utils/ptr"
1819
)
@@ -75,16 +76,12 @@ var featureShortcutMap = map[string]struct {
7576
}
7677

7778
type TensorFusionInfo struct {
78-
Profile *tfv1.WorkloadProfileSpec
79-
DynamicReplicas bool
80-
EnabledReplicas *int32
81-
WorkloadName string
82-
ContainerNames []string
83-
GenWorkload bool
84-
85-
// Pod mutating webhook can not get Pod UID sometimes,
86-
// thus need pod controller to set the owner reference
87-
PendingSetPodAsOwner bool
79+
Profile *tfv1.WorkloadProfileSpec
80+
DynamicReplicas bool
81+
EnabledReplicas *int32
82+
WorkloadName string
83+
PodControllerRef *metav1.OwnerReference
84+
ContainerNames []string
8885
}
8986

9087
func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo) {

internal/utils/owner_ref_utils.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import (
44
"context"
55
"fmt"
66

7+
appsv1 "k8s.io/api/apps/v1"
8+
batchv1 "k8s.io/api/batch/v1"
9+
corev1 "k8s.io/api/core/v1"
710
"k8s.io/apimachinery/pkg/api/errors"
811
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
912
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -96,3 +99,85 @@ func FindFirstLevelOwnerReference(obj metav1.Object) *metav1.OwnerReference {
9699
}
97100
return &ownerRef
98101
}
102+
103+
// FindRootControllerRef recursively finds the root controller reference for a given object (e.g. Pod).
104+
func FindRootControllerRef(ctx context.Context, c client.Client, obj metav1.Object) (*metav1.OwnerReference, error) {
105+
if metav1.GetControllerOfNoCopy(obj) == nil {
106+
return nil, nil
107+
}
108+
109+
namespace := obj.GetNamespace()
110+
current := obj
111+
for {
112+
controllerRef := metav1.GetControllerOf(current)
113+
if controllerRef == nil {
114+
if rObj, ok := current.(runtime.Object); ok {
115+
gvk := rObj.GetObjectKind().GroupVersionKind()
116+
return metav1.NewControllerRef(current, gvk), nil
117+
} else {
118+
return nil, fmt.Errorf("not a runtime.Object")
119+
}
120+
}
121+
122+
unObj := &unstructured.Unstructured{}
123+
unObj.SetAPIVersion(controllerRef.APIVersion)
124+
unObj.SetKind(controllerRef.Kind)
125+
err := c.Get(ctx, client.ObjectKey{Name: controllerRef.Name, Namespace: namespace}, unObj)
126+
if err != nil {
127+
// if not found, return controllerRef as root
128+
if errors.IsNotFound(err) {
129+
return controllerRef, nil
130+
}
131+
return nil, fmt.Errorf("get controller object: %w", err)
132+
}
133+
134+
// Cast back to metav1.Object if possible
135+
if metaObj, ok := any(unObj).(metav1.Object); ok {
136+
current = metaObj
137+
} else {
138+
return nil, fmt.Errorf("unexpected type for controller object %s/%s", controllerRef.Kind, controllerRef.Name)
139+
}
140+
}
141+
}
142+
143+
// GetPodControllerRef returns the controller reference for a Pod.
144+
// For Pods that are indirectly controlled (e.g., by a Deployment or CronJob), return the indirect controller.
145+
// For other cases, it returns the direct controller reference of the Pod.
146+
// If the Pod has no controller reference, it returns nil.
147+
func GetPodControllerRef(ctx context.Context, c client.Client, pod *corev1.Pod) (*metav1.OwnerReference, error) {
148+
podControllerRef := metav1.GetControllerOf(pod)
149+
if podControllerRef == nil {
150+
return nil, nil
151+
}
152+
153+
getControllerRef := func(obj client.Object) (*metav1.OwnerReference, error) {
154+
if err := c.Get(ctx, client.ObjectKey{
155+
Namespace: pod.Namespace,
156+
Name: podControllerRef.Name,
157+
}, obj); err != nil {
158+
if errors.IsNotFound(err) {
159+
return podControllerRef, nil
160+
}
161+
return nil, fmt.Errorf("failed to get %T: %w", obj, err)
162+
}
163+
return metav1.GetControllerOf(obj), nil
164+
}
165+
166+
switch podControllerRef.Kind {
167+
case "ReplicaSet":
168+
if parentRef, err := getControllerRef(&appsv1.ReplicaSet{}); err != nil {
169+
return nil, err
170+
} else if parentRef != nil && parentRef.Kind == "Deployment" {
171+
return parentRef, nil
172+
}
173+
174+
case "Job":
175+
if parentRef, err := getControllerRef(&batchv1.Job{}); err != nil {
176+
return nil, err
177+
} else if parentRef != nil && parentRef.Kind == "CronJob" {
178+
return parentRef, nil
179+
}
180+
}
181+
182+
return podControllerRef, nil
183+
}

internal/utils/owner_ref_utils_test.go

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"testing"
66

77
appsv1 "k8s.io/api/apps/v1"
8+
batchv1 "k8s.io/api/batch/v1"
89
corev1 "k8s.io/api/core/v1"
910
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1011
"k8s.io/apimachinery/pkg/runtime"
@@ -140,3 +141,254 @@ func TestFindRootOwnerReference(t *testing.T) {
140141
require.Equal(t, "ReplicaSet", rootRef.Kind)
141142
})
142143
}
144+
145+
func TestFindRootControllerRef(t *testing.T) {
146+
// Prepare the scheme
147+
sch := runtime.NewScheme()
148+
require.NoError(t, corev1.AddToScheme(sch))
149+
require.NoError(t, appsv1.AddToScheme(sch))
150+
151+
t.Run("no controller returns nil", func(t *testing.T) {
152+
pod := &corev1.Pod{
153+
TypeMeta: metav1.TypeMeta{
154+
APIVersion: "v1",
155+
Kind: "Pod",
156+
},
157+
ObjectMeta: metav1.ObjectMeta{
158+
Name: "mypod",
159+
Namespace: "default",
160+
UID: "uid-pod",
161+
},
162+
}
163+
164+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod).Build()
165+
166+
rootRef, err := utils.FindRootControllerRef(context.TODO(), c, pod)
167+
require.NoError(t, err)
168+
require.Nil(t, rootRef)
169+
})
170+
171+
t.Run("hierarchy returns deployment", func(t *testing.T) {
172+
controller := true
173+
deployment := &appsv1.Deployment{
174+
TypeMeta: metav1.TypeMeta{
175+
APIVersion: "apps/v1",
176+
Kind: "Deployment",
177+
},
178+
ObjectMeta: metav1.ObjectMeta{
179+
Name: "mydeploy",
180+
Namespace: "default",
181+
UID: "uid-deploy",
182+
},
183+
}
184+
185+
rs := &appsv1.ReplicaSet{
186+
TypeMeta: metav1.TypeMeta{
187+
APIVersion: "apps/v1",
188+
Kind: "ReplicaSet",
189+
},
190+
ObjectMeta: metav1.ObjectMeta{
191+
Name: "myrs",
192+
Namespace: "default",
193+
UID: "uid-rs",
194+
OwnerReferences: []metav1.OwnerReference{
195+
{
196+
APIVersion: "apps/v1",
197+
Kind: "Deployment",
198+
Name: "mydeploy",
199+
UID: deployment.UID,
200+
Controller: &controller,
201+
},
202+
},
203+
},
204+
}
205+
206+
pod := &corev1.Pod{
207+
TypeMeta: metav1.TypeMeta{
208+
APIVersion: "v1",
209+
Kind: "Pod",
210+
},
211+
ObjectMeta: metav1.ObjectMeta{
212+
Name: "mypod",
213+
Namespace: "default",
214+
UID: "uid-pod",
215+
OwnerReferences: []metav1.OwnerReference{
216+
{
217+
APIVersion: "apps/v1",
218+
Kind: "ReplicaSet",
219+
Name: "myrs",
220+
UID: rs.UID,
221+
Controller: &controller,
222+
},
223+
},
224+
},
225+
}
226+
227+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod, rs, deployment).Build()
228+
229+
rootRef, err := utils.FindRootControllerRef(context.TODO(), c, pod)
230+
require.NoError(t, err)
231+
require.NotNil(t, rootRef)
232+
require.Equal(t, "mydeploy", rootRef.Name)
233+
require.Equal(t, "Deployment", rootRef.Kind)
234+
})
235+
236+
t.Run("missing controller returns last found ref", func(t *testing.T) {
237+
controller := true
238+
pod := &corev1.Pod{
239+
TypeMeta: metav1.TypeMeta{
240+
APIVersion: "v1",
241+
Kind: "Pod",
242+
},
243+
ObjectMeta: metav1.ObjectMeta{
244+
Name: "mypod",
245+
Namespace: "default",
246+
UID: "uid-pod",
247+
OwnerReferences: []metav1.OwnerReference{
248+
{
249+
APIVersion: "apps/v1",
250+
Kind: "ReplicaSet",
251+
Name: "missing-rs",
252+
UID: "uid-missing",
253+
Controller: &controller,
254+
},
255+
},
256+
},
257+
}
258+
259+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod).Build()
260+
261+
rootRef, err := utils.FindRootControllerRef(context.TODO(), c, pod)
262+
require.NoError(t, err)
263+
require.NotNil(t, rootRef)
264+
require.Equal(t, "missing-rs", rootRef.Name)
265+
require.Equal(t, "ReplicaSet", rootRef.Kind)
266+
})
267+
}
268+
269+
func TestGetPodControllerRef(t *testing.T) {
270+
// Prepare the scheme
271+
sch := runtime.NewScheme()
272+
require.NoError(t, corev1.AddToScheme(sch))
273+
require.NoError(t, appsv1.AddToScheme(sch))
274+
require.NoError(t, batchv1.AddToScheme(sch))
275+
276+
t.Run("pod with no controller returns nil", func(t *testing.T) {
277+
pod := &corev1.Pod{
278+
ObjectMeta: metav1.ObjectMeta{
279+
Name: "mypod",
280+
Namespace: "default",
281+
},
282+
}
283+
284+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod).Build()
285+
286+
ref, err := utils.GetPodControllerRef(context.TODO(), c, pod)
287+
require.NoError(t, err)
288+
require.Nil(t, ref)
289+
})
290+
291+
t.Run("pod owned by replicaset owned by deployment returns deployment ref", func(t *testing.T) {
292+
controller := true
293+
deployment := &appsv1.Deployment{
294+
ObjectMeta: metav1.ObjectMeta{
295+
Name: "mydeploy",
296+
Namespace: "default",
297+
UID: "uid-deploy",
298+
},
299+
}
300+
301+
rs := &appsv1.ReplicaSet{
302+
ObjectMeta: metav1.ObjectMeta{
303+
Name: "myrs",
304+
Namespace: "default",
305+
UID: "uid-rs",
306+
OwnerReferences: []metav1.OwnerReference{
307+
{
308+
APIVersion: "apps/v1",
309+
Kind: "Deployment",
310+
Name: "mydeploy",
311+
UID: deployment.UID,
312+
Controller: &controller,
313+
},
314+
},
315+
},
316+
}
317+
318+
pod := &corev1.Pod{
319+
ObjectMeta: metav1.ObjectMeta{
320+
Name: "mypod",
321+
Namespace: "default",
322+
OwnerReferences: []metav1.OwnerReference{
323+
{
324+
APIVersion: "apps/v1",
325+
Kind: "ReplicaSet",
326+
Name: "myrs",
327+
UID: rs.UID,
328+
Controller: &controller,
329+
},
330+
},
331+
},
332+
}
333+
334+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod, rs, deployment).Build()
335+
336+
ref, err := utils.GetPodControllerRef(context.TODO(), c, pod)
337+
require.NoError(t, err)
338+
require.NotNil(t, ref)
339+
require.Equal(t, "mydeploy", ref.Name)
340+
require.Equal(t, "Deployment", ref.Kind)
341+
})
342+
343+
t.Run("pod owned by job owned by cronjob returns cronjob ref", func(t *testing.T) {
344+
controller := true
345+
cronjob := &batchv1.CronJob{
346+
ObjectMeta: metav1.ObjectMeta{
347+
Name: "mycronjob",
348+
Namespace: "default",
349+
UID: "uid-cronjob",
350+
},
351+
}
352+
353+
job := &batchv1.Job{
354+
ObjectMeta: metav1.ObjectMeta{
355+
Name: "myjob",
356+
Namespace: "default",
357+
UID: "uid-job",
358+
OwnerReferences: []metav1.OwnerReference{
359+
{
360+
APIVersion: "batch/v1",
361+
Kind: "CronJob",
362+
Name: "mycronjob",
363+
UID: cronjob.UID,
364+
Controller: &controller,
365+
},
366+
},
367+
},
368+
}
369+
370+
pod := &corev1.Pod{
371+
ObjectMeta: metav1.ObjectMeta{
372+
Name: "mypod",
373+
Namespace: "default",
374+
OwnerReferences: []metav1.OwnerReference{
375+
{
376+
APIVersion: "batch/v1",
377+
Kind: "Job",
378+
Name: "myjob",
379+
UID: job.UID,
380+
Controller: &controller,
381+
},
382+
},
383+
},
384+
}
385+
386+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod, job, cronjob).Build()
387+
388+
ref, err := utils.GetPodControllerRef(context.TODO(), c, pod)
389+
require.NoError(t, err)
390+
require.NotNil(t, ref)
391+
require.Equal(t, "mycronjob", ref.Name)
392+
require.Equal(t, "CronJob", ref.Kind)
393+
})
394+
}

0 commit comments

Comments
 (0)