Skip to content

Commit 8ed9024

Browse files
authored
fix: enhance FindRootOwnerReference to handle missing owners and impr… (#251)
* fix: enhance FindRootOwnerReference to handle missing owners and improve error handling * fix: add gpuresourcequotas to RBAC role and update FindRootOwnerReference for better owner handling
1 parent 7d153ce commit 8ed9024

File tree

4 files changed

+191
-7
lines changed

4 files changed

+191
-7
lines changed

charts/tensor-fusion/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 1.3.6
18+
version: 1.3.7
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

config/rbac/role.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ rules:
134134
- gpunodeclasses
135135
- gpunodes
136136
- gpupools
137+
- gpuresourcequotas
137138
- gpus
138139
- schedulingconfigtemplates
139140
- tensorfusionclusters
@@ -154,6 +155,7 @@ rules:
154155
- gpunodeclasses/finalizers
155156
- gpunodes/finalizers
156157
- gpupools/finalizers
158+
- gpuresourcequotas/finalizers
157159
- gpus/finalizers
158160
- schedulingconfigtemplates/finalizers
159161
- tensorfusionclusters/finalizers
@@ -168,6 +170,7 @@ rules:
168170
- gpunodeclasses/status
169171
- gpunodes/status
170172
- gpupools/status
173+
- gpuresourcequotas/status
171174
- gpus/status
172175
- schedulingconfigtemplates/status
173176
- tensorfusionclusters/status

internal/utils/owner_ref_utils.go

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,74 @@ package utils
33
import (
44
context "context"
55

6+
"fmt"
7+
8+
"k8s.io/apimachinery/pkg/api/errors"
69
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
710
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
11+
"k8s.io/apimachinery/pkg/runtime"
812
"sigs.k8s.io/controller-runtime/pkg/client"
913
)
1014

1115
// FindRootOwnerReference recursively finds the root owner reference for a given object (e.g. Pod).
1216
func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error) {
17+
owners := obj.GetOwnerReferences()
18+
if len(owners) == 0 {
19+
return nil, nil
20+
}
1321
current := obj
1422
for {
1523
owners := current.GetOwnerReferences()
24+
// if no owner, return self
1625
if len(owners) == 0 {
17-
return nil, nil // no owner, this is root
26+
var apiVersion, kind string
27+
if rObj, ok := current.(runtime.Object); ok {
28+
gvk := rObj.GetObjectKind().GroupVersionKind()
29+
apiVersion = gvk.GroupVersion().String()
30+
kind = gvk.Kind
31+
}
32+
33+
selfRef := metav1.OwnerReference{
34+
APIVersion: apiVersion,
35+
Kind: kind,
36+
Name: current.GetName(),
37+
UID: current.GetUID(),
38+
}
39+
return &selfRef, nil
40+
}
41+
42+
// prefer ownerRef with controller=true
43+
var ownerRef metav1.OwnerReference
44+
foundController := false
45+
for _, ref := range owners {
46+
if ref.Controller != nil && *ref.Controller {
47+
ownerRef = ref
48+
foundController = true
49+
break
50+
}
51+
}
52+
if !foundController {
53+
ownerRef = owners[0]
1854
}
19-
ownerRef := owners[0]
20-
// Try to get the owner object as unstructured
55+
2156
unObj := &unstructured.Unstructured{}
2257
unObj.SetAPIVersion(ownerRef.APIVersion)
2358
unObj.SetKind(ownerRef.Kind)
2459
key := client.ObjectKey{Name: ownerRef.Name, Namespace: namespace}
2560
err := c.Get(ctx, key, unObj)
2661
if err != nil {
27-
// If not found, treat this ownerRef as root
28-
return &ownerRef, nil
62+
// if not found, return ownerRef as root
63+
if errors.IsNotFound(err) {
64+
return &ownerRef, nil
65+
}
66+
return nil, fmt.Errorf("get owner object: %w", err)
2967
}
68+
3069
// Cast back to metav1.Object if possible
3170
if metaObj, ok := any(unObj).(metav1.Object); ok {
3271
current = metaObj
3372
} else {
34-
return &ownerRef, nil
73+
return nil, fmt.Errorf("unexpected type for owner object %s/%s", ownerRef.Kind, ownerRef.Name)
3574
}
3675
}
3776
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
package utils_test
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
appsv1 "k8s.io/api/apps/v1"
8+
corev1 "k8s.io/api/core/v1"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/apimachinery/pkg/runtime"
11+
"sigs.k8s.io/controller-runtime/pkg/client/fake"
12+
13+
"github.com/stretchr/testify/require"
14+
15+
"github.com/NexusGPU/tensor-fusion/internal/utils"
16+
)
17+
18+
func TestFindRootOwnerReference(t *testing.T) {
19+
// Prepare the scheme
20+
sch := runtime.NewScheme()
21+
require.NoError(t, corev1.AddToScheme(sch))
22+
require.NoError(t, appsv1.AddToScheme(sch))
23+
24+
t.Run("no owner returns self", func(t *testing.T) {
25+
pod := &corev1.Pod{
26+
TypeMeta: metav1.TypeMeta{
27+
APIVersion: "v1",
28+
Kind: "Pod",
29+
},
30+
ObjectMeta: metav1.ObjectMeta{
31+
Name: "mypod",
32+
Namespace: "default",
33+
UID: "uid-pod",
34+
},
35+
}
36+
37+
// build fake client with only pod
38+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod).Build()
39+
40+
rootRef, err := utils.FindRootOwnerReference(context.TODO(), c, "default", pod)
41+
require.NoError(t, err)
42+
require.Nil(t, rootRef)
43+
})
44+
45+
t.Run("hierarchy returns deployment", func(t *testing.T) {
46+
controller := true
47+
deployment := &appsv1.Deployment{
48+
TypeMeta: metav1.TypeMeta{
49+
APIVersion: "apps/v1",
50+
Kind: "Deployment",
51+
},
52+
ObjectMeta: metav1.ObjectMeta{
53+
Name: "mydeploy",
54+
Namespace: "default",
55+
UID: "uid-deploy",
56+
},
57+
}
58+
59+
rs := &appsv1.ReplicaSet{
60+
TypeMeta: metav1.TypeMeta{
61+
APIVersion: "apps/v1",
62+
Kind: "ReplicaSet",
63+
},
64+
ObjectMeta: metav1.ObjectMeta{
65+
Name: "myrs",
66+
Namespace: "default",
67+
UID: "uid-rs",
68+
OwnerReferences: []metav1.OwnerReference{
69+
{
70+
APIVersion: "apps/v1",
71+
Kind: "Deployment",
72+
Name: "mydeploy",
73+
UID: deployment.UID,
74+
Controller: &controller,
75+
},
76+
},
77+
},
78+
}
79+
80+
pod := &corev1.Pod{
81+
TypeMeta: metav1.TypeMeta{
82+
APIVersion: "v1",
83+
Kind: "Pod",
84+
},
85+
ObjectMeta: metav1.ObjectMeta{
86+
Name: "mypod",
87+
Namespace: "default",
88+
UID: "uid-pod",
89+
OwnerReferences: []metav1.OwnerReference{
90+
{
91+
APIVersion: "apps/v1",
92+
Kind: "ReplicaSet",
93+
Name: "myrs",
94+
UID: rs.UID,
95+
Controller: &controller,
96+
},
97+
},
98+
},
99+
}
100+
101+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod, rs, deployment).Build()
102+
103+
rootRef, err := utils.FindRootOwnerReference(context.TODO(), c, "default", pod)
104+
require.NoError(t, err)
105+
require.NotNil(t, rootRef)
106+
require.Equal(t, "mydeploy", rootRef.Name)
107+
require.Equal(t, "Deployment", rootRef.Kind)
108+
})
109+
110+
t.Run("missing owner returns ownerRef", func(t *testing.T) {
111+
// Pod refers to a ReplicaSet that doesn't exist in fake client
112+
controller := true
113+
pod := &corev1.Pod{
114+
TypeMeta: metav1.TypeMeta{
115+
APIVersion: "v1",
116+
Kind: "Pod",
117+
},
118+
ObjectMeta: metav1.ObjectMeta{
119+
Name: "mypod",
120+
Namespace: "default",
121+
UID: "uid-pod",
122+
OwnerReferences: []metav1.OwnerReference{
123+
{
124+
APIVersion: "apps/v1",
125+
Kind: "ReplicaSet",
126+
Name: "missing-rs",
127+
UID: "uid-missing",
128+
Controller: &controller,
129+
},
130+
},
131+
},
132+
}
133+
134+
c := fake.NewClientBuilder().WithScheme(sch).WithObjects(pod).Build()
135+
136+
rootRef, err := utils.FindRootOwnerReference(context.TODO(), c, "default", pod)
137+
require.NoError(t, err)
138+
require.NotNil(t, rootRef)
139+
require.Equal(t, "missing-rs", rootRef.Name)
140+
require.Equal(t, "ReplicaSet", rootRef.Kind)
141+
})
142+
}

0 commit comments

Comments
 (0)