@@ -19,14 +19,17 @@ package computedomaincontroller
1919import (
2020 "context"
2121 "fmt"
22+ "sort"
2223
2324 resourceapi "k8s.io/api/resource/v1"
2425 apierrors "k8s.io/apimachinery/pkg/api/errors"
2526 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2627 "k8s.io/apimachinery/pkg/runtime"
28+ "k8s.io/apimachinery/pkg/types"
2729 ctrl "sigs.k8s.io/controller-runtime"
2830 "sigs.k8s.io/controller-runtime/pkg/client"
2931 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
32+ "sigs.k8s.io/controller-runtime/pkg/handler"
3033 "sigs.k8s.io/controller-runtime/pkg/log"
3134
3235 computedomainv1beta1 "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1"
@@ -59,6 +62,7 @@ type ComputeDomainReconciler struct {
5962//+kubebuilder:rbac:groups=resource.nvidia.com,resources=computedomains/status,verbs=get;update;patch
6063//+kubebuilder:rbac:groups=resource.nvidia.com,resources=computedomains/finalizers,verbs=update
6164//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceclaimtemplates,verbs=get;list;watch;create;update;patch;delete
65+ //+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceclaims,verbs=get;list;watch
6266
6367func (r * ComputeDomainReconciler ) Reconcile (ctx context.Context , req ctrl.Request ) (ctrl.Result , error ) {
6468 logger := log .FromContext (ctx )
@@ -81,6 +85,9 @@ func (r *ComputeDomainReconciler) Reconcile(ctx context.Context, req ctrl.Reques
8185 if err := r .ensureResourceClaimTemplates (ctx , domain ); err != nil {
8286 return ctrl.Result {}, err
8387 }
88+ if err := r .updateStatus (ctx , domain ); err != nil {
89+ return ctrl.Result {}, err
90+ }
8491
8592 logger .V (4 ).Info ("reconciled ComputeDomain" , "namespace" , domain .Namespace , "name" , domain .Name )
8693 return ctrl.Result {}, nil
@@ -141,8 +148,8 @@ func (r *ComputeDomainReconciler) ensureTemplate(
141148 Name : name ,
142149 Namespace : domain .Namespace ,
143150 Labels : map [string ]string {
144- "resource.nvidia.com/computeDomain" : domain .Name ,
145- "resource.nvidia.com/computeDomainTarget" : templateType ,
151+ consts . ComputeDomainTemplateLabel : domain .Name ,
152+ consts . ComputeDomainTemplateTargetLabel : templateType ,
146153 },
147154 Finalizers : []string {
148155 consts .ComputeDomainFinalizer ,
@@ -151,7 +158,7 @@ func (r *ComputeDomainReconciler) ensureTemplate(
151158 Spec : resourceapi.ResourceClaimTemplateSpec {
152159 ObjectMeta : metav1.ObjectMeta {
153160 Labels : map [string ]string {
154- "nvidia.com/computeDomain" : domain .Name ,
161+ consts . ComputeDomainClaimLabel : domain .Name ,
155162 },
156163 },
157164 Spec : resourceapi.ResourceClaimSpec {
@@ -229,5 +236,89 @@ func (r *ComputeDomainReconciler) SetupWithManager(mgr ctrl.Manager) error {
229236 return ctrl .NewControllerManagedBy (mgr ).
230237 For (& computedomainv1beta1.ComputeDomain {}).
231238 Owns (& resourceapi.ResourceClaimTemplate {}).
239+ Watches (
240+ & resourceapi.ResourceClaim {},
241+ handler .EnqueueRequestsFromMapFunc (r .mapResourceClaimToComputeDomain ),
242+ ).
232243 Complete (r )
233244}
245+
246+ func (r * ComputeDomainReconciler ) mapResourceClaimToComputeDomain (ctx context.Context , obj client.Object ) []ctrl.Request {
247+ claim , ok := obj .(* resourceapi.ResourceClaim )
248+ if ! ok {
249+ return nil
250+ }
251+
252+ domainName , exists := claim .Labels [consts .ComputeDomainClaimLabel ]
253+ if ! exists {
254+ return nil
255+ }
256+
257+ return []ctrl.Request {{
258+ NamespacedName : types.NamespacedName {
259+ Name : domainName ,
260+ Namespace : claim .Namespace ,
261+ },
262+ }}
263+ }
264+
265+ func (r * ComputeDomainReconciler ) updateStatus (ctx context.Context , domain * computedomainv1beta1.ComputeDomain ) error {
266+ claimList := & resourceapi.ResourceClaimList {}
267+ if err := r .List (ctx , claimList ,
268+ client .InNamespace (domain .Namespace ),
269+ client.MatchingLabels {consts .ComputeDomainClaimLabel : domain .Name },
270+ ); err != nil {
271+ return err
272+ }
273+
274+ nodeSet := make (map [string ]struct {})
275+ for _ , claim := range claimList .Items {
276+ if claim .Status .Allocation == nil {
277+ continue
278+ }
279+ for _ , result := range claim .Status .Allocation .Devices .Results {
280+ if result .Pool != "" {
281+ nodeSet [result .Pool ] = struct {}{}
282+ }
283+ }
284+ }
285+
286+ nodes := make ([]* computedomainv1beta1.ComputeDomainNode , 0 , len (nodeSet ))
287+ for nodeName := range nodeSet {
288+ nodes = append (nodes , & computedomainv1beta1.ComputeDomainNode {
289+ Name : nodeName ,
290+ Status : computedomainv1beta1 .ComputeDomainStatusReady ,
291+ })
292+ }
293+ sort .Slice (nodes , func (i , j int ) bool {
294+ return nodes [i ].Name < nodes [j ].Name
295+ })
296+
297+ status := computedomainv1beta1 .ComputeDomainStatusNotReady
298+ if domain .Spec .NumNodes == 0 || len (nodes ) >= domain .Spec .NumNodes {
299+ status = computedomainv1beta1 .ComputeDomainStatusReady
300+ }
301+
302+ if ! r .statusEqual (domain .Status , nodes , status ) {
303+ domain .Status .Nodes = nodes
304+ domain .Status .Status = status
305+ return r .Status ().Update (ctx , domain )
306+ }
307+
308+ return nil
309+ }
310+
311+ func (r * ComputeDomainReconciler ) statusEqual (current computedomainv1beta1.ComputeDomainStatus , newNodes []* computedomainv1beta1.ComputeDomainNode , newStatus string ) bool {
312+ if current .Status != newStatus {
313+ return false
314+ }
315+ if len (current .Nodes ) != len (newNodes ) {
316+ return false
317+ }
318+ for i , node := range current .Nodes {
319+ if node .Name != newNodes [i ].Name {
320+ return false
321+ }
322+ }
323+ return true
324+ }
0 commit comments