diff --git a/deploy/fake-gpu-operator/templates/compute-domain-controller/clusterrole.yaml b/deploy/fake-gpu-operator/templates/compute-domain-controller/clusterrole.yaml index ca244c3..8520f44 100644 --- a/deploy/fake-gpu-operator/templates/compute-domain-controller/clusterrole.yaml +++ b/deploy/fake-gpu-operator/templates/compute-domain-controller/clusterrole.yaml @@ -7,7 +7,7 @@ metadata: rules: - apiGroups: ["resource.k8s.io"] resources: ["resourceclaims"] - verbs: ["get"] + verbs: ["get", "list", "watch"] - apiGroups: [""] resources: ["nodes"] verbs: ["get"] diff --git a/internal/compute-domain-controller/app.go b/internal/compute-domain-controller/app.go index 5b2fef0..5fe79eb 100644 --- a/internal/compute-domain-controller/app.go +++ b/internal/compute-domain-controller/app.go @@ -6,15 +6,19 @@ import ( "go.uber.org/zap/zapcore" resourceapi "k8s.io/api/resource/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" computedomainv1beta1 "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1" "github.com/run-ai/fake-gpu-operator/internal/common/app" + "github.com/run-ai/fake-gpu-operator/pkg/compute-domain/consts" ) var ( @@ -85,6 +89,11 @@ func (app *ComputeDomainApp) Run() { func (app *ComputeDomainApp) runController(ctx context.Context) error { cfg := ctrl.GetConfigOrDie() + computeDomainLabelSelector, err := labels.Parse(consts.ComputeDomainClaimLabel) + if err != nil { + return fmt.Errorf("failed to parse label selector: %w", err) + } + mgr, err := ctrl.NewManager(cfg, ctrl.Options{ Scheme: scheme, Metrics: metricsserver.Options{ @@ -93,6 +102,13 @@ func (app *ComputeDomainApp) 
runController(ctx context.Context) error { HealthProbeBindAddress: app.config.HealthProbeAddress, LeaderElection: app.config.LeaderElection, LeaderElectionID: "fake-compute-domain-controller", + Cache: cache.Options{ + ByObject: map[client.Object]cache.ByObject{ + &resourceapi.ResourceClaim{}: { + Label: computeDomainLabelSelector, + }, + }, + }, }) if err != nil { return fmt.Errorf("failed to create controller manager: %w", err) diff --git a/internal/compute-domain-controller/computedomain_controller.go b/internal/compute-domain-controller/computedomain_controller.go index 5eb3ac4..9743ed6 100644 --- a/internal/compute-domain-controller/computedomain_controller.go +++ b/internal/compute-domain-controller/computedomain_controller.go @@ -19,14 +19,17 @@ package computedomaincontroller import ( "context" "fmt" + "sort" resourceapi "k8s.io/api/resource/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" computedomainv1beta1 "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1" @@ -59,6 +62,7 @@ type ComputeDomainReconciler struct { //+kubebuilder:rbac:groups=resource.nvidia.com,resources=computedomains/status,verbs=get;update;patch //+kubebuilder:rbac:groups=resource.nvidia.com,resources=computedomains/finalizers,verbs=update //+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceclaimtemplates,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=resource.k8s.io,resources=resourceclaims,verbs=get;list;watch func (r *ComputeDomainReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx) @@ -81,6 +85,9 @@ func (r 
*ComputeDomainReconciler) Reconcile(ctx context.Context, req ctrl.Reques if err := r.ensureResourceClaimTemplates(ctx, domain); err != nil { return ctrl.Result{}, err } + if err := r.updateStatus(ctx, domain); err != nil { + return ctrl.Result{}, err + } logger.V(4).Info("reconciled ComputeDomain", "namespace", domain.Namespace, "name", domain.Name) return ctrl.Result{}, nil @@ -141,8 +148,8 @@ func (r *ComputeDomainReconciler) ensureTemplate( Name: name, Namespace: domain.Namespace, Labels: map[string]string{ - "resource.nvidia.com/computeDomain": domain.Name, - "resource.nvidia.com/computeDomainTarget": templateType, + consts.ComputeDomainTemplateLabel: domain.Name, + consts.ComputeDomainTemplateTargetLabel: templateType, }, Finalizers: []string{ consts.ComputeDomainFinalizer, @@ -151,7 +158,7 @@ func (r *ComputeDomainReconciler) ensureTemplate( Spec: resourceapi.ResourceClaimTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ - "nvidia.com/computeDomain": domain.Name, + consts.ComputeDomainClaimLabel: domain.Name, }, }, Spec: resourceapi.ResourceClaimSpec{ @@ -229,5 +236,116 @@ func (r *ComputeDomainReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&computedomainv1beta1.ComputeDomain{}). Owns(&resourceapi.ResourceClaimTemplate{}). + Watches( + &resourceapi.ResourceClaim{}, + handler.EnqueueRequestsFromMapFunc(r.mapResourceClaimToComputeDomain), + ). 
Complete(r) }
+
+// mapResourceClaimToComputeDomain translates a ResourceClaim event into a
+// reconcile request for the ComputeDomain named by the claim's
+// consts.ComputeDomainClaimLabel label, in the claim's namespace.
+//
+// It returns nil when obj is not a ResourceClaim or when the label is missing
+// or empty, so no request is ever enqueued with an empty object name.
+func (r *ComputeDomainReconciler) mapResourceClaimToComputeDomain(ctx context.Context, obj client.Object) []ctrl.Request {
+	claim, ok := obj.(*resourceapi.ResourceClaim)
+	if !ok {
+		return nil
+	}
+
+	// Indexing a nil or label-less map yields "", which covers both the
+	// missing-label and the empty-value case.
+	domainName := claim.Labels[consts.ComputeDomainClaimLabel]
+	if domainName == "" {
+		return nil
+	}
+
+	return []ctrl.Request{{
+		NamespacedName: types.NamespacedName{
+			Name:      domainName,
+			Namespace: claim.Namespace,
+		},
+	}}
+}
+
+// updateStatus recomputes the ComputeDomain status from the ResourceClaims in
+// the domain's namespace that carry consts.ComputeDomainClaimLabel=<domain name>.
+//
+// Every non-empty Pool found in an allocated claim's device results is recorded
+// once as a Ready node, sorted by name. The domain is Ready when Spec.NumNodes
+// is 0 or at least Spec.NumNodes distinct nodes were found, NotReady otherwise.
+// The status subresource is written only when the computed status differs from
+// the current one; list and update failures are returned wrapped.
+func (r *ComputeDomainReconciler) updateStatus(ctx context.Context, domain *computedomainv1beta1.ComputeDomain) error {
+	claimList := &resourceapi.ResourceClaimList{}
+	if err := r.List(ctx, claimList,
+		client.InNamespace(domain.Namespace),
+		client.MatchingLabels{consts.ComputeDomainClaimLabel: domain.Name},
+	); err != nil {
+		return fmt.Errorf("failed to list ResourceClaims for ComputeDomain %s/%s: %w", domain.Namespace, domain.Name, err)
+	}
+
+	// Deduplicate: several claims may be allocated from the same pool.
+	nodeSet := make(map[string]struct{})
+	for i := range claimList.Items {
+		allocation := claimList.Items[i].Status.Allocation
+		if allocation == nil {
+			continue
+		}
+		for _, result := range allocation.Devices.Results {
+			if result.Pool != "" {
+				nodeSet[result.Pool] = struct{}{}
+			}
+		}
+	}
+
+	nodes := make([]*computedomainv1beta1.ComputeDomainNode, 0, len(nodeSet))
+	for nodeName := range nodeSet {
+		nodes = append(nodes, &computedomainv1beta1.ComputeDomainNode{
+			Name:   nodeName,
+			Status: computedomainv1beta1.ComputeDomainStatusReady,
+		})
+	}
+	// Map iteration order is random; sort so the stored list is stable and
+	// statusEqual can compare it positionally.
+	sort.Slice(nodes, func(i, j int) bool {
+		return nodes[i].Name < nodes[j].Name
+	})
+
+	status := computedomainv1beta1.ComputeDomainStatusNotReady
+	if domain.Spec.NumNodes == 0 || len(nodes) >= domain.Spec.NumNodes {
+		status = computedomainv1beta1.ComputeDomainStatusReady
+	}
+
+	if r.statusEqual(domain.Status, nodes, status) {
+		return nil
+	}
+
+	domain.Status.Nodes = nodes
+	domain.Status.Status = status
+	if err := r.Status().Update(ctx, domain); err != nil {
+		return fmt.Errorf("failed to update status of ComputeDomain %s/%s: %w", domain.Namespace, domain.Name, err)
+	}
+	return nil
+}
+
+// statusEqual reports whether current already holds newStatus and exactly the
+// nodes in newNodes, compared positionally by Name and Status. A nil entry on
+// either side counts as a difference so the status gets rewritten.
+func (r *ComputeDomainReconciler) statusEqual(current computedomainv1beta1.ComputeDomainStatus, newNodes []*computedomainv1beta1.ComputeDomainNode, newStatus string) bool {
+	if current.Status != newStatus || len(current.Nodes) != len(newNodes) {
+		return false
+	}
+	for i, node := range current.Nodes {
+		want := newNodes[i]
+		if node == nil || want == nil {
+			return false
+		}
+		if node.Name != want.Name || node.Status != want.Status {
+			return false
+		}
+	}
+	return true
+}
diff --git a/internal/compute-domain-controller/computedomain_controller_test.go b/internal/compute-domain-controller/computedomain_controller_test.go index fdf1261..231041c 100644 --- a/internal/compute-domain-controller/computedomain_controller_test.go +++ b/internal/compute-domain-controller/computedomain_controller_test.go @@ -76,7 +76,7 @@ func TestComputeDomainReconciler_Reconcile(t *testing.T) { Name: "test-domain", Namespace: "default", Labels: map[string]string{ - "resource.nvidia.com/computeDomain": "test-domain", + consts.ComputeDomainTemplateLabel: "test-domain", }, Finalizers: []string{consts.ComputeDomainFinalizer}, OwnerReferences: []metav1.OwnerReference{ @@ -104,6 +104,7 @@ func TestComputeDomainReconciler_Reconcile(t *testing.T) { fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objs...). + WithStatusSubresource(test.computeDomain). 
Build() reconciler := &controller.ComputeDomainReconciler{ @@ -143,10 +144,10 @@ func TestComputeDomainReconciler_Reconcile(t *testing.T) { assert.Equal(t, int64(1), workloadTemplate.Spec.Spec.Devices.Requests[0].Exactly.Count) assert.Equal(t, consts.ComputeDomainWorkloadDeviceClass, workloadTemplate.Spec.Spec.Devices.Requests[0].Exactly.DeviceClassName) // Check labels - assert.Equal(t, test.computeDomain.GetName(), workloadTemplate.Labels["resource.nvidia.com/computeDomain"]) - assert.Equal(t, "workload", workloadTemplate.Labels["resource.nvidia.com/computeDomainTarget"]) + assert.Equal(t, test.computeDomain.GetName(), workloadTemplate.Labels[consts.ComputeDomainTemplateLabel]) + assert.Equal(t, "workload", workloadTemplate.Labels[consts.ComputeDomainTemplateTargetLabel]) // Check labels copied into generated claims - assert.Equal(t, test.computeDomain.GetName(), workloadTemplate.Spec.Labels["nvidia.com/computeDomain"]) + assert.Equal(t, test.computeDomain.GetName(), workloadTemplate.Spec.Labels[consts.ComputeDomainClaimLabel]) // Check finalizers assert.Contains(t, workloadTemplate.Finalizers, consts.ComputeDomainFinalizer) } else { @@ -201,3 +202,176 @@ func TestComputeDomainReconciler_Reconcile_NotFound(t *testing.T) { assert.NoError(t, err) assert.Equal(t, ctrl.Result{}, result) }
+
+// TestComputeDomainReconciler_StatusUpdate runs Reconcile against a fake client
+// seeded with one ComputeDomain plus a set of ResourceClaims, then checks the
+// Status.Status and the ordered Status.Nodes names stored on the ComputeDomain.
+func TestComputeDomainReconciler_StatusUpdate(t *testing.T) {
+	scheme := runtime.NewScheme()
+	require.NoError(t, resourceapi.AddToScheme(scheme))
+	require.NoError(t, computedomainv1beta1.AddToScheme(scheme))
+
+	tests := map[string]struct {
+		numNodes       int
+		resourceClaims []client.Object
+		expectedStatus string
+		expectedNodes  []string
+	}{
+		"status Ready when numNodes is 0 with no claims": {
+			numNodes:       0,
+			resourceClaims: nil,
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusReady,
+			expectedNodes:  []string{},
+		},
+		"status Ready when numNodes is 0 with allocated claims": {
+			numNodes: 0,
+			resourceClaims: []client.Object{
+				createAllocatedResourceClaim("claim-1", "default", "test-domain", "node-1"),
+			},
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusReady,
+			expectedNodes:  []string{"node-1"},
+		},
+		"status NotReady when numNodes not reached": {
+			numNodes: 2,
+			resourceClaims: []client.Object{
+				createAllocatedResourceClaim("claim-1", "default", "test-domain", "node-1"),
+			},
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusNotReady,
+			expectedNodes:  []string{"node-1"},
+		},
+		"status Ready when numNodes reached": {
+			numNodes: 2,
+			resourceClaims: []client.Object{
+				createAllocatedResourceClaim("claim-1", "default", "test-domain", "node-1"),
+				createAllocatedResourceClaim("claim-2", "default", "test-domain", "node-2"),
+			},
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusReady,
+			expectedNodes:  []string{"node-1", "node-2"},
+		},
+		"status Ready when numNodes exceeded": {
+			numNodes: 2,
+			resourceClaims: []client.Object{
+				createAllocatedResourceClaim("claim-1", "default", "test-domain", "node-1"),
+				createAllocatedResourceClaim("claim-2", "default", "test-domain", "node-2"),
+				createAllocatedResourceClaim("claim-3", "default", "test-domain", "node-3"),
+			},
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusReady,
+			expectedNodes:  []string{"node-1", "node-2", "node-3"},
+		},
+		"multiple claims on same node counted once": {
+			numNodes: 2,
+			resourceClaims: []client.Object{
+				createAllocatedResourceClaim("claim-1", "default", "test-domain", "node-1"),
+				createAllocatedResourceClaim("claim-2", "default", "test-domain", "node-1"),
+			},
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusNotReady,
+			expectedNodes:  []string{"node-1"},
+		},
+		"unallocated claims are ignored": {
+			numNodes: 1,
+			resourceClaims: []client.Object{
+				createUnallocatedResourceClaim("claim-1", "default", "test-domain"),
+			},
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusNotReady,
+			expectedNodes:  []string{},
+		},
+		"claims from different domain are ignored": {
+			numNodes: 1,
+			resourceClaims: []client.Object{
+				createAllocatedResourceClaim("claim-1", "default", "other-domain", "node-1"),
+			},
+			expectedStatus: computedomainv1beta1.ComputeDomainStatusNotReady,
+			expectedNodes:  []string{},
+		},
+	}
+
+	for name, test := range tests {
+		t.Run(name, func(t *testing.T) {
+			domain := newStatusTestDomain(test.numNodes)
+			objs := append([]client.Object{domain}, test.resourceClaims...)
+
+			fakeClient := fake.NewClientBuilder().
+				WithScheme(scheme).
+				WithObjects(objs...).
+				WithStatusSubresource(domain).
+				Build()
+
+			reconciler := &controller.ComputeDomainReconciler{
+				Client: fakeClient,
+				Scheme: scheme,
+			}
+
+			req := ctrl.Request{
+				NamespacedName: types.NamespacedName{
+					Name:      domain.GetName(),
+					Namespace: domain.GetNamespace(),
+				},
+			}
+
+			result, err := reconciler.Reconcile(context.Background(), req)
+			require.NoError(t, err)
+			assert.Equal(t, ctrl.Result{}, result)
+
+			updatedDomain := &computedomainv1beta1.ComputeDomain{}
+			require.NoError(t, fakeClient.Get(context.Background(), req.NamespacedName, updatedDomain))
+
+			assert.Equal(t, test.expectedStatus, updatedDomain.Status.Status)
+			assert.Len(t, updatedDomain.Status.Nodes, len(test.expectedNodes))
+
+			actualNodeNames := make([]string, len(updatedDomain.Status.Nodes))
+			for i, node := range updatedDomain.Status.Nodes {
+				actualNodeNames[i] = node.Name
+			}
+			assert.Equal(t, test.expectedNodes, actualNodeNames)
+		})
+	}
+}
+
+// newStatusTestDomain returns the ComputeDomain fixture shared by the status
+// test cases; only Spec.NumNodes varies between them.
+func newStatusTestDomain(numNodes int) *computedomainv1beta1.ComputeDomain {
+	return &computedomainv1beta1.ComputeDomain{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:       "test-domain",
+			Namespace:  "default",
+			UID:        "test-uid",
+			Finalizers: []string{consts.ComputeDomainFinalizer},
+		},
+		Spec: computedomainv1beta1.ComputeDomainSpec{
+			NumNodes: numNodes,
+		},
+	}
+}
+
+// createAllocatedResourceClaim returns a ResourceClaim labeled for domainName
+// whose allocation holds a single device result with Pool set to nodeName.
+func createAllocatedResourceClaim(name, namespace, domainName, nodeName string) *resourceapi.ResourceClaim {
+	claim := createUnallocatedResourceClaim(name, namespace, domainName)
+	claim.Status.Allocation = &resourceapi.AllocationResult{
+		Devices: resourceapi.DeviceAllocationResult{
+			Results: []resourceapi.DeviceRequestAllocationResult{
+				{
+					Pool:    nodeName,
+					Device:  "channel-0",
+					Request: "channel",
+				},
+			},
+		},
+	}
+	return claim
+}
+
+// createUnallocatedResourceClaim returns a ResourceClaim labeled for
+// domainName with an empty status, i.e. without any allocation.
+func createUnallocatedResourceClaim(name, namespace, domainName string) *resourceapi.ResourceClaim {
+	return &resourceapi.ResourceClaim{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: namespace,
+			Labels: map[string]string{
+				consts.ComputeDomainClaimLabel: domainName,
+			},
+		},
+		Status: resourceapi.ResourceClaimStatus{},
+	}
+}
diff --git a/internal/compute-domain-dra-plugin/driver_test.go b/internal/compute-domain-dra-plugin/driver_test.go index 3977730..0d809e6 100644 --- a/internal/compute-domain-dra-plugin/driver_test.go +++ b/internal/compute-domain-dra-plugin/driver_test.go @@ -68,7 +68,7 @@ func TestComputeDomainDriver_PrepareResourceClaims(t *testing.T) { Name: "test-claim", Namespace: "default", Labels: map[string]string{ - "resource.nvidia.com/computeDomain": "test-compute-domain", + consts.ComputeDomainTemplateLabel: "test-compute-domain", }, }, Spec: resourceapi.ResourceClaimSpec{ diff --git a/pkg/compute-domain/consts/consts.go b/pkg/compute-domain/consts/consts.go index 8ecc673..582d142 100644 --- a/pkg/compute-domain/consts/consts.go +++ b/pkg/compute-domain/consts/consts.go @@ -11,4 +11,13 @@ const ( // ComputeDomainFinalizer is the finalizer added to ComputeDomain CRs ComputeDomainFinalizer = "computedomain.resource.nvidia.com/finalizer" + + // ComputeDomainClaimLabel is the label used on ResourceClaims to identify which ComputeDomain they belong to + ComputeDomainClaimLabel = "nvidia.com/computeDomain" + + // ComputeDomainTemplateLabel is the label used on ResourceClaimTemplates to identify which ComputeDomain they belong to + ComputeDomainTemplateLabel = "resource.nvidia.com/computeDomain" + + // ComputeDomainTemplateTargetLabel is the label used on ResourceClaimTemplates to identify the 
target type + ComputeDomainTemplateTargetLabel = "resource.nvidia.com/computeDomainTarget" ) diff --git a/test/integration/compute_domain_test.go b/test/integration/compute_domain_test.go index 6cf7981..8d2d149 100644 --- a/test/integration/compute_domain_test.go +++ b/test/integration/compute_domain_test.go @@ -10,6 +10,7 @@ import ( computedomainv1beta1 "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/run-ai/fake-gpu-operator/pkg/compute-domain/consts" resourceapi "k8s.io/api/resource/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -64,8 +65,8 @@ var _ = Describe("Compute Domain Controller Integration Tests", func() { Expect(err).NotTo(HaveOccurred()) // Check that it has the correct labels - Expect(rct.Labels).To(HaveKeyWithValue("resource.nvidia.com/computeDomain", computeDomainName)) - Expect(rct.Labels).To(HaveKeyWithValue("resource.nvidia.com/computeDomainTarget", "workload")) + Expect(rct.Labels).To(HaveKeyWithValue(consts.ComputeDomainTemplateLabel, computeDomainName)) + Expect(rct.Labels).To(HaveKeyWithValue(consts.ComputeDomainTemplateTargetLabel, "workload")) // Check ResourceClaimTemplate spec Expect(rct.Spec.Spec.Devices.Requests).To(HaveLen(1)) @@ -128,6 +129,22 @@ var _ = Describe("Compute Domain Controller Integration Tests", func() { // Wait for pod to be ready waitForPodReady(namespace, podName, podReadyTimeout) + + // Verify ComputeDomain status shows the allocated node + Eventually(func() error { + cd, err := nvidiaClient.ResourceV1beta1().ComputeDomains(namespace).Get( + context.Background(), computeDomainName, metav1.GetOptions{}) + if err != nil { + return err + } + if len(cd.Status.Nodes) == 0 { + return fmt.Errorf("no nodes in ComputeDomain status") + } + if cd.Status.Status != "Ready" { + return fmt.Errorf("ComputeDomain status is %s, expected Ready", cd.Status.Status) + } + return nil + }).WithTimeout(30 * time.Second).Should(Succeed()) }) 
})