diff --git a/deployments/multus-daemonset-thick.yml b/deployments/multus-daemonset-thick.yml
index 6b70571bd..f46b60284 100644
--- a/deployments/multus-daemonset-thick.yml
+++ b/deployments/multus-daemonset-thick.yml
@@ -72,6 +72,15 @@ rules:
       - list
       - update
       - watch
+  - apiGroups:
+      - "resource.k8s.io"
+    resources:
+      - resourceclaims
+      - resourceclaims/status
+      - resourceslices
+    verbs:
+      - get
+      - list
   - apiGroups:
       - ""
       - events.k8s.io
diff --git a/deployments/multus-daemonset.yml b/deployments/multus-daemonset.yml
index 40fa51932..d6b5daaba 100644
--- a/deployments/multus-daemonset.yml
+++ b/deployments/multus-daemonset.yml
@@ -79,6 +79,45 @@ rules:
       - create
       - patch
       - update
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: multus
+rules:
+  - apiGroups: ["k8s.cni.cncf.io"]
+    resources:
+      - '*'
+    verbs:
+      - '*'
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+      - pods/status
+    verbs:
+      - get
+      - list
+      - update
+      - watch
+  - apiGroups:
+      - "resource.k8s.io"
+    resources:
+      - resourceclaims
+      - resourceclaims/status
+      - resourceslices
+    verbs:
+      - get
+      - list
+  - apiGroups:
+      - ""
+      - events.k8s.io
+    resources:
+      - events
+    verbs:
+      - create
+      - patch
+      - update
 ---
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1
diff --git a/docs/how-to-use.md b/docs/how-to-use.md
index b4cdfc426..d61c39ceb 100644
--- a/docs/how-to-use.md
+++ b/docs/how-to-use.md
@@ -645,112 +645,132 @@ If you wish to have auto configuration use the `readinessindicatorfile` in the c
 
 ### Run pod with network annotation and Dynamic Resource Allocation driver
 
-> :warning: Dynamic Resource Allocation (DRA) is [currently an alpha](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/),
-> and is subject to change. Please consider this functionality as a preview. The architecture and usage of DRA in
-> Multus CNI may change in the future as this technology matures.
->
-> The current DRA integration is based on the DRA API for Kubernetes 1.26 to 1.30. With Kubernetes 1.31, the DRA API
-> will change and multus doesn't integrate with the new API yet.
-Dynamic Resource Allocation is alternative mechanism to device plugin which allows to requests pod and container
-resources.
+Dynamic Resource Allocation is an alternative mechanism to device plugin which allows pods to request pod and container
+resources dynamically.
 
-The following sections describe how to use DRA with multus and NVIDIA DRA driver. Other DRA networking driver vendors
-should follow similar concepts to make use of multus DRA support.
+The following sections describe how to use DRA with Multus. DRA networking driver vendors should follow similar
+concepts to make use of Multus DRA support.
 
 #### Prerequisite
 
-1. Kubernetes 1.27
-2. Container Runtime with CDI support enabled
-3. Kubernetes runtime-config=resource.k8s.io/v1alpha2
-4. Kubernetes feature-gates=DynamicResourceAllocation=True,KubeletPodResourcesDynamicResources=true
+1. Kubernetes 1.34+
 
 #### Install DRA driver
 
-The current example uses NVIDIA DRA driver for networking. This DRA driver is not publicly available. An alternative to
-this DRA driver is available at [dra-example-driver](https://github.com/kubernetes-sigs/dra-example-driver).
+You need to install a DRA driver that provides network devices. For example, you can use the SR-IOV DRA driver or
+other DRA networking drivers. Refer to your DRA driver documentation for installation instructions.
 
-#### Create dynamic resource class with NVIDIA network DRA driver
+The DRA driver MUST expose the following attribute `k8s.cni.cncf.io/deviceID` containing the device ID
+that Multus will pass to the CNI plugin.
 
-The `ResourceClass` defines the resource pool of `sf-pool-1`.
+#### Create network attachment definition with resource name
+
+The `k8s.v1.cni.cncf.io/resourceName` annotation is used to associate a NetworkAttachmentDefinition with DRA resources.
+The format is: `/` where: +- `pod-resource-name`: The name of the resource claim in the pod's `spec.resourceClaims` +- `result-name`: The name of the device request in the ResourceClaimTemplate's `spec.devices.requests` + +Multus queries the ResourceClaim and ResourceSlices APIs to fetch information about allocated DRA devices. When a +NetworkAttachmentDefinition has a `resourceName` annotation that matches a pod's resource claim and result name, +Multus will pass the `k8s.cni.cncf.io/deviceID` to the CNI plugin in the DeviceID field. + +##### NetworkAttachmentDefinition for SR-IOV example: + +Following command creates a NetworkAttachmentDefinition for SR-IOV. The `resourceName` annotation `sriov/vf` indicates: +- `sriov`: matches the pod's resourceClaim name +- `vf`: matches the device request name in the ResourceClaimTemplate ``` # Execute following command at Kubernetes master cat < 1 { + logging.Errorf("getDeviceID: expected 1 resource slice for %s, got %d", key, len(matchingSlices)) + return "", fmt.Errorf("expected 1 resource slice, got %d", len(matchingSlices)) + } + resourceSlice = matchingSlices[0] + d.resourceSliceCache[key] = resourceSlice + logging.Debugf("getDeviceID: cached resource slice %s with %d devices", key, len(resourceSlice.Spec.Devices)) + } else { + logging.Debugf("getDeviceID: using cached resource slice %s", key) + } + + logging.Debugf("getDeviceID: searching for device %s in %d devices", result.Device, len(resourceSlice.Spec.Devices)) + for _, device := range resourceSlice.Spec.Devices { + if device.Name != result.Device { + continue + } + logging.Debugf("getDeviceID: found device %s, checking for deviceID attribute", device.Name) + deviceID, exists := device.Attributes["k8s.cni.cncf.io/deviceID"] + if !exists { + logging.Debugf("getDeviceID: device %s does not have k8s.cni.cncf.io/deviceID attribute", device.Name) + continue + } + logging.Verbosef("getDeviceID: successfully retrieved device ID %s for device %s (driver/pool: %s)", + 
*deviceID.StringValue, result.Device, key) + return *deviceID.StringValue, nil + } + + err := fmt.Errorf("device %s not found for claim resource %s/%s", result.Device, result.Driver, result.Pool) + logging.Errorf("getDeviceID: %v", err) + return "", err +} diff --git a/pkg/draclient/draclient_suite_test.go b/pkg/draclient/draclient_suite_test.go new file mode 100644 index 000000000..5423c0965 --- /dev/null +++ b/pkg/draclient/draclient_suite_test.go @@ -0,0 +1,31 @@ +// Copyright (c) 2025 Multus Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package draclient + +// disable dot-imports only for testing +//revive:disable:dot-imports +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "testing" +) + +func TestDRAClient(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "draclient") +} + + diff --git a/pkg/draclient/draclient_test.go b/pkg/draclient/draclient_test.go new file mode 100644 index 000000000..8e7c20a70 --- /dev/null +++ b/pkg/draclient/draclient_test.go @@ -0,0 +1,1024 @@ +// Copyright (c) 2025 Multus Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package draclient + +// disable dot-imports only for testing +//revive:disable:dot-imports +import ( + "context" + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + v1 "k8s.io/api/core/v1" + resourcev1api "k8s.io/api/resource/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + k8sTypes "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/fake" + + "gopkg.in/k8snetworkplumbingwg/multus-cni.v4/pkg/types" +) + +var _ = Describe("DRA Client operations", func() { + + Describe("NewClient", func() { + It("should create a new DRA client successfully", func() { + fakeClient := fake.NewSimpleClientset() + client := NewClient(fakeClient.ResourceV1()) + Expect(client).NotTo(BeNil()) + }) + }) + + Describe("GetPodResourceMap", func() { + var ( + fakeClient *fake.Clientset + draClient ClientInterface + ) + + BeforeEach(func() { + fakeClient = fake.NewSimpleClientset() + draClient = NewClient(fakeClient.ResourceV1()) + }) + + Context("when pod has no resource claims", func() { + It("should return empty resource map without error", func() { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{}, + }, + } + + resourceMap := make(map[string]*types.ResourceInfo) + err := draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).NotTo(HaveOccurred()) + Expect(resourceMap).To(BeEmpty()) + }) + }) + + Context("when resource claim exists with 
valid device allocation", func() { + It("should successfully populate resource map with device IDs", func() { + claimName := "test-claim" + deviceName := "device-1" + driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + deviceID := "pci:0000:00:01.0" + + // Create ResourceSlice with device + deviceIDValue := deviceID + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + // Create ResourceClaim with allocation + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + // Create pod with resource claim status + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add objects to fake client + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + 
Expect(err).NotTo(HaveOccurred()) + + // Execute + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).NotTo(HaveOccurred()) + + // Verify + expectedKey := fmt.Sprintf("%s/%s", claimName, requestName) + Expect(resourceMap).To(HaveKey(expectedKey)) + Expect(resourceMap[expectedKey].DeviceIDs).To(Equal([]string{deviceID})) + }) + }) + + Context("when multiple devices are allocated to the same claim/request", func() { + It("should append all device IDs to the resource map", func() { + claimName := "multi-device-claim" + driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + device1Name := "device-1" + device2Name := "device-2" + deviceID1 := "pci:0000:00:01.0" + deviceID2 := "pci:0000:00:02.0" + + // Create ResourceSlice with multiple devices + deviceID1Value := deviceID1 + deviceID2Value := deviceID2 + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: device1Name, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceID1Value, + }, + }, + }, + { + Name: device2Name, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceID2Value, + }, + }, + }, + }, + }, + } + + // Create ResourceClaim with multiple device allocations + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: 
requestName, + Driver: driverName, + Pool: poolName, + Device: device1Name, + }, + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: device2Name, + }, + }, + }, + }, + }, + } + + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add objects to fake client + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Execute + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).NotTo(HaveOccurred()) + + // Verify + expectedKey := fmt.Sprintf("%s/%s", claimName, requestName) + Expect(resourceMap).To(HaveKey(expectedKey)) + Expect(resourceMap[expectedKey].DeviceIDs).To(ConsistOf(deviceID1, deviceID2)) + }) + }) + + Context("when resource claim does not exist", func() { + It("should return an error", func() { + claimName := "non-existent-claim" + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + resourceMap := make(map[string]*types.ResourceInfo) + err := draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("not found")) + }) + }) + + Context("when resource slice does not exist", func() { + It("should return an error", func() { + claimName := "test-claim" + 
deviceName := "device-1" + driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + + // Create ResourceClaim but no ResourceSlice + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add only resource claim + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Execute + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("expected 1 resource slice, got 0")) + }) + }) + + Context("when device does not have deviceID attribute", func() { + It("should return an error", func() { + claimName := "test-claim" + deviceName := "device-1" + driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + + // Create ResourceSlice with device but NO deviceID attribute + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + 
Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + // Missing k8s.cni.cncf.io/deviceID + "some-other-attribute": { + StringValue: func() *string { s := "value"; return &s }(), + }, + }, + }, + }, + }, + } + + // Create ResourceClaim + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add objects + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Execute + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("not found for claim resource")) + }) + }) + + Context("when device name in allocation does not match any device in slice", func() { + It("should return an error", func() { + claimName := "test-claim" + deviceName := "device-1" + wrongDeviceName := "wrong-device" + driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + deviceID := "pci:0000:00:01.0" + + // Create ResourceSlice with device + deviceIDValue := deviceID + 
resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + // Create ResourceClaim with WRONG device name + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: wrongDeviceName, + }, + }, + }, + }, + }, + } + + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add objects + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Execute + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("not found for claim resource")) + }) + }) + + Context("when caching is working correctly", func() { + It("should cache resource claims and slices", func() { + claimName := 
"test-claim" + deviceName := "device-1" + driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + deviceID := "pci:0000:00:01.0" + + // Create ResourceSlice with device + deviceIDValue := deviceID + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + // Create ResourceClaim + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add objects to fake client + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // First call - should populate cache + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod, resourceMap) + 
Expect(err).NotTo(HaveOccurred()) + + expectedKey := fmt.Sprintf("%s/%s", claimName, requestName) + Expect(resourceMap).To(HaveKey(expectedKey)) + Expect(resourceMap[expectedKey].DeviceIDs).To(Equal([]string{deviceID})) + + // Delete the objects from the API server + err = fakeClient.ResourceV1().ResourceClaims("default").Delete(context.TODO(), claimName, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + err = fakeClient.ResourceV1().ResourceSlices().Delete(context.TODO(), resourceSlice.Name, metav1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Second call - should use cache and succeed even though objects are deleted + resourceMap2 := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod, resourceMap2) + Expect(err).NotTo(HaveOccurred()) + Expect(resourceMap2).To(HaveKey(expectedKey)) + Expect(resourceMap2[expectedKey].DeviceIDs).To(Equal([]string{deviceID})) + }) + }) + + Context("when pod has multiple different claims", func() { + It("should populate resource map with all claims sequentially", func() { + // Note: Due to fake client limitations with field selectors, + // we test each claim separately to avoid conflicts + + claim1Name := "claim-1" + device1Name := "device-1" + driver1Name := "driver1.example.com" + pool1Name := "pool-1" + request1Name := "gpu" + deviceID1 := "pci:0000:00:01.0" + + // First claim setup + deviceID1Value := deviceID1 + resourceSlice1 := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice-1", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driver1Name, + Pool: resourcev1api.ResourcePool{ + Name: pool1Name, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: device1Name, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceID1Value, + }, + }, + }, + }, + }, + } + + resourceClaim1 := &resourcev1api.ResourceClaim{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: claim1Name, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: request1Name, + Driver: driver1Name, + Pool: pool1Name, + Device: device1Name, + }, + }, + }, + }, + }, + } + + claim1NamePtr := claim1Name + pod1 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-1", + Namespace: "default", + UID: k8sTypes.UID("test-uid-1"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claim1NamePtr, + }, + }, + }, + } + + // Add first claim objects + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim1, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice1, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Test first claim + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient.GetPodResourceMap(pod1, resourceMap) + Expect(err).NotTo(HaveOccurred()) + + expectedKey1 := fmt.Sprintf("%s/%s", claim1Name, request1Name) + Expect(resourceMap).To(HaveKey(expectedKey1)) + Expect(resourceMap[expectedKey1].DeviceIDs).To(Equal([]string{deviceID1})) + + // Now test second claim with a fresh client to avoid field selector issues + claim2Name := "claim-2" + device2Name := "device-2" + driver2Name := "driver2.example.com" + pool2Name := "pool-2" + request2Name := "nic" + deviceID2 := "pci:0000:00:02.0" + + fakeClient2 := fake.NewSimpleClientset() + draClient2 := NewClient(fakeClient2.ResourceV1()) + + deviceID2Value := deviceID2 + resourceSlice2 := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice-2", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driver2Name, + Pool: 
resourcev1api.ResourcePool{ + Name: pool2Name, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: device2Name, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceID2Value, + }, + }, + }, + }, + }, + } + + resourceClaim2 := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claim2Name, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: request2Name, + Driver: driver2Name, + Pool: pool2Name, + Device: device2Name, + }, + }, + }, + }, + }, + } + + claim2NamePtr := claim2Name + pod2 := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod-2", + Namespace: "default", + UID: k8sTypes.UID("test-uid-2"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claim2NamePtr, + }, + }, + }, + } + + // Add second claim objects + _, err = fakeClient2.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim2, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient2.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice2, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Test second claim + resourceMap2 := make(map[string]*types.ResourceInfo) + err = draClient2.GetPodResourceMap(pod2, resourceMap2) + Expect(err).NotTo(HaveOccurred()) + + expectedKey2 := fmt.Sprintf("%s/%s", claim2Name, request2Name) + Expect(resourceMap2).To(HaveKey(expectedKey2)) + Expect(resourceMap2[expectedKey2].DeviceIDs).To(Equal([]string{deviceID2})) + }) + }) + + Context("when resource map already has an entry for the claim/request", func() { + It("should append device IDs to existing entry", func() { + claimName := "test-claim" + deviceName := "device-1" + 
driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + deviceID := "pci:0000:00:01.0" + existingDeviceID := "pci:0000:00:00.0" + + // Create ResourceSlice with device + deviceIDValue := deviceID + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + // Create ResourceClaim + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add objects + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = fakeClient.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Pre-populate resourceMap with existing entry + resourceMap := make(map[string]*types.ResourceInfo) + expectedKey := fmt.Sprintf("%s/%s", claimName, requestName) + 
resourceMap[expectedKey] = &types.ResourceInfo{ + DeviceIDs: []string{existingDeviceID}, + } + + // Execute + err = draClient.GetPodResourceMap(pod, resourceMap) + Expect(err).NotTo(HaveOccurred()) + + // Verify device ID was appended + Expect(resourceMap).To(HaveKey(expectedKey)) + Expect(resourceMap[expectedKey].DeviceIDs).To(Equal([]string{existingDeviceID, deviceID})) + }) + }) + + Context("when multiple resource slices exist for the same driver/pool", func() { + It("should return an error", func() { + claimName := "test-claim" + deviceName := "device-1" + driverName := "test-driver.example.com" + poolName := "test-pool" + requestName := "gpu" + deviceID := "pci:0000:00:01.0" + + // Create TWO ResourceSlices with same driver/pool + deviceIDValue := deviceID + resourceSlice1 := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice-1", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + resourceSlice2 := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice-2", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: "device-2", + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + // Create ResourceClaim + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: "default", + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: 
&resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + claimNamePtr := claimName + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + UID: k8sTypes.UID("test-uid"), + }, + Status: v1.PodStatus{ + ResourceClaimStatuses: []v1.PodResourceClaimStatus{ + { + ResourceClaimName: &claimNamePtr, + }, + }, + }, + } + + // Add all objects - this creates ambiguity + _, err := fakeClient.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Add both resource slices with same driver/pool + var objects []runtime.Object + objects = append(objects, resourceSlice1, resourceSlice2) + fakeClient2 := fake.NewSimpleClientset(objects...) + draClient2 := NewClient(fakeClient2.ResourceV1()) + + // Also need to add the claim to the new client + _, err = fakeClient2.ResourceV1().ResourceClaims("default").Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Execute - should fail because there are 2 slices + resourceMap := make(map[string]*types.ResourceInfo) + err = draClient2.GetPodResourceMap(pod, resourceMap) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("expected 1 resource slice, got 2")) + }) + }) + }) +}) diff --git a/pkg/k8sclient/k8sclient.go b/pkg/k8sclient/k8sclient.go index b809a5376..bebbf1b56 100644 --- a/pkg/k8sclient/k8sclient.go +++ b/pkg/k8sclient/k8sclient.go @@ -42,6 +42,7 @@ import ( netclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" netlister "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/listers/k8s.cni.cncf.io/v1" netutils 
"github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" + "gopkg.in/k8snetworkplumbingwg/multus-cni.v4/pkg/draclient" "gopkg.in/k8snetworkplumbingwg/multus-cni.v4/pkg/kubeletclient" "gopkg.in/k8snetworkplumbingwg/multus-cni.v4/pkg/logging" "gopkg.in/k8snetworkplumbingwg/multus-cni.v4/pkg/types" @@ -317,6 +318,13 @@ func getKubernetesDelegate(client *ClientInfo, net *types.NetworkSelectionElemen if err != nil { return nil, resourceMap, logging.Errorf("getKubernetesDelegate: failed to get resourceMap from ResourceClient: %v", err) } + + dc := draclient.NewClient(client.Client.ResourceV1()) + err = dc.GetPodResourceMap(pod, resourceMap) + if err != nil { + return nil, resourceMap, logging.Errorf("getKubernetesDelegate: failed to get resourceMap from DRA client: %v", err) + } + logging.Debugf("getKubernetesDelegate: resourceMap instance: %+v", resourceMap) } diff --git a/pkg/k8sclient/k8sclient_test.go b/pkg/k8sclient/k8sclient_test.go index 7e9a9332c..6c28e4ce5 100644 --- a/pkg/k8sclient/k8sclient_test.go +++ b/pkg/k8sclient/k8sclient_test.go @@ -18,6 +18,7 @@ package k8sclient // disable dot-imports only for testing //revive:disable:dot-imports import ( + "context" "fmt" "os" "path/filepath" @@ -33,6 +34,9 @@ import ( netfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake" netutils "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/utils" + v1 "k8s.io/api/core/v1" + resourcev1api "k8s.io/api/resource/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" . 
"github.com/onsi/ginkgo/v2" @@ -1553,4 +1557,550 @@ users: Expect(err).NotTo(HaveOccurred()) }) }) + + Describe("DRA (Dynamic Resource Allocation) integration", func() { + var tmpDir string + var err error + + BeforeEach(func() { + tmpDir, err = os.MkdirTemp("", "multus_dra_test") + Expect(err).NotTo(HaveOccurred()) + }) + + AfterEach(func() { + err := os.RemoveAll(tmpDir) + Expect(err).NotTo(HaveOccurred()) + }) + + Context("when pod has DRA resources with device plugin resources", func() { + It("should combine DRA and device plugin resource maps", func() { + // This test verifies that getKubernetesDelegate correctly integrates + // DRA resources with traditional device plugin resources + + const fakePodName string = "dra-test-pod" + const fakeNamespace string = "default" + + // Create a network attachment definition + netAttachDef := `{ + "name": "sriov-net", + "type": "sriov", + "cniVersion": "0.3.1" + }` + + // Create fake pod with DRA resource claims + claimName := "gpu-claim" + claimNamePtr := &claimName + fakePod := testutils.NewFakePod(fakePodName, "sriov-net", "") + fakePod.Namespace = fakeNamespace + fakePod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{ + { + ResourceClaimName: claimNamePtr, + }, + } + + // Setup client + clientInfo := NewFakeClientInfo() + _, err = clientInfo.AddPod(fakePod) + Expect(err).NotTo(HaveOccurred()) + + // Create network attachment definition (without resourceName annotation to avoid kubeletclient) + nad := testutils.NewFakeNetAttachDef(fakeNamespace, "sriov-net", netAttachDef) + _, err = clientInfo.AddNetAttachDef(nad) + Expect(err).NotTo(HaveOccurred()) + + // Create ResourceClaim + deviceName := "gpu-1" + driverName := "gpu.example.com" + poolName := "gpu-pool" + requestName := "gpu" + deviceID := "pci:0000:00:01.0" + + deviceIDValue := deviceID + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + 
Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: fakeNamespace, + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + // Add DRA resources to fake client + _, err = clientInfo.Client.ResourceV1().ResourceClaims(fakeNamespace).Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = clientInfo.Client.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Get the network selection element + net := &types.NetworkSelectionElement{ + Name: "sriov-net", + Namespace: fakeNamespace, + } + + // Call getKubernetesDelegate + // Note: Without resourceName annotation, DRA client is not invoked automatically + // This test verifies the delegate is created successfully for pods with DRA claims + delegate, resourceMap, err := getKubernetesDelegate(clientInfo, net, tmpDir, fakePod, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(delegate).NotTo(BeNil()) + + // Verify resourceMap is initialized (but empty since no resourceName annotation) + Expect(len(resourceMap)).To(Equal(0)) + }) + }) + + Context("when GetNetworkDelegates is called with DRA-enabled pod", func() { + It("should successfully retrieve delegates with DRA resources", func() { + const fakePodName string = "dra-network-pod" + 
const fakeNamespace string = "default" + + // Create network attachment definition + netAttachDef := `{ + "name": "dra-network", + "type": "bridge", + "cniVersion": "0.3.1" + }` + + // Create fake pod with DRA resource claims + claimName := "network-claim" + claimNamePtr := &claimName + fakePod := testutils.NewFakePod(fakePodName, "dra-network", "") + fakePod.Namespace = fakeNamespace + fakePod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{ + { + ResourceClaimName: claimNamePtr, + }, + } + + // Setup client + clientInfo := NewFakeClientInfo() + _, err = clientInfo.AddPod(fakePod) + Expect(err).NotTo(HaveOccurred()) + + // Create network attachment definition with resource name annotation + nad := testutils.NewFakeNetAttachDef(fakeNamespace, "dra-network", netAttachDef) + nad.Annotations = map[string]string{ + resourceNameAnnot: "dra-network-claim/gpu", + } + _, err = clientInfo.AddNetAttachDef(nad) + Expect(err).NotTo(HaveOccurred()) + + // Create DRA resources + deviceName := "nic-1" + driverName := "network.example.com" + poolName := "network-pool" + requestName := "gpu" + deviceID := "pci:0000:00:02.0" + + deviceIDValue := deviceID + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "network-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: fakeNamespace, + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + 
{ + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + // Add DRA resources to fake client + _, err = clientInfo.Client.ResourceV1().ResourceClaims(fakeNamespace).Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = clientInfo.Client.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Get pod network + networks, err := GetPodNetwork(fakePod) + Expect(err).NotTo(HaveOccurred()) + Expect(networks).NotTo(BeEmpty()) + + // Create NetConf + conf := &types.NetConf{ + ConfDir: tmpDir, + } + + // Get network delegates + resourceMap := make(map[string]*types.ResourceInfo) + delegates, err := GetNetworkDelegates(clientInfo, fakePod, networks, conf, resourceMap) + Expect(err).NotTo(HaveOccurred()) + Expect(delegates).NotTo(BeEmpty()) + Expect(delegates[0]).NotTo(BeNil()) + }) + }) + + Context("when DRA resources are used in TryLoadPodDelegates", func() { + It("should successfully load delegates with DRA resources", func() { + const fakePodName string = "dra-delegate-pod" + const fakeNamespace string = "default" + + // Create network attachment definition + netAttachDef := `{ + "name": "dra-delegate-network", + "type": "macvlan", + "cniVersion": "0.3.1" + }` + + // Create fake pod with DRA resource claims + claimName := "delegate-claim" + claimNamePtr := &claimName + fakePod := testutils.NewFakePod(fakePodName, "dra-delegate-network", "") + fakePod.Namespace = fakeNamespace + fakePod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{ + { + ResourceClaimName: claimNamePtr, + }, + } + + // Setup client + clientInfo := NewFakeClientInfo() + _, err = clientInfo.AddPod(fakePod) + Expect(err).NotTo(HaveOccurred()) + + // Create network attachment definition + nad := testutils.NewFakeNetAttachDef(fakeNamespace, "dra-delegate-network", netAttachDef) + _, err = 
clientInfo.AddNetAttachDef(nad) + Expect(err).NotTo(HaveOccurred()) + + // Create DRA resources + deviceName := "macvlan-device" + driverName := "macvlan.example.com" + poolName := "macvlan-pool" + requestName := "nic" + deviceID := "pci:0000:00:03.0" + + deviceIDValue := deviceID + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "macvlan-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: fakeNamespace, + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + // Add DRA resources to fake client + _, err = clientInfo.Client.ResourceV1().ResourceClaims(fakeNamespace).Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = clientInfo.Client.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Create NetConf with a default delegate + confStr := `{ + "name": "test-network", + "type": "multus", + "delegates": [{ + "name": "default-network", + "type": "bridge" + }], + "confDir": "` + tmpDir + `" + }` + + conf, err := types.LoadNetConf([]byte(confStr)) + Expect(err).NotTo(HaveOccurred()) + + // Try loading pod delegates + resourceMap := make(map[string]*types.ResourceInfo) + count, 
updatedClient, err := TryLoadPodDelegates(fakePod, conf, clientInfo, resourceMap) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedClient).NotTo(BeNil()) + Expect(count).To(BeNumerically(">", 0)) + }) + }) + + Context("when pod has no DRA resources", func() { + It("should handle pods without DRA resources gracefully", func() { + const fakePodName string = "no-dra-pod" + const fakeNamespace string = "default" + + // Create network attachment definition + netAttachDef := `{ + "name": "simple-network", + "type": "bridge", + "cniVersion": "0.3.1" + }` + + // Create fake pod WITHOUT DRA resource claims + fakePod := testutils.NewFakePod(fakePodName, "simple-network", "") + fakePod.Namespace = fakeNamespace + fakePod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{} // Empty + + // Setup client + clientInfo := NewFakeClientInfo() + _, err = clientInfo.AddPod(fakePod) + Expect(err).NotTo(HaveOccurred()) + + // Create network attachment definition + nad := testutils.NewFakeNetAttachDef(fakeNamespace, "simple-network", netAttachDef) + _, err = clientInfo.AddNetAttachDef(nad) + Expect(err).NotTo(HaveOccurred()) + + // Get the network selection element + net := &types.NetworkSelectionElement{ + Name: "simple-network", + Namespace: fakeNamespace, + } + + // Call getKubernetesDelegate - should work without DRA + delegate, resourceMap, err := getKubernetesDelegate(clientInfo, net, tmpDir, fakePod, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(delegate).NotTo(BeNil()) + // ResourceMap should be empty (no DRA resources) + Expect(len(resourceMap)).To(Equal(0)) + }) + }) + + Context("when DRA client fails to get resources", func() { + It("should return an error from getKubernetesDelegate", func() { + const fakePodName string = "dra-fail-pod" + const fakeNamespace string = "default" + + // Create network attachment definition + netAttachDef := `{ + "name": "fail-network", + "type": "bridge", + "cniVersion": "0.3.1" + }` + + // Create fake pod with DRA resource claims 
that don't exist + claimName := "non-existent-claim" + claimNamePtr := &claimName + fakePod := testutils.NewFakePod(fakePodName, "fail-network", "") + fakePod.Namespace = fakeNamespace + fakePod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{ + { + ResourceClaimName: claimNamePtr, + }, + } + + // Setup client + clientInfo := NewFakeClientInfo() + _, err = clientInfo.AddPod(fakePod) + Expect(err).NotTo(HaveOccurred()) + + // Create network attachment definition (without resourceName to focus on DRA) + nad := testutils.NewFakeNetAttachDef(fakeNamespace, "fail-network", netAttachDef) + _, err = clientInfo.AddNetAttachDef(nad) + Expect(err).NotTo(HaveOccurred()) + + // Do NOT create the ResourceClaim - this should cause an error + + // Get the network selection element + net := &types.NetworkSelectionElement{ + Name: "fail-network", + Namespace: fakeNamespace, + } + + // Call getKubernetesDelegate - without resourceName annotation, DRA is only invoked when resourceMap is needed + // So this test verifies graceful handling when pod has DRA claims but no resourceName + delegate, resourceMap, err := getKubernetesDelegate(clientInfo, net, tmpDir, fakePod, nil) + Expect(err).NotTo(HaveOccurred()) + Expect(delegate).NotTo(BeNil()) + Expect(len(resourceMap)).To(Equal(0)) + }) + }) + + Context("when using getNetDelegate with DRA resources", func() { + It("should retrieve delegates with DRA resources populated", func() { + const fakePodName string = "netdelegate-dra-pod" + const fakeNamespace string = "default" + + // Create network attachment definition + netAttachDef := `{ + "name": "netdelegate-network", + "type": "ipvlan", + "cniVersion": "0.3.1" + }` + + // Create fake pod with DRA resource claims + claimName := "netdelegate-claim" + claimNamePtr := &claimName + fakePod := testutils.NewFakePod(fakePodName, "", "") + fakePod.Namespace = fakeNamespace + fakePod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{ + { + ResourceClaimName: claimNamePtr, + }, 
+ } + + // Setup client + clientInfo := NewFakeClientInfo() + _, err = clientInfo.AddPod(fakePod) + Expect(err).NotTo(HaveOccurred()) + + // Create network attachment definition + nad := testutils.NewFakeNetAttachDef(fakeNamespace, "netdelegate-network", netAttachDef) + _, err = clientInfo.AddNetAttachDef(nad) + Expect(err).NotTo(HaveOccurred()) + + // Create DRA resources + deviceName := "ipvlan-device" + driverName := "ipvlan.example.com" + poolName := "ipvlan-pool" + requestName := "vnic" + deviceID := "pci:0000:00:04.0" + + deviceIDValue := deviceID + resourceSlice := &resourcev1api.ResourceSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ipvlan-resource-slice", + }, + Spec: resourcev1api.ResourceSliceSpec{ + Driver: driverName, + Pool: resourcev1api.ResourcePool{ + Name: poolName, + ResourceSliceCount: 1, + }, + Devices: []resourcev1api.Device{ + { + Name: deviceName, + Attributes: map[resourcev1api.QualifiedName]resourcev1api.DeviceAttribute{ + "k8s.cni.cncf.io/deviceID": { + StringValue: &deviceIDValue, + }, + }, + }, + }, + }, + } + + resourceClaim := &resourcev1api.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: claimName, + Namespace: fakeNamespace, + }, + Status: resourcev1api.ResourceClaimStatus{ + Allocation: &resourcev1api.AllocationResult{ + Devices: resourcev1api.DeviceAllocationResult{ + Results: []resourcev1api.DeviceRequestAllocationResult{ + { + Request: requestName, + Driver: driverName, + Pool: poolName, + Device: deviceName, + }, + }, + }, + }, + }, + } + + // Add DRA resources to fake client + _, err = clientInfo.Client.ResourceV1().ResourceClaims(fakeNamespace).Create(context.TODO(), resourceClaim, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + _, err = clientInfo.Client.ResourceV1().ResourceSlices().Create(context.TODO(), resourceSlice, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + // Call getNetDelegate + resourceMap := make(map[string]*types.ResourceInfo) + delegate, updatedResourceMap, err := 
getNetDelegate(clientInfo, fakePod, "netdelegate-network", tmpDir, fakeNamespace, resourceMap) + Expect(err).NotTo(HaveOccurred()) + Expect(delegate).NotTo(BeNil()) + Expect(updatedResourceMap).NotTo(BeNil()) + }) + }) + }) }) diff --git a/pkg/kubeletclient/kubeletclient.go b/pkg/kubeletclient/kubeletclient.go index 630945e4c..7f3993cc0 100644 --- a/pkg/kubeletclient/kubeletclient.go +++ b/pkg/kubeletclient/kubeletclient.go @@ -21,7 +21,6 @@ import ( "net/url" "os" "path/filepath" - "strings" "time" "golang.org/x/net/context" @@ -111,7 +110,6 @@ type kubeletClient struct { } func (rc *kubeletClient) getPodResources(client podresourcesapi.PodResourcesListerClient) error { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() @@ -139,7 +137,6 @@ func (rc *kubeletClient) GetPodResourceMap(pod *v1.Pod) (map[string]*types.Resou if pr.Name == name && pr.Namespace == ns { for _, cnt := range pr.Containers { rc.getDevicePluginResources(cnt.Devices, resourceMap) - rc.getDRAResources(cnt.DynamicResources, resourceMap) } } } @@ -156,27 +153,6 @@ func (rc *kubeletClient) getDevicePluginResources(devices []*podresourcesapi.Con } } -func (rc *kubeletClient) getDRAResources(dynamicResources []*podresourcesapi.DynamicResource, resourceMap map[string]*types.ResourceInfo) { - for _, dynamicResource := range dynamicResources { - var deviceIDs []string - for _, claimResource := range dynamicResource.ClaimResources { - for _, cdiDevice := range claimResource.CdiDevices { - res := strings.Split(cdiDevice.Name, "=") - if len(res) == 2 { - deviceIDs = append(deviceIDs, res[1]) - } else { - logging.Errorf("GetPodResourceMap: Invalid CDI format") - } - } - } - if rInfo, ok := resourceMap[dynamicResource.ClaimName]; ok { - rInfo.DeviceIDs = append(rInfo.DeviceIDs, deviceIDs...) 
- } else { - resourceMap[dynamicResource.ClaimName] = &types.ResourceInfo{DeviceIDs: deviceIDs} - } - } -} - func hasKubeletAPIEndpoint(url *url.URL) bool { // Check for kubelet resource API socket file if _, err := os.Stat(url.Path); err != nil { diff --git a/pkg/kubeletclient/kubeletclient_test.go b/pkg/kubeletclient/kubeletclient_test.go index a9593a1c1..7661fcb7e 100644 --- a/pkg/kubeletclient/kubeletclient_test.go +++ b/pkg/kubeletclient/kubeletclient_test.go @@ -46,8 +46,8 @@ var ( ) type fakeResourceServer struct { - server *grpc.Server podresourcesapi.UnimplementedPodResourcesListerServer + server *grpc.Server } // TODO: This is stub code for test, but we may need to change for the testing we use this API in the future... @@ -81,7 +81,7 @@ func (m *fakeResourceServer) List(_ context.Context, _ *podresourcesapi.ListPodR { CdiDevices: cdiDevices, DriverName: draDriverName, - PoolName: poolName, + PoolName: poolName, DeviceName: deviceName, }, } @@ -249,34 +249,6 @@ var _ = Describe("Kubelet resource endpoint data read operations", func() { Expect(resourceMap).To(Equal(outputRMap)) }) - It("should return no error with dynamic resource", func() { - podUID := k8sTypes.UID("9f94e27b-4233-43d6-bd10-f73b4de6f456") - fakePod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "dynamic-resource-pod-name", - Namespace: "dynamic-resource-pod-namespace", - UID: podUID, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "dynamic-resource-container-name", - }, - }, - }, - } - client, err := getKubeletClient(testKubeletSocket) - Expect(err).NotTo(HaveOccurred()) - - outputRMap := map[string]*mtypes.ResourceInfo{ - "resource-claim": {DeviceIDs: []string{"cdi-resource"}}, - } - resourceMap, err := client.GetPodResourceMap(fakePod) - Expect(err).NotTo(HaveOccurred()) - Expect(resourceMap).ShouldNot(BeNil()) - Expect(resourceMap).To(Equal(outputRMap)) - }) - It("should return an error with garbage socket value", func() { u, err := url.Parse("/badfilepath!?//") 
Expect(err).NotTo(HaveOccurred()) diff --git a/pkg/multus/multus.go b/pkg/multus/multus.go index a7941fe62..f8578d860 100644 --- a/pkg/multus/multus.go +++ b/pkg/multus/multus.go @@ -516,7 +516,7 @@ func delPlugins(exec invoke.Exec, pod *v1.Pod, args *skel.CmdArgs, k8sArgs *type // Check if we had any errors, and send them all back. if len(errorstrings) > 0 { - return fmt.Errorf("%s", strings.Join(errorstrings, " / ")) + return fmt.Errorf("errors: %s", strings.Join(errorstrings, " / ")) } return nil