diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index d258bf63..160220b9 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -137,7 +137,7 @@ jobs: /usr/local/bin/kubectl apply -f examples/deviceclass.yaml /usr/local/bin/kubectl apply -f examples/resourceclaim.yaml /usr/local/bin/kubectl wait --timeout=2m --for=condition=ready pods -l app=pod - /usr/local/bin/kubectl exec -it pod1 -- ip link show dummy0 + /usr/local/bin/kubectl exec -it pod1 -- ip link show eth99 - name: Upload Junit Reports if: always() diff --git a/examples/resourceclaim.yaml b/examples/resourceclaim.yaml index 7c21abef..df85686e 100644 --- a/examples/resourceclaim.yaml +++ b/examples/resourceclaim.yaml @@ -28,10 +28,10 @@ spec: - opaque: driver: dra.net parameters: - newName: "eth99" - address: "192.168.2.2" - mask: "255.255.255.0" - mtu: "1500" + interface: + name: "eth99" + addresses: + - "169.254.169.13/32" --- apiVersion: v1 kind: Pod diff --git a/examples/resourceclaimtemplate.yaml b/examples/resourceclaimtemplate.yaml index a98a23d4..e5886329 100644 --- a/examples/resourceclaimtemplate.yaml +++ b/examples/resourceclaimtemplate.yaml @@ -13,26 +13,52 @@ # limitations under the License. 
--- apiVersion: resource.k8s.io/v1beta1 +kind: DeviceClass +metadata: + name: multinic +spec: + selectors: + - cel: + expression: device.driver == "dra.net" +--- +apiVersion: resource.k8s.io/v1beta1 kind: ResourceClaimTemplate metadata: - name: dummy-interfaces + name: phy-interfaces-template spec: spec: devices: requests: - - name: req-dummy-template - deviceClassName: dra.net + - name: phy-interfaces-template + deviceClassName: multinic + selectors: + - cel: + expression: device.attributes["dra.net"].ifName == "dummy1" --- -apiVersion: v1 -kind: Pod +apiVersion: apps/v1 +kind: Deployment metadata: - name: pod0 + name: server-deployment labels: - app: pod + app: MyApp spec: - containers: - - name: ctr0 - image: registry.k8s.io/e2e-test-images/agnhost:2.39 - resourceClaims: - - name: dummy - resourceClaimTemplateName: dummy-interfaces + replicas: 1 + selector: + matchLabels: + app: MyApp + template: + metadata: + labels: + app: MyApp + spec: + resourceClaims: + - name: phy-interfaces + resourceClaimTemplateName: phy-interfaces-template + containers: + - name: agnhost + image: registry.k8s.io/e2e-test-images/agnhost:2.39 + args: + - netexec + - --http-port=80 + ports: + - containerPort: 80 diff --git a/pkg/apis/types.go b/pkg/apis/types.go new file mode 100644 index 00000000..bb77cc5c --- /dev/null +++ b/pkg/apis/types.go @@ -0,0 +1,38 @@ +/* +Copyright 2025 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
// NetworkConfig is the driver-specific configuration carried in the opaque
// parameters of a ResourceClaim. It describes the single network interface
// backing the allocated device and the routes to install in the pod's
// network namespace.
type NetworkConfig struct {
	// Interface configures the one interface associated with the device.
	// It is a single value, not a list: each configuration targets one device.
	Interface InterfaceConfig `json:"interface"`
	// Routes lists the routes to add inside the pod network namespace.
	// It is optional and omitted from the serialized form when empty.
	Routes []RouteConfig `json:"routes,omitempty"`
}

// InterfaceConfig represents the configuration for a single network interface.
type InterfaceConfig struct {
	// Name is the name the interface gets inside the pod (e.g. "eth0",
	// "enp0s3"). When empty the host interface name is kept.
	Name string `json:"name,omitempty"`
	// Addresses are the IP addresses to assign, each in CIDR notation
	// (host address plus prefix length, e.g. "192.168.1.10/24"). When empty
	// the addresses already present on the host interface are reused.
	Addresses []string `json:"addresses,omitempty"`
	// MTU is the Maximum Transmission Unit. Optional; values <= 0 leave the
	// interface's current MTU untouched.
	MTU int32 `json:"mtu,omitempty"`
	// HardwareAddr is the current hardware (MAC) address of the interface.
	// Read-only: it is informational and not applied to the device.
	HardwareAddr string `json:"hardwareAddr,omitempty"`
}

// RouteConfig represents a network route configuration.
type RouteConfig struct {
	// Destination is the route target, e.g. "0.0.0.0/0" for the default
	// route or "10.0.0.0/8".
	Destination string `json:"destination,omitempty"`
	// Gateway is the next-hop address, e.g. "192.168.1.1".
	Gateway string `json:"gateway,omitempty"`
	// Source is an optional preferred source address for policy routing.
	Source string `json:"source,omitempty"`
}
+*/ + +package apis + +import ( + "encoding/json" + "errors" + "fmt" + "net" + "net/netip" + + "k8s.io/apimachinery/pkg/runtime" +) + +// ValidateConfig validates the data in a runtime.RawExtension against the OpenAPI schema. +func ValidateConfig(raw *runtime.RawExtension) (*NetworkConfig, error) { + if raw == nil || raw.Raw == nil { + return nil, nil + } + // Check if raw.Raw is empty + if len(raw.Raw) == 0 { + return nil, nil + } + var errorsList []error + var config NetworkConfig + if err := json.Unmarshal(raw.Raw, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal YAML data: %w", err) + } + + for _, ip := range config.Interface.Addresses { + if _, err := netip.ParsePrefix(ip); err != nil { + errorsList = append(errorsList, fmt.Errorf("invalid IP in CIDR format %s", ip)) + } + } + + // Validate routes + for i, route := range config.Routes { + if route.Destination == "" { + errorsList = append(errorsList, fmt.Errorf("route %d: destination cannot be empty", i)) + } else { + // Validate Destination as CIDR or IP + if _, _, err := net.ParseCIDR(route.Destination); err != nil { + if net.ParseIP(route.Destination) == nil { + errorsList = append(errorsList, fmt.Errorf("route %d: invalid destination IP or CIDR '%s'", i, route.Destination)) + } + } + } + + if route.Gateway != "" { + if net.ParseIP(route.Gateway) == nil { + errorsList = append(errorsList, fmt.Errorf("route %d: invalid gateway IP '%s'", i, route.Gateway)) + } + } else { + errorsList = append(errorsList, fmt.Errorf("route %d: for destination '%s' must have a gateway", i, route.Destination)) + } + } + return &config, errors.Join(errorsList...) +} diff --git a/pkg/apis/validation_test.go b/pkg/apis/validation_test.go new file mode 100644 index 00000000..598d65a0 --- /dev/null +++ b/pkg/apis/validation_test.go @@ -0,0 +1,163 @@ +/* +Copyright 2025 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apis + +import ( + "strings" + "testing" + + "k8s.io/apimachinery/pkg/runtime" +) + +func TestValidateConfig(t *testing.T) { + tests := []struct { + name string + raw *runtime.RawExtension + wantErr bool + errMsgs []string + }{ + { + name: "valid config", + raw: &runtime.RawExtension{Raw: []byte(`{ + "interface": { + "name": "eth0", + "addresses": ["192.168.1.10/24", "2001:db8::1/64"], + "mtu": 1500 + }, + "routes": [ + { + "destination": "0.0.0.0/0", + "gateway": "192.168.1.1" + }, + { + "destination": "2001:db8:abcd::/48", + "gateway": "2001:db::1" + } + ] + }`)}, + wantErr: false, + }, + { + name: "nil raw extension", + raw: nil, + wantErr: false, + }, + { + name: "nil raw field in raw extension", + raw: &runtime.RawExtension{Raw: nil}, + wantErr: false, + }, + { + name: "empty raw field in raw extension", + raw: &runtime.RawExtension{Raw: []byte{}}, + wantErr: false, + }, + { + name: "malformed json", + raw: &runtime.RawExtension{Raw: []byte(`{"interface": {"name": "eth0"`)}, // Missing closing brace + wantErr: true, + errMsgs: []string{"failed to unmarshal YAML data"}, + }, + { + name: "invalid interface IP CIDR", + raw: &runtime.RawExtension{Raw: []byte(`{ + "interface": { + "name": "eth0", + "addresses": ["192.168.1.10/240"] + } + }`)}, + wantErr: true, + errMsgs: []string{"invalid IP in CIDR format 192.168.1.10/240"}, + }, + { + name: "route with empty destination", + raw: &runtime.RawExtension{Raw: []byte(`{ + "interface": {"name": "eth0", "addresses": ["192.168.1.10/24"]}, + "routes": [{"gateway": "192.168.1.1"}] + }`)}, + wantErr: true, + 
errMsgs: []string{"route 0: destination cannot be empty"}, + }, + { + name: "route with invalid destination", + raw: &runtime.RawExtension{Raw: []byte(`{ + "interface": {"name": "eth0", "addresses": ["192.168.1.10/24"]}, + "routes": [{"destination": "not-an-ip", "gateway": "192.168.1.1"}] + }`)}, + wantErr: true, + errMsgs: []string{"route 0: invalid destination IP or CIDR 'not-an-ip'"}, + }, + { + name: "route with no gateway", + raw: &runtime.RawExtension{Raw: []byte(`{ + "interface": {"name": "eth0", "addresses": ["192.168.1.10/24"]}, + "routes": [{"destination": "10.0.0.0/8"}] + }`)}, + wantErr: true, + errMsgs: []string{"route 0: for destination '10.0.0.0/8' must have a gateway"}, + }, + { + name: "route with invalid gateway IP", + raw: &runtime.RawExtension{Raw: []byte(`{ + "interface": {"name": "eth0", "addresses": ["192.168.1.10/24"]}, + "routes": [{"destination": "10.0.0.0/8", "gateway": "not-a-gateway"}] + }`)}, + wantErr: true, + errMsgs: []string{"route 0: invalid gateway IP 'not-a-gateway'"}, + }, + { + name: "multiple errors", + raw: &runtime.RawExtension{Raw: []byte(`{ + "interface": { + "name": "eth0", + "addresses": ["192.168.1.10/240", "10.0.0.1/invalid"] + }, + "routes": [ + {"destination": "", "gateway": "192.168.1.1"}, + {"destination": "not-an-ip", "gateway": "192.168.1.1"}, + {"destination": "10.0.0.0/8"}, + {"destination": "10.0.1.0/24", "gateway": "not-a-gateway"} + ] + }`)}, + wantErr: true, + errMsgs: []string{ + "invalid IP in CIDR format 192.168.1.10/240", + "invalid IP in CIDR format 10.0.0.1/invalid", + "route 0: destination cannot be empty", + "route 1: invalid destination IP or CIDR 'not-an-ip'", + "route 3: invalid gateway IP 'not-a-gateway'", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := ValidateConfig(tt.raw) + if (err != nil) != tt.wantErr { + t.Errorf("ValidateConfig() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.wantErr { + for _, errMsg := range tt.errMsgs { + 
if !strings.Contains(err.Error(), errMsg) { + t.Errorf("ValidateConfig() error = %v, want to contain %v", err, errMsg) + } + } + } + }) + } +} diff --git a/pkg/driver/driver.go b/pkg/driver/driver.go index 84debbc8..7cea60fd 100644 --- a/pkg/driver/driver.go +++ b/pkg/driver/driver.go @@ -26,6 +26,7 @@ import ( "time" "github.com/google/cel-go/cel" + "github.com/google/dranet/pkg/apis" "github.com/google/dranet/pkg/filter" "github.com/google/dranet/pkg/inventory" @@ -230,7 +231,9 @@ func (np *NetworkDriver) RunPodSandbox(ctx context.Context, pod *api.PodSandbox) if claim.Status.Allocation == nil { continue } + // final resourceClaim Status resourceClaimStatus := resourceapply.ResourceClaimStatus() + var netconf apis.NetworkConfig for _, result := range claim.Status.Allocation.Devices.Results { if result.Driver != np.driverName { continue @@ -243,10 +246,26 @@ func (np *NetworkDriver) RunPodSandbox(ctx context.Context, pod *api.PodSandbox) if len(config.Requests) > 0 && !slices.Contains(config.Requests, result.Request) { continue } - klog.V(4).Infof("podStartHook Configuration %s", string(config.Opaque.Parameters.String())) - // TODO get config options here, it can add ips or commands - // to add routes, run dhcp, rename the interface ... whatever + // TODO: handle the case with multiple configurations (is that possible, should we merge them?) + conf, err := apis.ValidateConfig(&config.Opaque.Parameters) + if err != nil { + klog.Infof("podStartHook Configuration %+v error: %v", netconf, err) + return err + } + // TODO: define a strategy for multiple configs + if conf != nil { + netconf = *conf + break + } } + klog.V(4).Infof("podStartHook final Configuration %+v", netconf) + + // resourceClaim status for this specific device + resourceClaimStatusDevice := resourceapply. + AllocatedDeviceStatus(). + WithDevice(result.Device). + WithDriver(result.Driver). 
+ WithPool(result.Pool) klog.Infof("RunPodSandbox allocation.Devices.Result: %#v", result) // TODO signal this via DRA @@ -258,42 +277,57 @@ func (np *NetworkDriver) RunPodSandbox(ctx context.Context, pod *api.PodSandbox) } } + // configure routes + err = netnsRouting(ns, netconf.Routes) + if err != nil { + klog.Infof("RunPodSandbox error configuring device %s namespace %s routing: %v", result.Device, ns, err) + resourceClaimStatusDevice.WithConditions( + metav1apply.Condition(). + WithType("NetworkReady"). + WithStatus(metav1.ConditionFalse). + WithReason("NetworkReadyError"). + WithMessage(err.Error()). + WithLastTransitionTime(metav1.Now()), + ) + errorList = append(errorList, err) + } else { + resourceClaimStatusDevice.WithConditions( + metav1apply.Condition(). + WithType("NetworkReady"). + WithStatus(metav1.ConditionTrue). + WithReason("NetworkReady"). + WithLastTransitionTime(metav1.Now()), + ) + } + // TODO config options to rename the device and pass parameters // use https://github.com/opencontainers/runtime-spec/pull/1271 - networkData, err := nsAttachNetdev(result.Device, ns, result.Device) + networkData, err := nsAttachNetdev(result.Device, ns, netconf.Interface) if err != nil { klog.Infof("RunPodSandbox error moving device %s to namespace %s: %v", result.Device, ns, err) - resourceClaimStatus = resourceClaimStatus.WithDevices( - resourceapply.AllocatedDeviceStatus(). - WithDevice(result.Device).WithDriver(result.Driver).WithPool(result.Pool). - WithConditions( - metav1apply.Condition(). - WithType("Ready"). - WithStatus(metav1.ConditionFalse). - WithReason("NetworkDeviceError"). - WithMessage(err.Error()). - WithLastTransitionTime(metav1.Now()), - ), + resourceClaimStatusDevice.WithConditions( + metav1apply.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionFalse). + WithReason("NetworkDeviceError"). + WithMessage(err.Error()). 
+ WithLastTransitionTime(metav1.Now()), ) errorList = append(errorList, err) } else { - resourceClaimStatus = resourceClaimStatus.WithDevices( - resourceapply.AllocatedDeviceStatus(). - WithDevice(result.Device).WithDriver(result.Driver).WithPool(result.Pool). - WithConditions( - metav1apply.Condition(). - WithType("Ready"). - WithReason("NetworkDeviceReady"). - WithStatus(metav1.ConditionTrue). - WithLastTransitionTime(metav1.Now()), - ). - WithNetworkData(resourceapply.NetworkDeviceData(). - WithInterfaceName(networkData.InterfaceName). - WithHardwareAddress(networkData.HardwareAddress). - WithIPs(networkData.IPs...), - ), + resourceClaimStatusDevice.WithConditions( + metav1apply.Condition(). + WithType("Ready"). + WithReason("NetworkDeviceReady"). + WithStatus(metav1.ConditionTrue). + WithLastTransitionTime(metav1.Now()), + ).WithNetworkData(resourceapply.NetworkDeviceData(). + WithInterfaceName(networkData.InterfaceName). + WithHardwareAddress(networkData.HardwareAddress). + WithIPs(networkData.IPs...), ) } + resourceClaimStatus.WithDevices(resourceClaimStatusDevice) } resourceClaimApply := resourceapply.ResourceClaim(claim.Name, claim.Namespace).WithStatus(resourceClaimStatus) _, err = np.kubeClient.ResourceV1beta1().ResourceClaims(claim.Namespace).ApplyStatus(ctx, @@ -342,19 +376,35 @@ func (np *NetworkDriver) StopPodSandbox(ctx context.Context, pod *api.PodSandbox continue } + var netconf *apis.NetworkConfig for _, config := range claim.Status.Allocation.Devices.Config { if config.Opaque == nil { continue } - klog.V(4).Infof("podStopHook Configuration %s", string(config.Opaque.Parameters.String())) - // TODO get config options here, it can add ips or commands - // to add routes, run dhcp, rename the interface ... whatever + if len(config.Requests) > 0 && !slices.Contains(config.Requests, result.Request) { + continue + } + // TODO: handle the case with multiple configurations (is that possible, should we merge them?) 
+ netconf, err = apis.ValidateConfig(&config.Opaque.Parameters) + if err != nil { + return err + } + if netconf != nil { + klog.V(4).Infof("Configuration %#v", netconf) + break + } } klog.V(4).Infof("podStopHook Device %s", result.Device) // TODO config options to rename the device and pass parameters // use https://github.com/opencontainers/runtime-spec/pull/1271 - err := nsDetachNetdev(ns, result.Device) + ifName := result.Device + outName := "" + if netconf.Interface.Name != "" { + ifName = netconf.Interface.Name + outName = result.Device + } + err := nsDetachNetdev(ns, ifName, outName) if err != nil { klog.Infof("StopPodSandbox error moving device %s to namespace %s: %v", result.Device, ns, err) continue @@ -365,6 +415,7 @@ func (np *NetworkDriver) StopPodSandbox(ctx context.Context, pod *api.PodSandbox } func (np *NetworkDriver) RemovePodSandbox(_ context.Context, pod *api.PodSandbox) error { + defer np.netdb.RemovePodNetns(podKey(pod)) klog.V(2).Infof("RemovePodSandbox pod %s/%s: ips=%v", pod.GetNamespace(), pod.GetName(), pod.GetIps()) // get the pod network namespace ns := getNetworkNamespace(pod) @@ -437,6 +488,7 @@ func (np *NetworkDriver) prepareResourceClaim(_ context.Context, claim *resource } } + var errorList []error var devices []kubeletplugin.Device for _, result := range claim.Status.Allocation.Devices.Results { requestName := result.Request @@ -446,6 +498,10 @@ func (np *NetworkDriver) prepareResourceClaim(_ context.Context, claim *resource len(config.Requests) > 0 && !slices.Contains(config.Requests, requestName) { continue } + _, err := apis.ValidateConfig(&config.Opaque.Parameters) + if err != nil { + errorList = append(errorList, err) + } } device := kubeletplugin.Device{ Requests: []string{result.Request}, @@ -454,7 +510,11 @@ func (np *NetworkDriver) prepareResourceClaim(_ context.Context, claim *resource } devices = append(devices, device) } - + if len(errorList) > 0 { + return kubeletplugin.PrepareResult{ + Err: fmt.Errorf("claim %s 
contain errors: %w", claim.UID, errors.Join(errorList...)), + } + } return kubeletplugin.PrepareResult{Devices: devices} } diff --git a/pkg/driver/hostdevice.go b/pkg/driver/hostdevice.go index 0c023287..e3da6b8a 100644 --- a/pkg/driver/hostdevice.go +++ b/pkg/driver/hostdevice.go @@ -19,7 +19,9 @@ package driver import ( "errors" "fmt" + "net" + "github.com/google/dranet/pkg/apis" "github.com/vishvananda/netlink" "github.com/vishvananda/netlink/nl" "github.com/vishvananda/netns" @@ -28,7 +30,7 @@ import ( resourceapi "k8s.io/api/resource/v1beta1" ) -func nsAttachNetdev(hostIfName string, containerNsPAth string, ifName string) (*resourceapi.NetworkDeviceData, error) { +func nsAttachNetdev(hostIfName string, containerNsPAth string, interfaceConfig apis.InterfaceConfig) (*resourceapi.NetworkDeviceData, error) { hostDev, err := netlink.LinkByName(hostIfName) // recover same behavior on vishvananda/netlink@1.2.1 and do not fail when the kernel returns NLM_F_DUMP_INTR. if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { @@ -40,10 +42,36 @@ func nsAttachNetdev(hostIfName string, containerNsPAth string, ifName string) (* return nil, fmt.Errorf("failed to set %q down: %v", hostDev.Attrs().Name, err) } - // get the existing IP addresses - addresses, err := netlink.AddrList(hostDev, netlink.FAMILY_ALL) - if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { - return nil, fmt.Errorf("fail to get ip addresses: %w", err) + addresses := []*net.IPNet{} + if len(interfaceConfig.Addresses) == 0 { + // get the existing IP addresses + nlAddresses, err := netlink.AddrList(hostDev, netlink.FAMILY_ALL) + if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { + return nil, fmt.Errorf("fail to get ip addresses: %w", err) + } + for _, address := range nlAddresses { + // Only move permanent IP addresses configured by the user, dynamic addresses are excluded because + // their validity may rely on the original network namespace's context and they may have limited 
+ // lifetimes and are not guaranteed to be available in a new namespace. + // Ref: https://www.ietf.org/rfc/rfc3549.txt + if address.Flags&unix.IFA_F_PERMANENT == 0 { + continue + } + // Only move IP addresses with global scope because those are not host-specific, auto-configured, + // or have limited network scope, making them unsuitable inside the container namespace. + // Ref: https://www.ietf.org/rfc/rfc3549.txt + if address.Scope != unix.RT_SCOPE_UNIVERSE { + continue + } + // remove the interface attribute of the original address + // to avoid issues when the interface is renamed. + addresses = append(addresses, address.IPNet) + } + } else { + for _, addr := range interfaceConfig.Addresses { + _, ipnet, _ := net.ParseCIDR(addr) // already validated + addresses = append(addresses, ipnet) + } } containerNs, err := netns.GetFromPath(containerNsPAth) @@ -74,13 +102,18 @@ func nsAttachNetdev(hostIfName string, containerNsPAth string, ifName string) (* msg.Index = int32(attrs.Index) req.AddData(msg) - nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(attrs.Name)) + ifName := attrs.Name + if interfaceConfig.Name != "" { + ifName = interfaceConfig.Name + } + nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(ifName)) req.AddData(nameData) - alias := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(attrs.Alias)) - req.AddData(alias) - - mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(attrs.MTU))) + ifMtu := uint32(attrs.MTU) + if interfaceConfig.MTU > 0 { + ifMtu = uint32(interfaceConfig.MTU) + } + mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(ifMtu)) req.AddData(mtu) val := nl.Uint32Attr(uint32(containerNs)) @@ -100,33 +133,23 @@ func nsAttachNetdev(hostIfName string, containerNsPAth string, ifName string) (* } defer nhNs.Close() - nsLink, err := nhNs.LinkByName(attrs.Name) + nsLink, err := nhNs.LinkByName(ifName) if err != nil && !errors.Is(err, netlink.ErrDumpInterrupted) { - return nil, fmt.Errorf("link not found for interface %s on 
namespace %s: %w", attrs.Name, containerNsPAth, err) + return nil, fmt.Errorf("link not found for interface %s on namespace %s: %w", ifName, containerNsPAth, err) + } + + networkData := &resourceapi.NetworkDeviceData{ + InterfaceName: nsLink.Attrs().Name, + HardwareAddress: string(nsLink.Attrs().HardwareAddr.String()), } - // Re-add the original IP addresses to the interface in the new namespace. - // The kernel removes IP addresses when an interface is moved between network namespaces. for _, address := range addresses { - // Only move permanent IP addresses configured by the user, dynamic addresses are excluded because - // their validity may rely on the original network namespace's context and they may have limited - // lifetimes and are not guaranteed to be available in a new namespace. - // Ref: https://www.ietf.org/rfc/rfc3549.txt - if address.Flags&unix.IFA_F_PERMANENT == 0 { - continue - } - // Only move IP addresses with global scope because those are not host-specific, auto-configured, - // or have limited network scope, making them unsuitable inside the container namespace. - // Ref: https://www.ietf.org/rfc/rfc3549.txt - if address.Scope != unix.RT_SCOPE_UNIVERSE { - continue - } - // remove the interface attribute of the original address - // to avoid issues when the interface is renamed. 
- err = nhNs.AddrAdd(nsLink, &netlink.Addr{IPNet: address.IPNet}) + err = nhNs.AddrAdd(nsLink, &netlink.Addr{IPNet: address}) if err != nil { return nil, fmt.Errorf("fail to set up address %s on namespace %s: %w", address.String(), containerNsPAth, err) } + networkData.IPs = append(networkData.IPs, address.String()) + } err = nhNs.LinkSetUp(nsLink) @@ -134,23 +157,10 @@ func nsAttachNetdev(hostIfName string, containerNsPAth string, ifName string) (* return nil, fmt.Errorf("failt to set up interface %s on namespace %s: %w", nsLink.Attrs().Name, containerNsPAth, err) } - networkData := &resourceapi.NetworkDeviceData{ - InterfaceName: nsLink.Attrs().Name, - HardwareAddress: string(nsLink.Attrs().HardwareAddr.String()), - } - - // get the existing IP addresses - addresses, err = nhNs.AddrList(nsLink, netlink.FAMILY_ALL) - if err == nil || errors.Is(err, netlink.ErrDumpInterrupted) { - for _, address := range addresses { - networkData.IPs = append(networkData.IPs, address.IPNet.String()) - } - } - return networkData, nil } -func nsDetachNetdev(containerNsPAth string, devName string) error { +func nsDetachNetdev(containerNsPAth string, devName string, outName string) error { containerNs, err := netns.GetFromPath(containerNsPAth) if err != nil { return fmt.Errorf("could not get network namespace from path %s for network device %s : %w", containerNsPAth, devName, err) @@ -204,15 +214,13 @@ func nsDetachNetdev(containerNsPAth string, devName string) error { msg.Index = int32(attrs.Index) req.AddData(msg) - nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(attrs.Name)) + ifName := attrs.Name + if outName != "" { + ifName = outName + } + nameData := nl.NewRtAttr(unix.IFLA_IFNAME, nl.ZeroTerminated(ifName)) req.AddData(nameData) - alias := nl.NewRtAttr(unix.IFLA_IFALIAS, []byte(attrs.Alias)) - req.AddData(alias) - - mtu := nl.NewRtAttr(unix.IFLA_MTU, nl.Uint32Attr(uint32(attrs.MTU))) - req.AddData(mtu) - val := nl.Uint32Attr(uint32(rootNs)) attr := 
nl.NewRtAttr(unix.IFLA_NET_NS_FD, val) req.AddData(attr) diff --git a/pkg/driver/namespace.go b/pkg/driver/namespace.go new file mode 100644 index 00000000..0b5da60f --- /dev/null +++ b/pkg/driver/namespace.go @@ -0,0 +1,60 @@ +/* +Copyright 2024 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package driver + +import ( + "errors" + "net" + + "github.com/google/dranet/pkg/apis" + + "github.com/vishvananda/netlink" + "github.com/vishvananda/netns" +) + +func netnsRouting(containerNsPAth string, routeConfig []apis.RouteConfig) error { + containerNs, err := netns.GetFromPath(containerNsPAth) + if err != nil { + return err + } + defer containerNs.Close() + + // to avoid golang problem with goroutines we create the socket in the + // namespace and use it directly + nhNs, err := netlink.NewHandleAt(containerNs) + if err != nil { + return err + } + defer nhNs.Close() + + errorList := []error{} + for _, route := range routeConfig { + r := netlink.Route{} + + _, dst, _ := net.ParseCIDR(route.Destination) // nolint:errcheck already validated + r.Dst = dst + r.Gw = net.ParseIP(route.Gateway) // already validated + if route.Source != "" { + r.Src = net.ParseIP(route.Source) + } + if err := nhNs.RouteAdd(&r); err != nil { + errorList = append(errorList, err) + } + + } + return errors.Join(errorList...) 
+} diff --git a/tests/e2e.bats b/tests/e2e.bats index 8fe04c50..f24a1b80 100644 --- a/tests/e2e.bats +++ b/tests/e2e.bats @@ -1,17 +1,38 @@ #!/usr/bin/env bats -@test "dummy interface with IP addresses" { +@test "dummy interface with IP addresses ResourceClaim" { docker exec "$CLUSTER_NAME"-worker bash -c "ip link add dummy0 type dummy" docker exec "$CLUSTER_NAME"-worker bash -c "ip link set up dev dummy0" - docker exec "$CLUSTER_NAME"-worker bash -c "ip addr add 169.254.169.13/32 dev dummy0" kubectl apply -f "$BATS_TEST_DIRNAME"/../examples/deviceclass.yaml kubectl apply -f "$BATS_TEST_DIRNAME"/../examples/resourceclaim.yaml kubectl wait --timeout=2m --for=condition=ready pods -l app=pod - run kubectl exec pod1 -- ip addr show dummy0 + run kubectl exec pod1 -- ip addr show eth99 [ "$status" -eq 0 ] [[ "$output" == *"169.254.169.13"* ]] run kubectl get resourceclaims dummy-interface-static-ip -o=jsonpath='{.status.devices[0].networkData.ips[*]}' [ "$status" -eq 0 ] [[ "$output" == *"169.254.169.13"* ]] + + kubectl delete -f "$BATS_TEST_DIRNAME"/../examples/deviceclass.yaml + kubectl delete -f "$BATS_TEST_DIRNAME"/../examples/resourceclaim.yaml +} + + +@test "dummy interface with IP addresses ResourceClaimTemplate" { + docker exec "$CLUSTER_NAME"-worker2 bash -c "ip link add dummy1 type dummy" + docker exec "$CLUSTER_NAME"-worker2 bash -c "ip addr add 169.254.169.14/32 dev dummy1" + + kubectl apply -f "$BATS_TEST_DIRNAME"/../examples/resourceclaimtemplate.yaml + kubectl wait --timeout=2m --for=condition=ready pods -l app=MyApp + POD_NAME=$(kubectl get pods -l app=MyApp -o name) + run kubectl exec $POD_NAME -- ip addr show dummy1 + [ "$status" -eq 0 ] + [[ "$output" == *"169.254.169.14"* ]] + # TODO list the specific resourceclaim and the networkdata + run kubectl get resourceclaims -o yaml + [ "$status" -eq 0 ] + [[ "$output" == *"169.254.169.14"* ]] + + kubectl delete -f "$BATS_TEST_DIRNAME"/../examples/resourceclaimtemplate.yaml }