diff --git a/network/networkutils/networkutils_linux.go b/network/networkutils/networkutils_linux.go index 9fbd906578..d964efcc94 100644 --- a/network/networkutils/networkutils_linux.go +++ b/network/networkutils/networkutils_linux.go @@ -256,14 +256,14 @@ func (nu NetworkUtils) DisableRAForInterface(ifName string) error { raFilePath := fmt.Sprintf(acceptRAV6File, ifName) exist, err := platform.CheckIfFileExists(raFilePath) if !exist { - logger.Error("accept_ra file doesn't exist with", zap.Error(err)) + logger.Error("accept_ra file doesn't exist with", zap.Error(err), zap.String("ifName", ifName)) return nil } cmd := fmt.Sprintf(disableRACmd, ifName) out, err := nu.plClient.ExecuteRawCommand(cmd) if err != nil { - logger.Error("Diabling ra failed with", zap.Error(err), zap.Any("out", out)) + logger.Error("Diabling ra failed with", zap.Error(err), zap.Any("out", out), zap.String("ifName", ifName)) } return err diff --git a/network/transparent_vlan_endpointclient_linux.go b/network/transparent_vlan_endpointclient_linux.go index fc4399ec3d..326fc0c87e 100644 --- a/network/transparent_vlan_endpointclient_linux.go +++ b/network/transparent_vlan_endpointclient_linux.go @@ -110,11 +110,24 @@ func NewTransparentVlanEndpointClient( return client } +// cleanupInterfaceIfExists checks if an interface exists and deletes it if found +// Returns an error if the deletion fails +func (client *TransparentVlanEndpointClient) cleanupInterfaceIfExists(interfaceName string) error { + _, ifExists := client.netioshim.GetNetworkInterfaceByName(interfaceName) + if ifExists == nil { + logger.Info("Interface exists, deleting", zap.String("interfaceName", interfaceName)) + if err := client.netlink.DeleteLink(interfaceName); err != nil { + return errors.Wrapf(err, "failed to clean up %s", interfaceName) + } + } + return nil +} + // Adds interfaces to the vnet (created if not existing) and vm namespace func (client *TransparentVlanEndpointClient) AddEndpoints(epInfo *EndpointInfo) error { // VM Namespace if err := client.ensureCleanPopulateVM(); err != nil { - return errors.Wrap(err, "failed to ensure both network namespace and vlan veth were present or both absent") + return errors.Wrap(err, "failed to ensure both network namespace and vlan interface were present or both absent") } if err := client.PopulateVM(epInfo); err != nil { return err @@ -131,7 +144,7 @@ func (client *TransparentVlanEndpointClient) AddEndpoints(epInfo *EndpointInfo) // Called from AddEndpoints, Namespace: VM and Vnet func (client *TransparentVlanEndpointClient) ensureCleanPopulateVM() error { // Clean up vlan interface in the VM namespace and ensure the network namespace (if it exists) has a vlan interface - logger.Info("Checking if NS and vlan veth exists...") + logger.Info("Checking if NS and vlan interface exists...") var existingErr error client.vnetNSFileDescriptor, existingErr = client.netnsClient.GetFromName(client.vnetNSName) if existingErr == nil { @@ -146,18 +159,14 @@ func (client *TransparentVlanEndpointClient) ensureCleanPopulateVM() error { logger.Info("vlan interface doesn't exist even though network namespace exists, deleting network namespace...", zap.String("message", vlanIfErr.Error())) delErr := client.netnsClient.DeleteNamed(client.vnetNSName) if delErr != nil { - return errors.Wrap(delErr, "failed to cleanup/delete ns after noticing vlan veth does not exist") + return errors.Wrap(delErr, "failed to cleanup/delete ns after noticing vlan interface does not exist") } } } // Delete the vlan interface in the VM namespace if it exists - _, vlanIfInVMErr := client.netioshim.GetNetworkInterfaceByName(client.vlanIfName) - if vlanIfInVMErr == nil { - // The vlan interface exists in the VM ns because it failed to move into the network ns previously and needs to be cleaned up - logger.Info("vlan interface exists on the VM namespace, deleting", zap.String("vlanIfName", client.vlanIfName)) - if delErr := client.netlink.DeleteLink(client.vlanIfName); delErr != nil { - return errors.Wrap(delErr, "failed to clean up vlan interface") - } + // The vlan interface exists in the VM ns because it failed to move into the network ns previously and needs to be cleaned up + if delErr := client.cleanupInterfaceIfExists(client.vlanIfName); delErr != nil { + return errors.Wrap(delErr, "failed to clean up vlan interface") } return nil } @@ -183,24 +192,24 @@ func (client *TransparentVlanEndpointClient) createNetworkNamespace(vmNS int) er return errors.Wrap(errNamespaceCreation, "vnet and vm namespace are the same") } -// Called from PopulateVM, Namespace: VM and namespace represented by fd -func (client *TransparentVlanEndpointClient) setLinkNetNSAndConfirm(name string, fd uintptr) error { +// Called from PopulateVM, Namespace: VM and namespace represented by fd (which is named nsName) +func (client *TransparentVlanEndpointClient) setLinkNetNSAndConfirm(name string, fd uintptr, nsName string) error { logger.Info("Move link to NS", zap.String("ifName", name), zap.Any("NSFileDescriptor", fd)) err := client.netlink.SetLinkNetNs(name, fd) if err != nil { - return errors.Wrapf(err, "failed to set %v inside namespace %v", name, fd) + return errors.Wrapf(err, "failed to set %v inside namespace %v (%s)", name, fd, nsName) } // confirm veth was moved successfully err = RunWithRetries(func() error { // retry checking in the namespace if the interface is not detected - return ExecuteInNS(client.nsClient, client.vnetNSName, func() error { - _, ifDetectedErr := client.netioshim.GetNetworkInterfaceByName(client.vlanIfName) - return errors.Wrap(ifDetectedErr, "failed to get vlan veth in namespace") + return ExecuteInNS(client.nsClient, nsName, func() error { + _, ifDetectedErr := client.netioshim.GetNetworkInterfaceByName(name) + return errors.Wrap(ifDetectedErr, "failed to confirm interface moved in namespace") }) }, numRetries, sleepInMs) if err != nil { - return errors.Wrapf(err, "failed to detect %v inside namespace %v", name, fd) + return errors.Wrapf(err, "failed to detect %v inside namespace %v (%s)", name, fd, nsName) } return nil } @@ -215,7 +224,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er var existingErr error client.vnetNSFileDescriptor, existingErr = client.netnsClient.GetFromName(client.vnetNSName) // If the ns does not exist, the below code will trigger to create it - // This will also (we assume) mean the vlan veth does not exist + // This will also (we assume) mean the vlan interface does not exist if existingErr != nil { // We assume the only possible error is that the namespace doesn't exist logger.Info("No existing NS detected. Creating the vnet namespace and switching to it", zap.String("message", existingErr.Error())) @@ -234,7 +243,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er logger.Info("[transparent vlan] removing vnet ns due to failure...") err = client.netnsClient.DeleteNamed(client.vnetNSName) if err != nil { - logger.Error("failed to cleanup/delete ns after failing to create vlan veth") + logger.Error("failed to cleanup/delete ns after failing to create vlan interface") } } }() @@ -242,7 +251,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er return errors.Wrap(deleteNSIfNotNilErr, "failed to set current ns to vm") } - // Now create vlan veth + // Now create vlan interface logger.Info("Create the host vlan link after getting eth0", zap.String("primaryHostIfName", client.primaryHostIfName)) // Get parent interface index. Index is consistent across libraries. eth0, deleteNSIfNotNilErr := client.netioshim.GetNetworkInterfaceByName(client.primaryHostIfName) @@ -258,7 +267,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er VlanId: client.vlanID, } logger.Info("Attempting to create link in VM NS", zap.String("vlanIfName", client.vlanIfName)) - // Create vlan veth + // Create vlan interface deleteNSIfNotNilErr = vishnetlink.LinkAdd(link) if deleteNSIfNotNilErr != nil { // ignore link already exists error @@ -271,9 +280,9 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er } defer func() { if deleteNSIfNotNilErr != nil { - logger.Info("removing vlan veth due to failure...") + logger.Info("removing vlan interface due to failure...") if delErr := client.netlink.DeleteLink(client.vlanIfName); delErr != nil { - logger.Error("deleting vlan veth failed on addendpoint failure with", zap.Any("error:", delErr.Error())) + logger.Error("deleting vlan interface failed on addendpoint failure with", zap.Any("error:", delErr.Error())) } } }() @@ -281,11 +290,11 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er // sometimes there is slight delay in interface creation. check if it exists err = RunWithRetries(func() error { _, err = client.netioshim.GetNetworkInterfaceByName(client.vlanIfName) - return errors.Wrap(err, "failed to get vlan veth") + return errors.Wrap(err, "failed to get vlan interface") }, numRetries, sleepInMs) if err != nil { - deleteNSIfNotNilErr = errors.Wrapf(err, "failed to get vlan veth interface:%s", client.vlanIfName) + deleteNSIfNotNilErr = errors.Wrapf(err, "failed to get vlan interface: %s", client.vlanIfName) return deleteNSIfNotNilErr } @@ -293,13 +302,13 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er if deleteNSIfNotNilErr != nil { return errors.Wrap(deleteNSIfNotNilErr, "failed to disable router advertisements for vlan vnet link") } - // vlan veth was created successfully, so move the vlan veth you created + // vlan interface was created successfully, so move the vlan interface you created logger.Info("Move vlan link to vnet NS", zap.String("vlanIfName", client.vlanIfName), zap.Any("vnetNSFileDescriptor", uintptr(client.vnetNSFileDescriptor))) - deleteNSIfNotNilErr = client.setLinkNetNSAndConfirm(client.vlanIfName, uintptr(client.vnetNSFileDescriptor)) + deleteNSIfNotNilErr = client.setLinkNetNSAndConfirm(client.vlanIfName, uintptr(client.vnetNSFileDescriptor), client.vnetNSName) if deleteNSIfNotNilErr != nil { - return errors.Wrap(deleteNSIfNotNilErr, "failed to move or detect vlan veth inside vnet namespace") + return errors.Wrap(deleteNSIfNotNilErr, "failed to move or detect vlan interface inside vnet namespace") } - logger.Info("Moving vlan veth into namespace confirmed") + logger.Info("Moving vlan interface into namespace confirmed") } else { logger.Info("Existing NS detected. vlan interface should exist or namespace would've been deleted.", zap.String("vnetNSName", client.vnetNSName), zap.String("vlanIfName", client.vlanIfName)) } @@ -310,6 +319,14 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er logger.Info("Failed to parse the mac address", zap.String("defaultHostVethHwAddr", defaultHostVethHwAddr)) } + // Proactively clean up any leftover veth interfaces before creating new ones + if vnetDelErr := client.cleanupInterfaceIfExists(client.vnetVethName); vnetDelErr != nil { + logger.Info("Could not proactively clean up vnet veth", zap.String("vnetVethName", client.vnetVethName), zap.Error(vnetDelErr)) + } + if containerDelErr := client.cleanupInterfaceIfExists(client.containerVethName); containerDelErr != nil { + logger.Info("Could not proactively clean up container veth", zap.String("containerVethName", client.containerVethName), zap.Error(containerDelErr)) + } + // Create veth pair if err = client.netUtilsClient.CreateEndpoint(client.vnetVethName, client.containerVethName, mac); err != nil { return errors.Wrap(err, "failed to create veth pair") @@ -349,7 +366,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er return errors.Wrap(err, "failed to disable RA on container veth, deleting") } - if err = client.setLinkNetNSAndConfirm(client.vnetVethName, uintptr(client.vnetNSFileDescriptor)); err != nil { + if err = client.setLinkNetNSAndConfirm(client.vnetVethName, uintptr(client.vnetNSFileDescriptor), client.vnetNSName); err != nil { if delErr := client.netlink.DeleteLink(client.vnetVethName); delErr != nil { logger.Error("Deleting vnet veth failed on addendpoint failure with", zap.Error(delErr)) } @@ -363,7 +380,7 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er func (client *TransparentVlanEndpointClient) PopulateVnet(epInfo *EndpointInfo) error { _, err := client.netioshim.GetNetworkInterfaceByName(client.vlanIfName) if err != nil { - return errors.Wrap(err, "vlan veth doesn't exist") + return errors.Wrap(err, "vlan interface doesn't exist") } vnetVethIf, err := client.netioshim.GetNetworkInterfaceByName(client.vnetVethName) if err != nil { @@ -659,7 +676,7 @@ func (client *TransparentVlanEndpointClient) DeleteEndpointsImpl(ep *endpoint, _ // TODO: revist if this require in future. //nolint gocritic /* if routesLeft <= numDefaultRoutes { - // Deletes default arp, default routes, vlan veth; there are two default routes + // Deletes default arp, default routes, vlan interface; there are two default routes // so when we have <= numDefaultRoutes routes left, no containers use this namespace log.Printf("[transparent vlan] Deleting namespace %s as no containers occupy it", client.vnetNSName) delErr := client.netnsClient.DeleteNamed(client.vnetNSName) diff --git a/network/transparent_vlan_endpointclient_linux_test.go b/network/transparent_vlan_endpointclient_linux_test.go index f21dab9bf4..79e70adb98 100644 --- a/network/transparent_vlan_endpointclient_linux_test.go +++ b/network/transparent_vlan_endpointclient_linux_test.go @@ -121,12 +121,14 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { nl := netlink.NewMockNetlink(false, "") plc := platform.NewMockExecClient(false) - tests := []struct { - name string - client *TransparentVlanEndpointClient - epInfo *EndpointInfo - wantErr bool - wantErrMsg string + setLinkNetNSTests := []struct { + name string + client *TransparentVlanEndpointClient + epInfo *EndpointInfo + moveInterface string + moveNS string + wantErr bool + wantErrMsg string }{ // Set the link network namespace and confirm that it was moved inside { @@ -143,8 +145,10 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { netioshim: netio.NewMockNetIO(false, 0), nsClient: NewMockNamespaceClient(), }, - epInfo: &EndpointInfo{}, - wantErr: false, + moveInterface: "eth0.1", + moveNS: "az_ns_1", + epInfo: &EndpointInfo{}, + wantErr: false, }, { name: "Set link netns fail to set", @@ -160,9 +164,11 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { netioshim: netio.NewMockNetIO(false, 0), nsClient: NewMockNamespaceClient(), }, - epInfo: &EndpointInfo{}, - wantErr: true, - wantErrMsg: "failed to set eth0.1", + moveInterface: "A1veth0", + moveNS: "az_ns_2", + epInfo: &EndpointInfo{}, + wantErr: true, + wantErrMsg: "failed to set A1veth0 inside namespace 1 (az_ns_2)", }, { name: "Set link netns fail to detect", @@ -181,15 +187,17 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { }, nsClient: NewMockNamespaceClient(), }, - epInfo: &EndpointInfo{}, - wantErr: true, - wantErrMsg: "failed to detect eth0.1", + moveInterface: "eth0.1", + moveNS: "az_ns_1", + epInfo: &EndpointInfo{}, + wantErr: true, + wantErrMsg: "failed to detect eth0.1 inside namespace 1 (az_ns_1)", }, } - for _, tt := range tests { + for _, tt := range setLinkNetNSTests { tt := tt t.Run(tt.name, func(t *testing.T) { - err := tt.client.setLinkNetNSAndConfirm(tt.client.vlanIfName, 1) + err := tt.client.setLinkNetNSAndConfirm(tt.moveInterface, 1, tt.moveNS) if tt.wantErr { require.Error(t, err) require.Contains(t, err.Error(), tt.wantErrMsg, "Expected:%v actual:%v", tt.wantErrMsg, err.Error()) @@ -199,7 +207,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { }) } - tests = []struct { + tests := []struct { name string client *TransparentVlanEndpointClient epInfo *EndpointInfo @@ -256,7 +264,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { }, epInfo: &EndpointInfo{}, wantErr: true, - wantErrMsg: "failed to cleanup/delete ns after noticing vlan veth does not exist: netns failure: " + errNetnsMock.Error(), + wantErrMsg: "failed to cleanup/delete ns after noticing vlan interface does not exist: netns failure: " + errNetnsMock.Error(), }, { name: "Ensure clean populate VM cleanup straggling vlan if in vm ns", @@ -344,7 +352,12 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { set: defaultSet, deleteNamed: defaultDeleteNamed, }, - netlink: netlink.NewMockNetlink(false, ""), + netlink: &netlink.MockNetlink{ + DeleteLinkFn: func(_ string) error { + // should still succeed + return netlink.ErrorMockNetlink + }, + }, plClient: platform.NewMockExecClient(false), netUtilsClient: networkutils.NewNetworkUtils(nl, plc), netioshim: netio.NewMockNetIO(false, 0), @@ -375,7 +388,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { }, epInfo: &EndpointInfo{}, wantErr: true, - wantErrMsg: "failed to move or detect vnetVethName in vnet ns, deleting: failed to set A1veth0 inside namespace 1: " + netlink.ErrorMockNetlink.Error() + " : netlink fail", + wantErrMsg: "failed to move or detect vnetVethName in vnet ns, deleting: failed to set A1veth0 inside namespace 1 (az_ns_1): " + netlink.ErrorMockNetlink.Error() + " : netlink fail", }, { name: "Add endpoints get interface fail for primary interface (eth0)", @@ -523,7 +536,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { wantErr: false, }, { - name: "Add endpoints fail check vlan veth exists", + name: "Add endpoints fail check vlan interface exists", client: &TransparentVlanEndpointClient{ primaryHostIfName: "eth0", vlanIfName: "eth0.1", @@ -537,7 +550,7 @@ func TestTransparentVlanAddEndpoints(t *testing.T) { }, epInfo: &EndpointInfo{}, wantErr: true, - wantErrMsg: "vlan veth doesn't exist: " + netio.ErrMockNetIOFail.Error() + ":eth0.1", + wantErrMsg: "vlan interface doesn't exist: " + netio.ErrMockNetIOFail.Error() + ":eth0.1", }, { name: "Add endpoints fail check vnet veth exists",