Skip to content

Commit 9600774

Browse files
committed
Fix: adding support for SWiftV2 Linux stateless CNI mode
1 parent bd3f491 commit 9600774

File tree

7 files changed

+279
-33
lines changed

7 files changed

+279
-33
lines changed

cni/network/network.go

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/Azure/azure-container-networking/cni/util"
1919
"github.com/Azure/azure-container-networking/cns"
2020
cnscli "github.com/Azure/azure-container-networking/cns/client"
21-
"github.com/Azure/azure-container-networking/cns/fsnotify"
2221
"github.com/Azure/azure-container-networking/common"
2322
"github.com/Azure/azure-container-networking/dhcp"
2423
"github.com/Azure/azure-container-networking/iptables"
@@ -719,7 +718,7 @@ func (plugin *NetPlugin) createEpInfo(opt *createEpInfoOpt) (*network.EndpointIn
719718
*opt.infraSeen = true
720719
} else {
721720
ifName = "eth" + strconv.Itoa(opt.endpointIndex)
722-
endpointID = plugin.nm.GetEndpointID(opt.args.ContainerID, ifName)
721+
endpointID = plugin.nm.GetEndpointIDByNicType(opt.args.ContainerID, ifName, opt.ifInfo.NICType)
723722
}
724723

725724
endpointInfo := network.EndpointInfo{
@@ -1075,33 +1074,45 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
10751074
if plugin.nm.IsStatelessCNIMode() {
10761075
// network ID is passed in and used only for migration
10771076
// otherwise, in stateless, we don't need the network id for deletion
1078-
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID)
1079-
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found
1077+
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID, args.Netns)
1078+
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found or CNS connection failure
1079+
// return a retriable error so the container runtime will retry this DEL later
1080+
// the implementation of this function returns nil if the endpoint doesn't exist, so
1081+
// we don't have to check that here
10801082
if err != nil {
1081-
// async delete should be disabled for standalone scenario
1082-
if errors.Is(err, network.ErrConnectionFailure) && !nwCfg.DisableAsyncDelete {
1083-
logger.Info("failed to connect to CNS", zap.String("containerID", args.ContainerID), zap.Error(err))
1084-
addErr := fsnotify.AddFile(args.ContainerID, args.ContainerID, watcherPath)
1085-
logger.Info("add containerid file for Asynch delete", zap.String("containerID", args.ContainerID), zap.Error(addErr))
1086-
if addErr != nil {
1087-
logger.Error("failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr))
1088-
return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID))
1083+
switch {
1084+
case errors.Is(err, network.ErrConnectionFailure):
1085+
logger.Error("Failed to connect to CNS", zap.Error(err))
1086+
logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
1087+
// In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS,
1088+
// we asynchronously remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state.
1089+
// This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations.
1090+
// When that happens, kubelet and containerd hang during sandbox creation or teardown.
1091+
// The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace,
1092+
// triggering hot-unplug/re-register events and leaving the node in an unhealthy state.
1093+
// This workaround mitigates the issue by removing the secondary NIC from the pod netns when CNS is unreachable during DEL to provide the endpoint state.
1094+
if err = plugin.nm.RemoveSecondaryEndpointFromPodNetNS(args.IfName, args.Netns); err != nil {
1095+
logger.Error("Failed to remove secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err))
1096+
return plugin.RetriableError(fmt.Errorf("failed to remove secondary endpoint from pod netns: %w", err))
10891097
}
1090-
return nil
1091-
}
1092-
if errors.Is(err, network.ErrEndpointStateNotFound) {
1098+
case errors.Is(err, network.ErrEndpointStateNotFound):
10931099
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
10941100
return nil
1101+
default:
1102+
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
1103+
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
1104+
}
1105+
} else {
1106+
for _, epInfo := range epInfos {
1107+
logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType))
10951108
}
1096-
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
1097-
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
10981109
}
10991110
} else {
11001111
epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID)
11011112
}
11021113

1103-
// for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
1104-
// this block is not applied to stateless CNI
1114+
// for Stateful CNI when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
1115+
// this block is applied to stateless CNI only if there was a connection failure in previous block and asynchronous delete by CNS will remover the endpoint from state file
11051116
if len(epInfos) == 0 {
11061117
endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName)
11071118
if !nwCfg.MultiTenancy {
@@ -1127,7 +1138,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
11271138
if err = plugin.nm.DeleteEndpoint(epInfo.NetworkID, epInfo.EndpointID, epInfo); err != nil {
11281139
// An error will not be returned if the endpoint is not found
11291140
// return a retriable error so the container runtime will retry this DEL later
1130-
// the implementation of this function returns nil if the endpoint doens't exist, so
1141+
// the implementation of this function returns nil if the endpoint doesn't exist, so
11311142
// we don't have to check that here
11321143
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
11331144
}

network/endpoint_linux.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,3 +547,12 @@ func getDefaultGateway(routes []RouteInfo) net.IP {
547547
func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*EndpointInfo, error) {
548548
return epInfo, nil
549549
}
550+
551+
// removeSecondaryEndpointFromPodNetNSImpl deletes an existing secondary endpoint from the pod network namespace.
552+
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(nsc NamespaceClientInterface) error {
553+
secondaryepClient := NewSecondaryEndpointClient(nil, nil, nil, nsc, nil, ep)
554+
if err := secondaryepClient.RemoveInterfacesFromNetnsPath(ep.IfName, ep.NetworkNameSpace); err != nil {
555+
return err
556+
}
557+
return nil
558+
}

network/endpoint_windows.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,3 +770,8 @@ func getPnpDeviceState(instanceID string, plc platform.ExecClient) (string, stri
770770
logger.Info("Retrieved device problem code", zap.String("code", devpkeyDeviceProblemCode))
771771
return devpkeyDeviceIsPresent, devpkeyDeviceProblemCode, nil
772772
}
773+
774+
// removeSecondaryEndpointFromPodNetNSImpl removes an existing secondary endpoint from the pod network namespace.
775+
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(_ NamespaceClientInterface) error {
776+
return nil
777+
}

network/manager.go

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,13 @@ type NetworkManager interface {
116116
UpdateEndpoint(networkID string, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) error
117117
GetNumberOfEndpoints(ifName string, networkID string) int
118118
GetEndpointID(containerID, ifName string) string
119+
GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string
119120
IsStatelessCNIMode() bool
120121
SaveState(eps []*endpoint) error
121122
DeleteState(epInfos []*EndpointInfo) error
122123
GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo
123-
GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error)
124+
GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error)
125+
RemoveSecondaryEndpointFromPodNetNS(ifName string, netns string) error
124126
}
125127

126128
// Creates a new network manager.
@@ -456,7 +458,7 @@ func validateUpdateEndpointState(endpointID string, ifNameToIPInfoMap map[string
456458
// GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo
457459
// TODO unit tests need to be added, WorkItem: 26606939
458460
// In stateless cni, container id is the endpoint id, so you can pass in either
459-
func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) {
461+
func (nm *networkManager) GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) {
460462
endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), containerID)
461463
if err != nil {
462464
if endpointResponse.Response.ReturnCode == types.NotFound {
@@ -467,7 +469,7 @@ func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*En
467469
}
468470
return nil, ErrGetEndpointStateFailure
469471
}
470-
epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID)
472+
epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID, netns)
471473

472474
for i := 0; i < len(epInfos); i++ {
473475
if epInfos[i].NICType == cns.InfraNIC {
@@ -515,7 +517,7 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp
515517
nw := &network{
516518
Id: networkID, // currently unused in stateless cni
517519
HnsId: epInfo.HNSNetworkID,
518-
Mode: opModeTransparentVlan,
520+
Mode: opModeTransparent,
519521
SnatBridgeIP: "",
520522
NetNs: dummyGUID, // to trigger hns v2, windows
521523
extIf: &externalInterface{
@@ -530,6 +532,7 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp
530532
HNSNetworkID: epInfo.HNSNetworkID, // unused (we use nw.HnsId for deleting the network)
531533
HostIfName: epInfo.HostIfName,
532534
LocalIP: "",
535+
IPAddresses: epInfo.IPAddresses,
533536
VlanID: 0,
534537
AllowInboundFromHostToNC: false, // stateless currently does not support apipa
535538
AllowInboundFromNCToHost: false,
@@ -538,11 +541,12 @@ func (nm *networkManager) DeleteEndpointStateless(networkID string, epInfo *Endp
538541
NetworkContainerID: epInfo.NetworkContainerID, // we don't use this as long as AllowInboundFromHostToNC and AllowInboundFromNCToHost are false
539542
NetNs: dummyGUID, // to trigger hnsv2, windows
540543
NICType: epInfo.NICType,
544+
NetworkNameSpace: epInfo.NetNsPath,
541545
IfName: epInfo.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
542546
}
543547
logger.Info("Deleting endpoint with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", ep.HnsId))
544548

545-
err := nw.deleteEndpointImpl(netlink.NewNetlink(), platform.NewExecClient(logger), nil, nil, nil, nil, nil, ep)
549+
err := nw.deleteEndpointImpl(nm.netlink, nm.plClient, nil, nm.netio, nm.nsClient, nm.iptablesClient, nm.dhcpClient, ep)
546550
if err != nil {
547551
return err
548552
}
@@ -563,7 +567,7 @@ func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*Endpoi
563567

564568
if nm.IsStatelessCNIMode() {
565569
logger.Info("calling cns getEndpoint API")
566-
epInfos, err := nm.GetEndpointState(networkID, endpointID)
570+
epInfos, err := nm.GetEndpointState(networkID, endpointID, "")
567571
if err != nil {
568572
return nil, err
569573
}
@@ -746,6 +750,16 @@ func (nm *networkManager) GetEndpointID(containerID, ifName string) string {
746750
return containerID + "-" + ifName
747751
}
748752

753+
// GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type.
754+
func (nm *networkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string {
755+
// For stateless CNI, secondary NICs use containerID-ifName as endpointID.
756+
if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC {
757+
return containerID + "-" + ifName
758+
}
759+
// For InfraNIC, use GetEndpointID() logic.
760+
return nm.GetEndpointID(containerID, ifName)
761+
}
762+
749763
// saves the map of network ids to endpoints to the state file
750764
func (nm *networkManager) SaveState(eps []*endpoint) error {
751765
nm.Lock()
@@ -797,7 +811,7 @@ func (nm *networkManager) DeleteState(epInfos []*EndpointInfo) error {
797811
}
798812

799813
// called to convert a cns restserver EndpointInfo into a network EndpointInfo
800-
func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID string) []*EndpointInfo {
814+
func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID, netns string) []*EndpointInfo {
801815
ret := []*EndpointInfo{}
802816

803817
for ifName, ipInfo := range endpointInfo.IfnameToIPMap {
@@ -867,3 +881,14 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo {
867881

868882
return ifNametoIPInfoMap
869883
}
884+
885+
// RemoveSecondaryEndpointFromPodNetNS removes the secondary endpoint from the pod netns
886+
func (nm *networkManager) RemoveSecondaryEndpointFromPodNetNS(ifName, netns string) error {
887+
ep := &endpoint{
888+
NetworkNameSpace: netns,
889+
IfName: ifName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
890+
}
891+
logger.Info("Removing Secondary Endpoint from", zap.String("NetworkNameSpace: ", netns))
892+
err := ep.removeSecondaryEndpointFromPodNetNSImpl(nm.nsClient)
893+
return err
894+
}

network/manager_mock.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package network
22

33
import (
4+
"github.com/Azure/azure-container-networking/cns"
45
"github.com/Azure/azure-container-networking/common"
56
)
67

@@ -94,6 +95,16 @@ func (nm *MockNetworkManager) GetEndpointID(containerID, ifName string) string {
9495
return containerID + "-" + ifName
9596
}
9697

98+
// GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type.
99+
func (nm *MockNetworkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string {
100+
// For stateless CNI, secondary NICs use containerID-ifName as endpointID.
101+
if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC {
102+
return containerID + "-" + ifName
103+
}
104+
// For InfraNIC, use GetEndpointID() logic.
105+
return nm.GetEndpointID(containerID, ifName)
106+
}
107+
97108
func (nm *MockNetworkManager) GetAllEndpoints(networkID string) (map[string]*EndpointInfo, error) {
98109
return nm.TestEndpointInfoMap, nil
99110
}
@@ -207,6 +218,10 @@ func (nm *MockNetworkManager) GetEndpointInfosFromContainerID(containerID string
207218
return ret
208219
}
209220

210-
func (nm *MockNetworkManager) GetEndpointState(_, _ string) ([]*EndpointInfo, error) {
221+
func (nm *MockNetworkManager) GetEndpointState(_, _, _ string) ([]*EndpointInfo, error) {
211222
return []*EndpointInfo{}, nil
212223
}
224+
225+
func (nm *MockNetworkManager) RemoveSecondaryEndpointFromPodNetNS(_, _ string) error {
226+
return nil
227+
}

network/manager_test.go

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ var _ = Describe("Test Manager", func() {
350350
PodNamespace: "test-pod-ns",
351351
}
352352

353-
epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID)
353+
epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID, "")
354354

355355
Expect(len(epInfos)).To(Equal(1))
356356
Expect(epInfos[0]).To(Equal(
@@ -400,7 +400,7 @@ var _ = Describe("Test Manager", func() {
400400
PodNamespace: "test-pod-ns",
401401
}
402402

403-
epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID)
403+
epInfos := cnsEndpointInfotoCNIEpInfos(cnsEndpointInfo, endpointID, "")
404404

405405
Expect(len(epInfos)).To(Equal(2))
406406
Expect(epInfos).To(ContainElement(
@@ -489,3 +489,109 @@ var _ = Describe("Test Manager", func() {
489489
})
490490
})
491491
})
492+
493+
func TestGetEndpointIDByNicType_Cases(t *testing.T) {
494+
nm := &networkManager{}
495+
496+
cases := []struct {
497+
name string
498+
stateless bool
499+
containerID string
500+
ifName string
501+
nicType cns.NICType
502+
expectedResult string
503+
}{
504+
{
505+
name: "Stateless InfraNIC",
506+
stateless: true,
507+
containerID: "container123",
508+
ifName: "eth0",
509+
nicType: cns.InfraNIC,
510+
expectedResult: "container123",
511+
},
512+
{
513+
name: "Stateless SecondaryNIC",
514+
stateless: true,
515+
containerID: "container123",
516+
ifName: "eth1",
517+
nicType: cns.DelegatedVMNIC,
518+
expectedResult: "container123-eth1",
519+
},
520+
{
521+
name: "Stateful InfraNIC",
522+
stateless: false,
523+
containerID: "container123456789",
524+
ifName: "eth0",
525+
nicType: cns.InfraNIC,
526+
expectedResult: "containe-eth0", // truncated to 8 chars
527+
},
528+
}
529+
530+
for _, tc := range cases {
531+
t.Run(tc.name, func(t *testing.T) {
532+
nm.statelessCniMode = tc.stateless
533+
id := nm.GetEndpointIDByNicType(tc.containerID, tc.ifName, tc.nicType)
534+
if id != tc.expectedResult {
535+
t.Errorf("expected %s, got %s", tc.expectedResult, id)
536+
}
537+
})
538+
}
539+
}
540+
541+
func TestCnsEndpointInfotoCNIEpInfos_Cases(t *testing.T) {
542+
cases := []struct {
543+
name string
544+
ifName string
545+
ipInfo restserver.IPInfo
546+
netNs string
547+
expectedNetNs string
548+
expectedIfName string
549+
expectedNICType cns.NICType
550+
}{
551+
{
552+
name: "DelegatedVMNIC",
553+
ifName: "eth1",
554+
netNs: "/var/run/netns/testns",
555+
ipInfo: restserver.IPInfo{
556+
NICType: cns.DelegatedVMNIC,
557+
},
558+
expectedNetNs: "/var/run/netns/testns",
559+
expectedIfName: "eth1",
560+
expectedNICType: cns.DelegatedVMNIC,
561+
},
562+
{
563+
name: "InfraNIC",
564+
ifName: "eth0",
565+
netNs: "",
566+
ipInfo: restserver.IPInfo{
567+
NICType: cns.InfraNIC,
568+
},
569+
expectedNetNs: "",
570+
expectedIfName: "eth0",
571+
expectedNICType: cns.InfraNIC,
572+
},
573+
}
574+
575+
for _, tc := range cases {
576+
t.Run(tc.name, func(t *testing.T) {
577+
endpointInfo := restserver.EndpointInfo{
578+
IfnameToIPMap: map[string]*restserver.IPInfo{
579+
tc.ifName: &tc.ipInfo,
580+
},
581+
}
582+
epInfos := cnsEndpointInfotoCNIEpInfos(endpointInfo, "container123", tc.netNs)
583+
if len(epInfos) == 0 {
584+
t.Fatalf("expected at least one epInfo")
585+
}
586+
if epInfos[0].NetNsPath != tc.expectedNetNs {
587+
t.Errorf("expected NetNsPath %q, got %q", tc.expectedNetNs, epInfos[0].NetNsPath)
588+
}
589+
if epInfos[0].IfName != tc.expectedIfName {
590+
t.Errorf("expected IfName %q, got %q", tc.expectedIfName, epInfos[0].IfName)
591+
}
592+
if epInfos[0].NICType != tc.expectedNICType {
593+
t.Errorf("expected NICType %v, got %v", tc.expectedNICType, epInfos[0].NICType)
594+
}
595+
})
596+
}
597+
}

0 commit comments

Comments
 (0)