@@ -26,14 +26,11 @@ const (
2626 tunnelingTable = 2 // Packets not entering on the vlan interface go to this routing table
2727 tunnelingMark = 333 // The packets that are to tunnel will be marked with this number
2828 DisableRPFilterCmd = "sysctl -w net.ipv4.conf.all.rp_filter=0" // Command to disable the rp filter for tunneling
29+ numRetries = 5
30+ sleepInMs = 100
2931)
3032
31- var errNamespaceCreation = fmt .Errorf ("Network namespace creation error" )
32-
33- var (
34- numRetries = 5
35- sleepInMs = 100
36- )
33+ var errNamespaceCreation = fmt .Errorf ("network namespace creation error" )
3734
3835type netnsClient interface {
3936 Get () (fileDescriptor int , err error )
@@ -113,8 +110,10 @@ func NewTransparentVlanEndpointClient(
113110// Adds interfaces to the vnet (created if not existing) and vm namespace
114111func (client * TransparentVlanEndpointClient ) AddEndpoints (epInfo * EndpointInfo ) error {
115112 // VM Namespace
116- err := client .PopulateVM (epInfo )
117- if err != nil {
113+ if err := client .ensureCleanPopulateVM (); err != nil {
114+ return errors .Wrap (err , "failed to ensure both network namespace and vlan veth were present or both absent" )
115+ }
116+ if err := client .PopulateVM (epInfo ); err != nil {
118117 return err
119118 }
120119 if err := client .AddSnatEndpoint (); err != nil {
@@ -126,32 +125,80 @@ func (client *TransparentVlanEndpointClient) AddEndpoints(epInfo *EndpointInfo)
126125 })
127126}
128127
129- func (client * TransparentVlanEndpointClient ) createNetworkNamespace (vmNS , numRetries int ) error {
130- var isNamespaceUnique bool
131-
132- for i := 0 ; i < numRetries ; i ++ {
133- vnetNS , err := client .netnsClient .NewNamed (client .vnetNSName )
134- if err != nil {
135- return errors .Wrap (err , "failed to create vnet ns" )
136- }
137- logger .Info ("Vnet Namespace created" , zap .String ("vnetNS" , client .netnsClient .NamespaceUniqueID (vnetNS )))
138- if ! client .netnsClient .IsNamespaceEqual (vnetNS , vmNS ) {
139- client .vnetNSFileDescriptor = vnetNS
140- isNamespaceUnique = true
141- break
128+ // Called from AddEndpoints, Namespace: VM and Vnet
129+ func (client * TransparentVlanEndpointClient ) ensureCleanPopulateVM () error {
130+ // Clean up vlan interface in the VM namespace and ensure the network namespace (if it exists) has a vlan interface
131+ logger .Info ("Checking if NS and vlan veth exists..." )
132+ var existingErr error
133+ client .vnetNSFileDescriptor , existingErr = client .netnsClient .GetFromName (client .vnetNSName )
134+ if existingErr == nil {
135+ // The namespace exists
136+ vlanIfErr := ExecuteInNS (client .nsClient , client .vnetNSName , func () error {
137+ // Ensure the vlan interface exists in the namespace
138+ _ , err := client .netioshim .GetNetworkInterfaceByName (client .vlanIfName )
139+ return errors .Wrap (err , "failed to get vlan interface in namespace" )
140+ })
141+ if vlanIfErr != nil {
142+ // Assume any error is the vlan interface not found
143+ logger .Info ("vlan interface doesn't exist even though network namespace exists, deleting network namespace..." , zap .String ("message" , vlanIfErr .Error ()))
144+ delErr := client .netnsClient .DeleteNamed (client .vnetNSName )
145+ if delErr != nil {
146+ return errors .Wrap (delErr , "failed to cleanup/delete ns after noticing vlan veth does not exist" )
147+ }
142148 }
143- logger .Info ("Vnet Namespace is the same as VM namespace. Deleting and retrying..." )
144- delErr := client .netnsClient .DeleteNamed (client .vnetNSName )
145- if delErr != nil {
146- logger .Error ("failed to cleanup/delete ns after failing to create vlan veth" , zap .Any ("error:" , delErr .Error ()))
149+ }
150+ // Delete the vlan interface in the VM namespace if it exists
151+ _ , vlanIfInVMErr := client .netioshim .GetNetworkInterfaceByName (client .vlanIfName )
152+ if vlanIfInVMErr == nil {
153+ // The vlan interface exists in the VM ns because it failed to move into the network ns previously and needs to be cleaned up
154+ logger .Info ("vlan interface exists on the VM namespace, deleting" , zap .String ("vlanIfName" , client .vlanIfName ))
155+ if delErr := client .netlink .DeleteLink (client .vlanIfName ); delErr != nil {
156+ return errors .Wrap (delErr , "failed to clean up vlan interface" )
147157 }
148- time .Sleep (time .Duration (sleepInMs ) * time .Millisecond )
149158 }
159+ return nil
160+ }
161+
162+ // Called from PopulateVM, Namespace: VM
163+ func (client * TransparentVlanEndpointClient ) createNetworkNamespace (vmNS int ) error {
164+ // If this call succeeds, the namespace is set to the new namespace
165+ vnetNS , err := client .netnsClient .NewNamed (client .vnetNSName )
166+ if err != nil {
167+ return errors .Wrap (err , "failed to create vnet ns" )
168+ }
169+ logger .Info ("Vnet Namespace created" , zap .String ("vnetNS" , client .netnsClient .NamespaceUniqueID (vnetNS )), zap .String ("vmNS" , client .netnsClient .NamespaceUniqueID (vmNS )))
170+ if ! client .netnsClient .IsNamespaceEqual (vnetNS , vmNS ) {
171+ client .vnetNSFileDescriptor = vnetNS
172+ return nil
173+ }
174+ // the vnet and vm namespace are the same by this point
175+ logger .Info ("Vnet Namespace is the same as VM namespace. Deleting..." )
176+ delErr := client .netnsClient .DeleteNamed (client .vnetNSName )
177+ if delErr != nil {
178+ logger .Error ("failed to cleanup/delete ns after noticing vnet ns is the same as vm ns" , zap .Any ("error:" , delErr .Error ()))
179+ }
180+ return errors .Wrap (errNamespaceCreation , "vnet and vm namespace are the same" )
181+ }
150182
151- if ! isNamespaceUnique {
152- return errors .Wrap (errNamespaceCreation , "vnet and vm namespace are same" )
183+ // Called from PopulateVM, Namespace: VM and namespace represented by fd
184+ func (client * TransparentVlanEndpointClient ) setLinkNetNSAndConfirm (name string , fd uintptr ) error {
185+ logger .Info ("Move link to NS" , zap .String ("ifName" , name ), zap .Any ("NSFileDescriptor" , fd ))
186+ err := client .netlink .SetLinkNetNs (name , fd )
187+ if err != nil {
188+ return errors .Wrapf (err , "failed to set %v inside namespace %v" , name , fd )
153189 }
154190
191+ // confirm veth was moved successfully
192+ err = RunWithRetries (func () error {
193+ // retry checking in the namespace if the interface is not detected
194+ return ExecuteInNS (client .nsClient , client .vnetNSName , func () error {
195+ _ , ifDetectedErr := client .netioshim .GetNetworkInterfaceByName (client .vlanIfName )
196+ return errors .Wrap (ifDetectedErr , "failed to get vlan veth in namespace" )
197+ })
198+ }, numRetries , sleepInMs )
199+ if err != nil {
200+ return errors .Wrapf (err , "failed to detect %v inside namespace %v" , name , fd )
201+ }
155202 return nil
156203}
157204
@@ -169,10 +216,13 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
169216 // This will also (we assume) mean the vlan veth does not exist
170217 if existingErr != nil {
171218 // We assume the only possible error is that the namespace doesn't exist
172- logger .Info ("No existing NS detected. Creating the vnet namespace and switching to it" )
219+ logger .Info ("No existing NS detected. Creating the vnet namespace and switching to it" , zap . String ( "message" , existingErr . Error ()) )
173220
174- if err = client .createNetworkNamespace (vmNS , numRetries ); err != nil {
175- return errors .Wrap (err , "" )
221+ err = RunWithRetries (func () error {
222+ return client .createNetworkNamespace (vmNS )
223+ }, numRetries , sleepInMs )
224+ if err != nil {
225+ return errors .Wrap (err , "failed to create network namespace" )
176226 }
177227
178228 deleteNSIfNotNilErr := client .netnsClient .Set (vmNS )
@@ -214,6 +264,8 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
214264 // Any failure to add the link should error (auto delete NS)
215265 return errors .Wrap (deleteNSIfNotNilErr , "failed to create vlan vnet link after making new ns" )
216266 }
267+ // Prevent accidentally deleting NS and vlan interface since we ignore this error
268+ deleteNSIfNotNilErr = nil
217269 }
218270 defer func () {
219271 if deleteNSIfNotNilErr != nil {
@@ -225,12 +277,11 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
225277 }()
226278
227279 // sometimes there is slight delay in interface creation. check if it exists
228- for i := 0 ; i < numRetries ; i ++ {
229- if _ , err = client .netioshim .GetNetworkInterfaceByName (client .vlanIfName ); err == nil {
230- break
231- }
232- time .Sleep (time .Duration (sleepInMs ) * time .Millisecond )
233- }
280+ err = RunWithRetries (func () error {
281+ _ , err = client .netioshim .GetNetworkInterfaceByName (client .vlanIfName )
282+ return errors .Wrap (err , "failed to get vlan veth" )
283+ }, numRetries , sleepInMs )
284+
234285 if err != nil {
235286 deleteNSIfNotNilErr = errors .Wrapf (err , "failed to get vlan veth interface:%s" , client .vlanIfName )
236287 return deleteNSIfNotNilErr
@@ -242,12 +293,13 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
242293 }
243294 // vlan veth was created successfully, so move the vlan veth you created
244295 logger .Info ("Move vlan link to vnet NS" , zap .String ("vlanIfName" , client .vlanIfName ), zap .Any ("vnetNSFileDescriptor" , uintptr (client .vnetNSFileDescriptor )))
245- deleteNSIfNotNilErr = client .netlink . SetLinkNetNs (client .vlanIfName , uintptr (client .vnetNSFileDescriptor ))
296+ deleteNSIfNotNilErr = client .setLinkNetNSAndConfirm (client .vlanIfName , uintptr (client .vnetNSFileDescriptor ))
246297 if deleteNSIfNotNilErr != nil {
247- return errors .Wrap (deleteNSIfNotNilErr , "deleting vlan veth in vm ns due to addendpoint failure " )
298+ return errors .Wrap (deleteNSIfNotNilErr , "failed to move or detect vlan veth inside vnet namespace " )
248299 }
300+ logger .Info ("Moving vlan veth into namespace confirmed" )
249301 } else {
250- logger .Info ("Existing NS detected. Assuming exists too " , zap .String ("vnetNSName" , client .vnetNSName ), zap .String ("vlanIfName" , client .vlanIfName ))
302+ logger .Info ("Existing NS detected. vlan interface should exist or namespace would've been deleted. " , zap .String ("vnetNSName" , client .vnetNSName ), zap .String ("vlanIfName" , client .vlanIfName ))
251303 }
252304
253305 // Get the default constant host veth mac
@@ -260,6 +312,27 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
260312 if err = client .netUtilsClient .CreateEndpoint (client .vnetVethName , client .containerVethName , mac ); err != nil {
261313 return errors .Wrap (err , "failed to create veth pair" )
262314 }
315+
316+ // Ensure vnet veth is created, as there may be a slight delay
317+ err = RunWithRetries (func () error {
318+ _ , getErr := client .netioshim .GetNetworkInterfaceByName (client .vnetVethName )
319+ return errors .Wrap (getErr , "failed to get vnet veth" )
320+ }, numRetries , sleepInMs )
321+ if err != nil {
322+ return errors .Wrap (err , "vnet veth does not exist" )
323+ }
324+
325+ // Ensure container veth is created, as there may be a slight delay
326+ var containerIf * net.Interface
327+ err = RunWithRetries (func () error {
328+ var getErr error
329+ containerIf , getErr = client .netioshim .GetNetworkInterfaceByName (client .containerVethName )
330+ return errors .Wrap (getErr , "failed to get container veth" )
331+ }, numRetries , sleepInMs )
332+ if err != nil {
333+ return errors .Wrap (err , "container veth does not exist" )
334+ }
335+
263336 // Disable RA for veth pair, and delete if any failure
264337 if err = client .netUtilsClient .DisableRAForInterface (client .vnetVethName ); err != nil {
265338 if delErr := client .netlink .DeleteLink (client .vnetVethName ); delErr != nil {
@@ -274,16 +347,11 @@ func (client *TransparentVlanEndpointClient) PopulateVM(epInfo *EndpointInfo) er
274347 return errors .Wrap (err , "failed to disable RA on container veth, deleting" )
275348 }
276349
277- if err = client .netlink . SetLinkNetNs (client .vnetVethName , uintptr (client .vnetNSFileDescriptor )); err != nil {
350+ if err = client .setLinkNetNSAndConfirm (client .vnetVethName , uintptr (client .vnetNSFileDescriptor )); err != nil {
278351 if delErr := client .netlink .DeleteLink (client .vnetVethName ); delErr != nil {
279352 logger .Error ("Deleting vnet veth failed on addendpoint failure with" , zap .Error (delErr ))
280353 }
281- return errors .Wrap (err , "failed to move vnetVethName into vnet ns, deleting" )
282- }
283-
284- containerIf , err := client .netioshim .GetNetworkInterfaceByName (client .containerVethName )
285- if err != nil {
286- return errors .Wrap (err , "container veth does not exist" )
354+ return errors .Wrap (err , "failed to move or detect vnetVethName in vnet ns, deleting" )
287355 }
288356 client .containerMac = containerIf .HardwareAddr
289357 return nil
@@ -492,7 +560,7 @@ func (client *TransparentVlanEndpointClient) GetVnetRoutes(ipAddresses []net.IPN
492560
493561// Helper that creates routing rules for the current NS which direct packets
494562// to the virtual gateway ip on linkToName device interface
495- // Route 1: 169.254.1 .1 dev <linkToName>
563+ // Route 1: 169.254.2 .1 dev <linkToName>
496564// Route 2: default via 169.254.2.1 dev <linkToName>
497565func (client * TransparentVlanEndpointClient ) addDefaultRoutes (linkToName string , table int ) error {
498566 // Add route for virtualgwip (ip route add 169.254.2.1/32 dev eth0)
@@ -637,3 +705,16 @@ func ExecuteInNS(nsc NamespaceClientInterface, nsName string, f func() error) er
637705 }()
638706 return f ()
639707}
708+
709+ func RunWithRetries (f func () error , maxRuns , sleepMs int ) error {
710+ var err error
711+ for i := 0 ; i < maxRuns ; i ++ {
712+ err = f ()
713+ if err == nil {
714+ break
715+ }
716+ logger .Info ("Retrying after delay..." , zap .String ("error" , err .Error ()), zap .Int ("retry" , i ), zap .Int ("sleepMs" , sleepMs ))
717+ time .Sleep (time .Duration (sleepMs ) * time .Millisecond )
718+ }
719+ return err
720+ }
0 commit comments