Skip to content

Commit 7fcd818

Browse files
committed
fix(network): add cleanupOrphanTaps() to remove orphan TAPs by carrier state
- Call new function cleanupOrphanTaps() at the start of DynamicNetwork.NetworkSetup().
- Add cleanupOrphanTaps(): scan netns for interfaces matching ^tap.*_urunc$ and use kernel carrier/operational state as the sole criterion:
  - NO-CARRIER => delete orphan (remove TC/qdisc, then delete link)
  - LOWER_UP / operational up / FlagRunning => treat as in-use and abort
- Do not scan /proc or check /dev/net/tun; do not attempt to reuse TAPs.
- Skip cleanup when no container interface (e.g. no eth0) is present.
- Remove PID/FD based checks and netns flock; document the single unikernel-per-netns assumption.
- Preserve networkSetup() create-only semantics and ensure TC/qdisc cleanup before link deletion.

This resolves an issue on Kubernetes where restarting urunc left orphan TAP devices in the pod network namespace and prevented subsequent network setup.

Signed-off-by: Sidney Chang <2190206983@qq.com>
1 parent 7777b4a commit 7fcd818

File tree

2 files changed

+65
-0
lines changed

2 files changed

+65
-0
lines changed

pkg/network/network.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"errors"
1919
"fmt"
2020
"net"
21+
"regexp"
2122
"strings"
2223

2324
"github.com/jackpal/gateway"
@@ -389,3 +390,62 @@ func deleteTapDevice(device netlink.Link) error {
389390
}
390391
return nil
391392
}
393+
394+
func cleanupOrphanTaps() error {
395+
netlog.Debug("running cleanupOrphanTaps (carrier-state based)")
396+
397+
// If there is no container interface (e.g. no eth0), do not attempt to create/delete taps.
398+
// This avoids touching taps in netns that aren't ready or belong to other runtimes (ctr).
399+
if _, err := netlink.LinkByName(DefaultInterface); err != nil {
400+
netlog.Debug("no container interface found in namespace; skipping orphan TAP cleanup")
401+
return nil
402+
}
403+
404+
// Per design: assume at-most-one unikernel per netns. No inter-process netns lock is used.
405+
406+
handle, err := netlink.NewHandle()
407+
if err != nil {
408+
return fmt.Errorf("failed to get netlink handle: %w", err)
409+
}
410+
defer handle.Close()
411+
412+
links, err := handle.LinkList()
413+
if err != nil {
414+
return fmt.Errorf("failed to list links: %w", err)
415+
}
416+
417+
tapRe := regexp.MustCompile(`^tap.*_urunc$`)
418+
for _, link := range links {
419+
attrs := link.Attrs()
420+
if attrs == nil {
421+
continue
422+
}
423+
name := attrs.Name
424+
if !tapRe.MatchString(name) {
425+
continue
426+
}
427+
428+
// The device is in a 'Zombie' state: Administrative status is UP, but
429+
// Operational status is DOWN with NO-CARRIER.
430+
// In the Linux TUN/TAP driver model, NO-CARRIER on an UP interface
431+
// definitively proves that no userspace process holds the file descriptor
432+
// for this device.
433+
if (attrs.Flags&net.FlagRunning) != 0 || attrs.OperState == netlink.OperUp {
434+
return fmt.Errorf("found tap %s with carrier/oper state UP: aborting cleanup (unikernel may be running)", name)
435+
}
436+
437+
netlog.Debugf("deleting orphan tap %s (no carrier)", name)
438+
if err := deleteAllTCFilters(link); err != nil {
439+
return fmt.Errorf("failed to delete tc filters for %s: %w", name, err)
440+
}
441+
if err := deleteAllQDiscs(link); err != nil {
442+
return fmt.Errorf("failed to delete qdiscs for %s: %w", name, err)
443+
}
444+
if err := deleteTapDevice(link); err != nil {
445+
return fmt.Errorf("failed to delete tap %s: %w", name, err)
446+
}
447+
netlog.Debugf("deleted orphan tap %s", name)
448+
}
449+
450+
return nil
451+
}

pkg/network/network_dynamic.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ type DynamicNetwork struct {
3535
// for multiple unikernels in the same pod/network namespace.
3636
// See: https://github.com/urunc-dev/urunc/issues/13
3737
func (n DynamicNetwork) NetworkSetup(uid uint32, gid uint32) (*UnikernelNetworkInfo, error) {
38+
// Attempt to clean up orphan TAPs created by urunc in this netns
39+
if err := cleanupOrphanTaps(); err != nil {
40+
return nil, fmt.Errorf("cleanupOrphanTaps failed: %w", err)
41+
}
42+
3843
tapIndex, err := getTapIndex()
3944
if err != nil {
4045
return nil, fmt.Errorf("getTapIndex failed: %w", err)

0 commit comments

Comments
 (0)