Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 48 additions & 32 deletions plugins/meta/portmap/portmap_nftables.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,14 @@ import (
const (
tableName = "cni_hostport"

// Intermediate chain to jump to both 'hostip_hostports' and 'hostports'
hostPortsAllChain = "hostports_all"
// This chain was used for rules with hostIP, we do not use it anymore,
// but we keep it to make upgrade transparent
hostIPHostPortsChain = "hostip_hostports"
hostPortsChain = "hostports"
masqueradingChain = "masquerading"
// Chain containing all the rules
hostPortsChain = "hostports"
masqueradingChain = "masquerading"
)

// The nftables portmap implementation is fairly similar to the iptables implementation:
Expand Down Expand Up @@ -104,58 +109,69 @@ func (pmNFT *portMapperNFTables) forwardPorts(config *PortMapConf, containerNet
})

tx.Add(&knftables.Chain{
Name: "hostports",
Name: hostPortsChain,
})

tx.Add(&knftables.Chain{
Name: "hostip_hostports",
Name: hostIPHostPortsChain,
})

// setup intermediate chain
tx.Add(&knftables.Chain{
Name: "prerouting",
Type: knftables.PtrTo(knftables.NATType),
Hook: knftables.PtrTo(knftables.PreroutingHook),
Priority: knftables.PtrTo(knftables.DNATPriority),
Name: hostPortsAllChain,
})

tx.Flush(&knftables.Chain{
Name: "prerouting",
Name: hostPortsAllChain,
})

tx.Add(&knftables.Rule{
Chain: "prerouting",
Chain: hostPortsAllChain,
Rule: knftables.Concat(
conditions,
"jump", hostIPHostPortsChain,
),
})

tx.Add(&knftables.Rule{
Chain: "prerouting",
Chain: hostPortsAllChain,
Rule: knftables.Concat(
conditions,
"jump", hostPortsChain,
),
})

tx.Add(&knftables.Chain{
Name: "output",
Name: "prerouting",
Type: knftables.PtrTo(knftables.NATType),
Hook: knftables.PtrTo(knftables.OutputHook),
Hook: knftables.PtrTo(knftables.PreroutingHook),
Priority: knftables.PtrTo(knftables.DNATPriority),
})
tx.Flush(&knftables.Chain{
Name: "output",
Name: "prerouting",
})
tx.Add(&knftables.Rule{
Chain: "output",
Chain: "prerouting",
Rule: knftables.Concat(
conditions,
"jump", hostIPHostPortsChain,
"fib daddr type local",
"jump", hostPortsAllChain,
),
})

tx.Add(&knftables.Chain{
Name: "output",
Type: knftables.PtrTo(knftables.NATType),
Hook: knftables.PtrTo(knftables.OutputHook),
Priority: knftables.PtrTo(knftables.DNATPriority),
})
tx.Flush(&knftables.Chain{
Name: "output",
})
tx.Add(&knftables.Rule{
Chain: "output",
Rule: knftables.Concat(
conditions,
"fib daddr type local",
"jump", hostPortsChain,
"jump", hostPortsAllChain,
),
})

Expand Down Expand Up @@ -184,8 +200,10 @@ func (pmNFT *portMapperNFTables) forwardPorts(config *PortMapConf, containerNet
}

if useHostIP {
// we add the rule to 'hostports' instead of 'hostip_hostports'
// as we want to remove 'hostip_hostports' long-term
tx.Add(&knftables.Rule{
Chain: hostIPHostPortsChain,
Chain: hostPortsChain,
Rule: knftables.Concat(
ipX, "daddr", e.HostIP,
e.Protocol, "dport", e.HostPort,
Expand Down Expand Up @@ -243,17 +261,21 @@ func (pmNFT *portMapperNFTables) forwardPorts(config *PortMapConf, containerNet
func (pmNFT *portMapperNFTables) checkPorts(config *PortMapConf, containerNet net.IPNet) error {
isV6 := (containerNet.IP.To4() == nil)

var hostPorts, hostIPHostPorts, masqueradings int
var hostPorts, masqueradings int
for _, e := range config.RuntimeConfig.PortMaps {
if e.HostIP != "" {
hostIPHostPorts++
} else {
hostPorts++
Copy link
Contributor

@danwinship danwinship Nov 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, suppose you have a pod with hostIP hostports. They get created in hostip_hostports by the old plugin. Then you upgrade your plugins. Then the CRI does a CHECK. With this code, the CHECK would return an error. So then, presumably, the CRI will ADD again, and then forwardPorts will duplicate the hostIP rules in the hostports chain, without deleting the ones from the hostip_hostports chain...

That's not awful though; the extra rules are redundant but not incorrect. And it would be a little bit annoying to make checkPorts try to deal with accepting either the old layout or the new.

(Or would CRI be expected to do a DEL before the ADD anyway?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100% agree with the analysis, let's leave it like that :)
To encounter this bug, you would have to:

  • use the nftables backend
  • use hostIP
  • use cri-o (containerd doesn't use CHECK if I remember correctly)
  • use IPv4 only
  • not drain your node before update !!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Or would CRI be expected to do a DEL before the ADD anyway?)

Yes, you must always do a DEL before ADD.

hostIP := net.ParseIP(e.HostIP)
isHostV6 := (hostIP.To4() == nil)
// Ignore wrong-IP-family HostIPs
if isV6 != isHostV6 {
continue
}
}
hostPorts++
}
if *config.SNAT {
masqueradings = len(config.RuntimeConfig.PortMaps)
if isV6 {
masqueradings = 1
if !isV6 {
masqueradings *= 2
}
}
Expand All @@ -268,12 +290,6 @@ func (pmNFT *portMapperNFTables) checkPorts(config *PortMapConf, containerNet ne
return err
}
}
if hostIPHostPorts > 0 {
err := checkPortsAgainstRules(nft, hostIPHostPortsChain, config.ContainerID, hostIPHostPorts)
if err != nil {
return err
}
}
if masqueradings > 0 {
err := checkPortsAgainstRules(nft, masqueradingChain, config.ContainerID, masqueradings)
if err != nil {
Expand Down
172 changes: 133 additions & 39 deletions plugins/meta/portmap/portmap_nftables_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,31 @@ import (

var _ = Describe("portmapping configuration (nftables)", func() {
containerID := "icee6giejonei6so"
configTmpl := `{
"name": "test",
"type": "portmap",
"cniVersion": "%s",
"backend": "nftables",
"runtimeConfig": {
"portMappings": [
{ "hostPort": 8080, "containerPort": 80, "protocol": "tcp"},
{ "hostPort": 8081, "containerPort": 80, "protocol": "tcp"},
{ "hostPort": 8080, "containerPort": 81, "protocol": "udp"},
{ "hostPort": 8082, "containerPort": 82, "protocol": "udp"},
{ "hostPort": 8083, "containerPort": 83, "protocol": "tcp", "hostIP": "192.168.0.2"},
{ "hostPort": 8084, "containerPort": 84, "protocol": "tcp", "hostIP": "0.0.0.0"},
{ "hostPort": 8085, "containerPort": 85, "protocol": "tcp", "hostIP": "2001:db8:a::1"},
{ "hostPort": 8086, "containerPort": 86, "protocol": "tcp", "hostIP": "::"}
]
},
"snat": true,
"conditionsV4": ["a", "b"],
"conditionsV6": ["c", "d"]
}`
containerNet4, err := types.ParseCIDR("10.0.0.2/24")
Expect(err).NotTo(HaveOccurred())
containerNet6, err := types.ParseCIDR("2001:db8::2/64")
Expect(err).NotTo(HaveOccurred())

for _, ver := range []string{"0.3.0", "0.3.1", "0.4.0", "1.0.0"} {
// Redefine ver inside for scope so real value is picked up by each dynamically defined It()
Expand All @@ -45,90 +70,159 @@ var _ = Describe("portmapping configuration (nftables)", func() {
}
})

It(fmt.Sprintf("[%s] generates correct rules on ADD", ver), func() {
configBytes := []byte(fmt.Sprintf(`{
"name": "test",
"type": "portmap",
"cniVersion": "%s",
"backend": "nftables",
"runtimeConfig": {
"portMappings": [
{ "hostPort": 8080, "containerPort": 80, "protocol": "tcp"},
{ "hostPort": 8081, "containerPort": 80, "protocol": "tcp"},
{ "hostPort": 8080, "containerPort": 81, "protocol": "udp"},
{ "hostPort": 8082, "containerPort": 82, "protocol": "udp"},
{ "hostPort": 8083, "containerPort": 83, "protocol": "tcp", "hostIP": "192.168.0.2"},
{ "hostPort": 8084, "containerPort": 84, "protocol": "tcp", "hostIP": "0.0.0.0"},
{ "hostPort": 8085, "containerPort": 85, "protocol": "tcp", "hostIP": "2001:db8:a::1"},
{ "hostPort": 8086, "containerPort": 86, "protocol": "tcp", "hostIP": "::"}
]
},
"snat": true,
"conditionsV4": ["a", "b"],
"conditionsV6": ["c", "d"]
}`, ver))
It(fmt.Sprintf("[%s] generates correct rules", ver), func() {
configBytes := []byte(fmt.Sprintf(configTmpl, ver))

conf, _, err := parseConfig(configBytes, "foo")
Expect(err).NotTo(HaveOccurred())
conf.ContainerID = containerID

containerNet, err := types.ParseCIDR("10.0.0.2/24")
Expect(err).NotTo(HaveOccurred())

err = pmNFT.forwardPorts(conf, *containerNet)
err = pmNFT.forwardPorts(conf, *containerNet4)
Expect(err).NotTo(HaveOccurred())

expectedRules := strings.TrimSpace(`
add table ip cni_hostport { comment "CNI portmap plugin" ; }
add chain ip cni_hostport hostip_hostports
add chain ip cni_hostport hostports
add chain ip cni_hostport hostports_all
add chain ip cni_hostport masquerading { type nat hook postrouting priority 100 ; }
add chain ip cni_hostport output { type nat hook output priority -100 ; }
add chain ip cni_hostport prerouting { type nat hook prerouting priority -100 ; }
add rule ip cni_hostport hostip_hostports ip daddr 192.168.0.2 tcp dport 8083 dnat to 10.0.0.2:83 comment "icee6giejonei6so"
add rule ip cni_hostport hostports tcp dport 8080 dnat to 10.0.0.2:80 comment "icee6giejonei6so"
add rule ip cni_hostport hostports tcp dport 8081 dnat to 10.0.0.2:80 comment "icee6giejonei6so"
add rule ip cni_hostport hostports udp dport 8080 dnat to 10.0.0.2:81 comment "icee6giejonei6so"
add rule ip cni_hostport hostports udp dport 8082 dnat to 10.0.0.2:82 comment "icee6giejonei6so"
add rule ip cni_hostport hostports ip daddr 192.168.0.2 tcp dport 8083 dnat to 10.0.0.2:83 comment "icee6giejonei6so"
add rule ip cni_hostport hostports tcp dport 8084 dnat to 10.0.0.2:84 comment "icee6giejonei6so"
add rule ip cni_hostport hostports_all jump hostip_hostports
add rule ip cni_hostport hostports_all jump hostports
add rule ip cni_hostport masquerading ip saddr 10.0.0.2 ip daddr 10.0.0.2 masquerade comment "icee6giejonei6so"
add rule ip cni_hostport masquerading ip saddr 127.0.0.1 ip daddr 10.0.0.2 masquerade comment "icee6giejonei6so"
add rule ip cni_hostport output a b jump hostip_hostports
add rule ip cni_hostport output a b fib daddr type local jump hostports
add rule ip cni_hostport prerouting a b jump hostip_hostports
add rule ip cni_hostport prerouting a b jump hostports
add rule ip cni_hostport output a b fib daddr type local jump hostports_all
add rule ip cni_hostport prerouting a b fib daddr type local jump hostports_all
`)
actualRules := strings.TrimSpace(ipv4Fake.Dump())
Expect(actualRules).To(Equal(expectedRules))

// Disable snat, generate IPv6 rules
*conf.SNAT = false
containerNet, err = types.ParseCIDR("2001:db8::2/64")
Expect(err).NotTo(HaveOccurred())

err = pmNFT.forwardPorts(conf, *containerNet)
err = pmNFT.forwardPorts(conf, *containerNet6)
Expect(err).NotTo(HaveOccurred())

expectedRules = strings.TrimSpace(`
add table ip6 cni_hostport { comment "CNI portmap plugin" ; }
add chain ip6 cni_hostport hostip_hostports
add chain ip6 cni_hostport hostports
add chain ip6 cni_hostport hostports_all
add chain ip6 cni_hostport output { type nat hook output priority -100 ; }
add chain ip6 cni_hostport prerouting { type nat hook prerouting priority -100 ; }
add rule ip6 cni_hostport hostip_hostports ip6 daddr 2001:db8:a::1 tcp dport 8085 dnat to [2001:db8::2]:85 comment "icee6giejonei6so"
add rule ip6 cni_hostport hostports tcp dport 8080 dnat to [2001:db8::2]:80 comment "icee6giejonei6so"
add rule ip6 cni_hostport hostports tcp dport 8081 dnat to [2001:db8::2]:80 comment "icee6giejonei6so"
add rule ip6 cni_hostport hostports udp dport 8080 dnat to [2001:db8::2]:81 comment "icee6giejonei6so"
add rule ip6 cni_hostport hostports udp dport 8082 dnat to [2001:db8::2]:82 comment "icee6giejonei6so"
add rule ip6 cni_hostport hostports ip6 daddr 2001:db8:a::1 tcp dport 8085 dnat to [2001:db8::2]:85 comment "icee6giejonei6so"
add rule ip6 cni_hostport hostports tcp dport 8086 dnat to [2001:db8::2]:86 comment "icee6giejonei6so"
add rule ip6 cni_hostport output c d jump hostip_hostports
add rule ip6 cni_hostport output c d fib daddr type local jump hostports
add rule ip6 cni_hostport prerouting c d jump hostip_hostports
add rule ip6 cni_hostport prerouting c d jump hostports
add rule ip6 cni_hostport hostports_all jump hostip_hostports
add rule ip6 cni_hostport hostports_all jump hostports
add rule ip6 cni_hostport output c d fib daddr type local jump hostports_all
add rule ip6 cni_hostport prerouting c d fib daddr type local jump hostports_all
`)
actualRules = strings.TrimSpace(ipv6Fake.Dump())
Expect(actualRules).To(Equal(expectedRules))

err = pmNFT.unforwardPorts(conf)
Expect(err).NotTo(HaveOccurred())

expectedRules = strings.TrimSpace(`
add table ip cni_hostport { comment "CNI portmap plugin" ; }
add chain ip cni_hostport hostip_hostports
add chain ip cni_hostport hostports
add chain ip cni_hostport hostports_all
add chain ip cni_hostport masquerading { type nat hook postrouting priority 100 ; }
add chain ip cni_hostport output { type nat hook output priority -100 ; }
add chain ip cni_hostport prerouting { type nat hook prerouting priority -100 ; }
add rule ip cni_hostport hostports_all jump hostip_hostports
add rule ip cni_hostport hostports_all jump hostports
add rule ip cni_hostport output a b fib daddr type local jump hostports_all
add rule ip cni_hostport prerouting a b fib daddr type local jump hostports_all
`)
actualRules = strings.TrimSpace(ipv4Fake.Dump())
Expect(actualRules).To(Equal(expectedRules))

expectedRules = strings.TrimSpace(`
add table ip6 cni_hostport { comment "CNI portmap plugin" ; }
add chain ip6 cni_hostport hostip_hostports
add chain ip6 cni_hostport hostports
add chain ip6 cni_hostport hostports_all
add chain ip6 cni_hostport output { type nat hook output priority -100 ; }
add chain ip6 cni_hostport prerouting { type nat hook prerouting priority -100 ; }
add rule ip6 cni_hostport hostports_all jump hostip_hostports
add rule ip6 cni_hostport hostports_all jump hostports
add rule ip6 cni_hostport output c d fib daddr type local jump hostports_all
add rule ip6 cni_hostport prerouting c d fib daddr type local jump hostports_all
`)
actualRules = strings.TrimSpace(ipv6Fake.Dump())
Expect(actualRules).To(Equal(expectedRules))
})

It(fmt.Sprintf("[%s] has working CHECK", ver), func() {
configBytes := []byte(fmt.Sprintf(configTmpl, ver))

conf, _, err := parseConfig(configBytes, "foo")
Expect(err).NotTo(HaveOccurred())
conf.ContainerID = containerID

// CHECK KO: Missing all rules
err = pmNFT.checkPorts(conf, *containerNet4)
Expect(err).To(HaveOccurred())
err = pmNFT.checkPorts(conf, *containerNet6)
Expect(err).To(HaveOccurred())

*conf.SNAT = false

// ADD with SNAT=false
err = pmNFT.forwardPorts(conf, *containerNet4)
Expect(err).NotTo(HaveOccurred())
err = pmNFT.forwardPorts(conf, *containerNet6)
Expect(err).NotTo(HaveOccurred())

// CHECK OK: no SNAT rules
err = pmNFT.checkPorts(conf, *containerNet4)
Expect(err).NotTo(HaveOccurred())
err = pmNFT.checkPorts(conf, *containerNet6)
Expect(err).NotTo(HaveOccurred())

*conf.SNAT = true

// CHECK KO: missing SNAT rules
err = pmNFT.checkPorts(conf, *containerNet4)
Expect(err).To(HaveOccurred())
err = pmNFT.checkPorts(conf, *containerNet6)
Expect(err).To(HaveOccurred())

// ADD with SNAT=true
err = pmNFT.forwardPorts(conf, *containerNet4)
Expect(err).NotTo(HaveOccurred())
err = pmNFT.forwardPorts(conf, *containerNet6)
Expect(err).NotTo(HaveOccurred())

// CHECK OK: All rules present
err = pmNFT.checkPorts(conf, *containerNet4)
Expect(err).NotTo(HaveOccurred())
err = pmNFT.checkPorts(conf, *containerNet6)
Expect(err).NotTo(HaveOccurred())

// DELETE
err = pmNFT.unforwardPorts(conf)
Expect(err).NotTo(HaveOccurred())

// CHECK KO: Missing all rules
err = pmNFT.checkPorts(conf, *containerNet4)
Expect(err).To(HaveOccurred())
err = pmNFT.checkPorts(conf, *containerNet6)
Expect(err).To(HaveOccurred())
})
})
}
})