Skip to content

Commit 8b650b4

Browse files
authored
Merge pull request #4982 from hareeshpc/dev/bypass
Use accelerated device as Gateway interface
2 parents 57a2590 + 2cd8a7b commit 8b650b4

File tree

6 files changed

+189
-23
lines changed

6 files changed

+189
-23
lines changed
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Gateway Accelerated Interface Configuration
2+
3+
## Description
4+
5+
To provide hardware acceleration for traffic, both IN and OUT ports need to be a hardware
6+
accelerated netdevice backed by the Network Interface Card hardware itself.
7+
In case of external traffic, when one such port is the external OVS bridge, which for example has the gateway IP,
8+
such traffic (like host networking traffic) would not be accelerated.
9+
Using a Switchdev VirtualFunction (VF) or SubFunction (SF) as the gateway interface allows such traffic to be accelerated as well.
10+
11+
12+
## How it works?
13+
14+
Rather than using the external bridge itself as the gateway interface, use a switchdev VF or SF.
15+
This is depicted as follows:
16+
17+
```
18+
+----------+
19+
| br-ext |
20+
+--------+ |
21+
| UPLINK | |
22+
+--------+ | patch +----------+
23+
| x---------x br-int |
24+
+--------+ +--------+ | port +----------+
25+
| NETDEV +-----+ REP | |
26+
+--------+ +--------+ |
27+
+----------+
28+
```
29+
30+
Where `UPLINK` is a port on an offloading capable network interface hardware, `NETDEV` is a switchdev function
31+
of this port and `REP` is a representor netdevice of the switchdev function.
32+
The Node/Host IP is assigned to `NETDEV`, which makes OVS choose the `REP` port for external flows instead of the bridge.
33+
34+
35+
## How to use?
36+
37+
The gateway accelerated interface can be used by following these steps:
38+
39+
a) Creating and configuring the device.
40+
See figure above.
41+
An `UPLINK` device is connected to the OVS external bridge.
42+
An existing VF or SF `NETDEV` from the `UPLINK` is first selected as the Gateway Interface. Its associated
43+
representor `REP` is plugged into the OVS external bridge (br-ext). The gateway IP is assigned to this interface
44+
instead of the OVS external bridge (br-ext).
45+
46+
b) Specify `NETDEV` as a gateway interface explicitly via `OVN_GATEWAY_OPTS` environment variable for
47+
ovnkube-node container. Example:
48+
49+
```yaml
50+
- name: OVN_GATEWAY_OPTS
51+
value: "--gateway-accelerated-interface=<<NETDEV>>"
52+
```
53+
54+
Note that this is mutually exclusive with the `--gateway-interface` flag in `OVN_GATEWAY_OPTS`.
55+
56+
c) Set the external-id on the bridge to detect the uplink device correctly. This is useful for instances where
57+
the name of the bridge (e.g. br-ext) does not include the uplink device (e.g. p0) in its name. The uplink can also
58+
be a bond device.
59+
```bash
60+
ovs-vsctl br-set-external-id br-ext bridge-uplink p0
61+
```
62+
This gives more flexibility in detecting the uplink device in cases where auto-detection fails (such as in the case of
63+
bonded uplinks etc.)
64+
65+
## Verification
66+
67+
OpenFlow rules added to the external bridge will use the representor (`REP`) port as the IN/OUT port instead of the bridge's local port.
68+
69+
Example flows when `pf0vf1` is the netdev and `pf0vf1_r` is the representor:
70+
```bash
71+
cookie=0xdeff105, duration=505314.637s, table=0, n_packets=0, n_bytes=0, priority=500,ip,in_port="pf0vf1_r",nw_dst=169.254.0.1 actions=ct(table=5,zone=64002,nat)
72+
cookie=0xdeff105, duration=505314.637s, table=0, n_packets=655, n_bytes=129843, priority=500,ip,in_port="pf0vf1_r",nw_dst=10.96.0.0/16 actions=ct(commit,table=2,zone=64001,nat(src=169.254.0.2))
73+
cookie=0xdeff105, duration=505314.637s, table=0, n_packets=359877855, n_bytes=531033264511, priority=205,udp,in_port=p0,dl_dst=42:0b:9a:f1:83:b2,tp_dst=6081 actions=output:"pf0vf1_r"
74+
cookie=0xdeff105, duration=505314.637s, table=0, n_packets=6252796, n_bytes=775727815, priority=200,udp,in_port="pf0vf1_r",tp_dst=6081 actions=output:p0
75+
cookie=0xdeff105, duration=505314.637s, table=0, n_packets=1867752, n_bytes=294547557, priority=100,ip,in_port="pf0vf1_r" actions=ct(commit,zone=64000,exec(load:0x2->NXM_NX_CT_MARK[])),output:p0
76+
cookie=0xdeff105, duration=505314.637s, table=0, n_packets=22, n_bytes=1320, priority=10,in_port=p0,dl_dst=42:0b:9a:f1:83:b2 actions=output:"patch-brp0_c-23",output:"pf0vf1_r"
77+
cookie=0xdeff105, duration=505314.637s, table=1, n_packets=1313364, n_bytes=669490616, priority=100,ct_state=+est+trk,ct_mark=0x2,ip actions=output:"pf0vf1_r"
78+
cookie=0xdeff105, duration=505314.637s, table=1, n_packets=0, n_bytes=0, priority=100,ct_state=+rel+trk,ct_mark=0x2,ip actions=output:"pf0vf1_r"
79+
cookie=0xdeff105, duration=505314.637s, table=1, n_packets=0, n_bytes=0, priority=13,udp,in_port=p0,tp_dst=3784 actions=output:"patch-brp0_c-23",output:"pf0vf1_r"
80+
cookie=0xdeff105, duration=505314.637s, table=1, n_packets=493602, n_bytes=48384748, priority=10,dl_dst=42:0b:9a:f1:83:b2 actions=output:"pf0vf1_r"
81+
cookie=0xdeff105, duration=505314.637s, table=3, n_packets=694, n_bytes=276779, actions=move:NXM_OF_ETH_DST[]->NXM_OF_ETH_SRC[],mod_dl_dst:42:0b:9a:f1:83:b2,output:"pf0vf1_r"
82+
83+
84+
```

go-controller/pkg/config/config.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,12 @@ type GatewayConfig struct {
452452
Mode GatewayMode `gcfg:"mode"`
453453
// Interface is the network interface to use for the gateway in "shared" mode
454454
Interface string `gcfg:"interface"`
455-
// Exgress gateway interface is the optional network interface to use for external gw pods traffic.
455+
// GatewayAcceleratedInterface is the optional network interface to use for gateway traffic acceleration.
456+
// This is typically a VF or SF device. When specified it would be used as the in_port for Openflow rules
457+
// on the external bridge. The Host IP would be on this device.
458+
// Mutually exclusive with the `--gateway-interface` flag.
459+
GatewayAcceleratedInterface string `gcfg:"gateway-accelerated-interface"`
460+
// Egress gateway interface is the optional network interface to use for external gw pods traffic.
456461
EgressGWInterface string `gcfg:"egw-interface"`
457462
// NextHop is the gateway IP address of Interface; will be autodetected if not given
458463
NextHop string `gcfg:"next-hop"`
@@ -1406,6 +1411,13 @@ var OVNGatewayFlags = []cli.Flag{
14061411
"interface. Only useful with \"init-gateways\"",
14071412
Destination: &cliConfig.Gateway.Interface,
14081413
},
1414+
&cli.StringFlag{
1415+
Name: "gateway-accelerated-interface",
1416+
Usage: "The optional network interface to use for gateway traffic acceleration. " +
1417+
"This is typically a VF or SF device. When specified it would be used as the in_port for Openflow rules " +
1418+
"on the external bridge. The Host IP would be on this device.",
1419+
Destination: &cliConfig.Gateway.GatewayAcceleratedInterface,
1420+
},
14091421
&cli.StringFlag{
14101422
Name: "exgw-interface",
14111423
Usage: "The interface on nodes that will be used for external gw network traffic. " +

go-controller/pkg/node/default_node_network_controller.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,9 +1180,8 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error {
11801180
// Note(adrianc): DPU deployments are expected to support the new shared gateway changes, upgrade flow
11811181
// is not needed. Future upgrade flows will need to take DPUs into account.
11821182
if config.OvnKubeNode.Mode != types.NodeModeDPUHost {
1183-
bridgeName := ""
11841183
if config.OvnKubeNode.Mode == types.NodeModeFull {
1185-
bridgeName = nc.Gateway.GetGatewayBridgeIface()
1184+
bridgeName := nc.Gateway.GetGatewayIface()
11861185
// Configure route for svc towards shared gw bridge
11871186
// Have to have the route to bridge for multi-NIC mode, where the default gateway may go to a non-OVS interface
11881187
if err := configureSvcRouteViaBridge(nc.routeManager, bridgeName); err != nil {

go-controller/pkg/node/gateway.go

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ type Gateway interface {
3232
Init(<-chan struct{}, *sync.WaitGroup) error
3333
Start()
3434
GetGatewayBridgeIface() string
35+
GetGatewayIface() string
3536
SetDefaultGatewayBridgeMAC(addr net.HardwareAddr)
3637
SetDefaultPodNetworkAdvertised(bool)
3738
Reconcile() error
@@ -456,6 +457,10 @@ func (g *gateway) GetGatewayBridgeIface() string {
456457
return g.openflowManager.getDefaultBridgeName()
457458
}
458459

460+
func (g *gateway) GetGatewayIface() string {
461+
return g.openflowManager.defaultBridge.getGatewayIface()
462+
}
463+
459464
// getMaxFrameLength returns the maximum frame size (ignoring VLAN header) that a gateway can handle
460465
func getMaxFrameLength() int {
461466
return config.Default.MTU + 14
@@ -537,6 +542,8 @@ type bridgeConfiguration struct {
537542
nodeName string
538543
bridgeName string
539544
uplinkName string
545+
gwIface string
546+
gwIfaceRep string
540547
ips []*net.IPNet
541548
interfaceID string
542549
macAddress net.HardwareAddr
@@ -546,11 +553,19 @@ type bridgeConfiguration struct {
546553
eipMarkIPs *markIPsCache
547554
}
548555

556+
func (b *bridgeConfiguration) getGatewayIface() string {
557+
// If gwIface is set, an accelerated GW interface is present and we use it. Otherwise, use the external bridge.
558+
if b.gwIface != "" {
559+
return b.gwIface
560+
}
561+
return b.bridgeName
562+
}
563+
549564
// updateInterfaceIPAddresses sets and returns the bridge's current ips
550565
func (b *bridgeConfiguration) updateInterfaceIPAddresses(node *corev1.Node) ([]*net.IPNet, error) {
551566
b.Lock()
552567
defer b.Unlock()
553-
ifAddrs, err := getNetworkInterfaceIPAddresses(b.bridgeName)
568+
ifAddrs, err := getNetworkInterfaceIPAddresses(b.getGatewayIface())
554569
if err != nil {
555570
return nil, err
556571
}
@@ -578,6 +593,11 @@ func (b *bridgeConfiguration) updateInterfaceIPAddresses(node *corev1.Node) ([]*
578593

579594
func bridgeForInterface(intfName, nodeName, physicalNetworkName string, nodeSubnets, gwIPs []*net.IPNet,
580595
advertised bool) (*bridgeConfiguration, error) {
596+
var intfRep string
597+
var err error
598+
isGWAcclInterface := false
599+
gwIntf := intfName
600+
581601
defaultNetConfig := &bridgeUDNConfiguration{
582602
masqCTMark: ctMarkOVN,
583603
subnets: config.Default.ClusterSubnets,
@@ -591,15 +611,47 @@ func bridgeForInterface(intfName, nodeName, physicalNetworkName string, nodeSubn
591611
eipMarkIPs: newMarkIPsCache(),
592612
}
593613
res.netConfig[types.DefaultNetworkName].advertised.Store(advertised)
594-
gwIntf := intfName
595614

596-
if bridgeName, _, err := util.RunOVSVsctl("port-to-br", intfName); err == nil {
615+
if config.Gateway.GatewayAcceleratedInterface != "" {
616+
// Try to get representor for the specified gateway device.
617+
// If function succeeds, then it is either a valid switchdev VF or SF, and we can use this accelerated device
618+
// for node IP, Host Ofport for Openflow etc.
619+
// If failed - error for improper configuration option
620+
intfRep, err = getRepresentor(config.Gateway.GatewayAcceleratedInterface)
621+
if err != nil {
622+
return nil, fmt.Errorf("gateway accelerated interface %s is not valid: %w", config.Gateway.GatewayAcceleratedInterface, err)
623+
}
624+
isGWAcclInterface = true
625+
klog.Infof("For gateway accelerated interface %s representor: %s", config.Gateway.GatewayAcceleratedInterface, intfRep)
626+
}
627+
628+
if isGWAcclInterface {
629+
gatewayAcceleratedInterface := config.Gateway.GatewayAcceleratedInterface
630+
bridgeName, _, err := util.RunOVSVsctl("port-to-br", intfRep)
631+
if err != nil {
632+
return nil, fmt.Errorf("failed to find bridge that has port %s: %w", intfRep, err)
633+
}
634+
link, err := util.GetNetLinkOps().LinkByName(gatewayAcceleratedInterface)
635+
if err != nil {
636+
return nil, fmt.Errorf("failed to get netdevice link for %s: %w", gatewayAcceleratedInterface, err)
637+
}
638+
uplinkName, err := util.GetNicName(bridgeName)
639+
if err != nil {
640+
return nil, fmt.Errorf("failed to find nic name for bridge %s: %w", bridgeName, err)
641+
}
642+
res.bridgeName = bridgeName
643+
res.uplinkName = uplinkName
644+
res.gwIfaceRep = intfRep
645+
res.gwIface = gatewayAcceleratedInterface
646+
res.macAddress = link.Attrs().HardwareAddr
647+
} else if bridgeName, _, err := util.RunOVSVsctl("port-to-br", intfName); err == nil {
597648
// This is an OVS bridge's internal port
598649
uplinkName, err := util.GetNicName(bridgeName)
599650
if err != nil {
600651
return nil, fmt.Errorf("failed to find nic name for bridge %s: %w", bridgeName, err)
601652
}
602653
res.bridgeName = bridgeName
654+
res.gwIface = bridgeName
603655
res.uplinkName = uplinkName
604656
gwIntf = bridgeName
605657
} else if _, _, err := util.RunOVSVsctl("br-exists", intfName); err != nil {
@@ -610,6 +662,7 @@ func bridgeForInterface(intfName, nodeName, physicalNetworkName string, nodeSubn
610662
return nil, fmt.Errorf("nicToBridge failed for %s: %w", intfName, err)
611663
}
612664
res.bridgeName = bridgeName
665+
res.gwIface = bridgeName
613666
res.uplinkName = intfName
614667
gwIntf = bridgeName
615668
} else {
@@ -625,8 +678,8 @@ func bridgeForInterface(intfName, nodeName, physicalNetworkName string, nodeSubn
625678
res.uplinkName = uplinkName
626679
}
627680
res.bridgeName = intfName
681+
res.gwIface = intfName
628682
}
629-
var err error
630683
// Now, we get IP addresses for the bridge
631684
if len(gwIPs) > 0 {
632685
// use gwIPs if provided
@@ -640,9 +693,11 @@ func bridgeForInterface(intfName, nodeName, physicalNetworkName string, nodeSubn
640693
}
641694
}
642695

643-
res.macAddress, err = util.GetOVSPortMACAddress(gwIntf)
644-
if err != nil {
645-
return nil, fmt.Errorf("failed to get MAC address for ovs port %s: %w", gwIntf, err)
696+
if !isGWAcclInterface { // We do not have an accelerated device for Gateway interface
697+
res.macAddress, err = util.GetOVSPortMACAddress(gwIntf)
698+
if err != nil {
699+
return nil, fmt.Errorf("failed to get MAC address for ovs port %s: %w", gwIntf, err)
700+
}
646701
}
647702

648703
res.interfaceID, err = bridgedGatewayNodeSetup(nodeName, res.bridgeName, physicalNetworkName)
@@ -665,6 +720,14 @@ func bridgeForInterface(intfName, nodeName, physicalNetworkName string, nodeSubn
665720
return nil, err
666721
}
667722
}
668-
669723
return &res, nil
670724
}
725+
726+
func getRepresentor(intfName string) (string, error) {
727+
deviceID, err := util.GetDeviceIDFromNetdevice(intfName)
728+
if err != nil {
729+
return "", err
730+
}
731+
732+
return util.GetFunctionRepresentorName(deviceID)
733+
}

go-controller/pkg/node/gateway_init.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ func (nc *DefaultNodeNetworkController) updateGatewayMAC(link netlink.Link) erro
603603
return nil
604604
}
605605

606-
if nc.Gateway.GetGatewayBridgeIface() != link.Attrs().Name {
606+
if nc.Gateway.GetGatewayIface() != link.Attrs().Name {
607607
return nil
608608
}
609609

go-controller/pkg/node/gateway_shared_intf.go

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,10 +1440,10 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st
14401440
fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+
14411441
"actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort))
14421442

1443-
// table0, Geneve packets coming from LOCAL. Skip conntrack and go directly to external
1443+
// table0, Geneve packets coming from LOCAL/Host OFPort. Skip conntrack and go directly to external
14441444
dftFlows = append(dftFlows,
14451445
fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+
1446-
"actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys))
1446+
"actions=output:%s", defaultOpenFlowCookie, ofPortHost, config.Default.EncapPort, ofPortPhys))
14471447
}
14481448
physicalIP, err := util.MatchFirstIPNetFamily(false, bridgeIPs)
14491449
if err != nil {
@@ -2148,7 +2148,15 @@ func setBridgeOfPorts(bridge *bridgeConfiguration) error {
21482148
hostRep, stderr, err)
21492149
}
21502150
} else {
2151-
bridge.ofPortHost = ovsLocalPort
2151+
var err error
2152+
if bridge.gwIfaceRep != "" {
2153+
bridge.ofPortHost, _, err = util.RunOVSVsctl("get", "interface", bridge.gwIfaceRep, "ofport")
2154+
if err != nil {
2155+
return fmt.Errorf("failed to get ofport of bypass rep %s, error: %v", bridge.gwIfaceRep, err)
2156+
}
2157+
} else {
2158+
bridge.ofPortHost = ovsLocalPort
2159+
}
21522160
}
21532161

21542162
return nil
@@ -2303,15 +2311,15 @@ func newGateway(
23032311
// Delete stale masquerade resources if there are any. This is to make sure that there
23042312
// are no Linux resources with IP from old masquerade subnet when masquerade subnet
23052313
// gets changed as part of day2 operation.
2306-
if err := deleteStaleMasqueradeResources(gwBridge.bridgeName, nodeName, watchFactory); err != nil {
2314+
if err := deleteStaleMasqueradeResources(gwBridge.getGatewayIface(), nodeName, watchFactory); err != nil {
23072315
return fmt.Errorf("failed to remove stale masquerade resources: %w", err)
23082316
}
23092317

2310-
if err := setNodeMasqueradeIPOnExtBridge(gwBridge.bridgeName); err != nil {
2311-
return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", gwBridge.bridgeName, err)
2318+
if err := setNodeMasqueradeIPOnExtBridge(gwBridge.getGatewayIface()); err != nil {
2319+
return fmt.Errorf("failed to set the node masquerade IP on the ext bridge %s: %v", gwBridge.getGatewayIface(), err)
23122320
}
23132321

2314-
if err := addMasqueradeRoute(routeManager, gwBridge.bridgeName, nodeName, gwIPs, watchFactory); err != nil {
2322+
if err := addMasqueradeRoute(routeManager, gwBridge.getGatewayIface(), nodeName, gwIPs, watchFactory); err != nil {
23152323
return fmt.Errorf("failed to set the node masquerade route to OVN: %v", err)
23162324
}
23172325

@@ -2364,7 +2372,7 @@ func newGateway(
23642372
gw.openflowManager.requestFlowSync()
23652373
}
23662374

2367-
if err := addHostMACBindings(gwBridge.bridgeName); err != nil {
2375+
if err := addHostMACBindings(gwBridge.getGatewayIface()); err != nil {
23682376
return fmt.Errorf("failed to add MAC bindings for service routing: %w", err)
23692377
}
23702378

@@ -2421,11 +2429,11 @@ func newNodePortWatcher(
24212429
subnets = append(subnets, config.Kubernetes.ServiceCIDRs...)
24222430
if config.Gateway.DisableForwarding {
24232431
if err := initExternalBridgeServiceForwardingRules(subnets); err != nil {
2424-
return nil, fmt.Errorf("failed to add accept rules in forwarding table for bridge %s: err %v", gwBridge.bridgeName, err)
2432+
return nil, fmt.Errorf("failed to add accept rules in forwarding table for bridge %s: err %v", gwBridge.getGatewayIface(), err)
24252433
}
24262434
} else {
24272435
if err := delExternalBridgeServiceForwardingRules(subnets); err != nil {
2428-
return nil, fmt.Errorf("failed to delete accept rules in forwarding table for bridge %s: err %v", gwBridge.bridgeName, err)
2436+
return nil, fmt.Errorf("failed to delete accept rules in forwarding table for bridge %s: err %v", gwBridge.getGatewayIface(), err)
24292437
}
24302438
}
24312439

@@ -2443,7 +2451,7 @@ func newNodePortWatcher(
24432451
gatewayIPv4: gatewayIPv4,
24442452
gatewayIPv6: gatewayIPv6,
24452453
ofportPhys: ofportPhys,
2446-
gwBridge: gwBridge.bridgeName,
2454+
gwBridge: gwBridge.getGatewayIface(),
24472455
serviceInfo: make(map[ktypes.NamespacedName]*serviceConfig),
24482456
nodeIPManager: nodeIPManager,
24492457
ofm: ofm,

0 commit comments

Comments
 (0)