Skip to content

Commit a59db0e

Browse files
maxpainsmira
authored andcommitted
fix: improve OpenStack bare metal network configuration reliability
This commit improves network configuration on OpenStack bare metal (Ironic) by: 1. Adding WaitForDevicesReady call before parsing network configuration to ensure network interfaces are available. 2. Adding fallback to HardwareAddr when PermanentAddr is empty, similar to the NoCloud platform implementation. 3. Adding a retry loop with exponential backoff to handle cases where network interfaces are not immediately available after device init. 4. Removing hardcoded bond parameters (UpDelay: 200, DownDelay: 200, LACPRate: fast) to use kernel defaults matching Ubuntu behavior. 5. Explicitly setting the bond interface MAC address from OpenStack metadata to ensure the switch sees the correct MAC on the bond. These changes address network configuration failures on bare metal OpenStack deployments where interfaces may not be immediately discoverable by MAC address, and fix bond MAC address mismatch issues where the switch expects a specific MAC on the bond interface. Signed-off-by: Max Makarov <maxpain@linux.com> Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
1 parent 659009a commit a59db0e

File tree

3 files changed

+188
-68
lines changed

3 files changed

+188
-68
lines changed

internal/app/machined/pkg/runtime/v1alpha1/platform/openstack/openstack.go

Lines changed: 74 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@ import (
1212
stderrors "errors"
1313
"fmt"
1414
"log"
15+
"net"
1516
"net/netip"
1617
"strings"
18+
"time"
1719

20+
"github.com/cenkalti/backoff/v4"
1821
"github.com/cosi-project/runtime/pkg/safe"
1922
"github.com/cosi-project/runtime/pkg/state"
2023
"github.com/siderolabs/go-procfs/procfs"
@@ -48,16 +51,17 @@ func (o *OpenStack) ParseMetadata(
4851
extIPs []netip.Addr,
4952
metadata *MetadataConfig,
5053
st state.State,
51-
) (*runtime.PlatformNetworkConfig, error) {
54+
) (*runtime.PlatformNetworkConfig, bool, error) {
5255
networkConfig := &runtime.PlatformNetworkConfig{}
56+
needsReconcile := false
5357

5458
if metadata.Hostname != "" {
5559
hostnameSpec := network.HostnameSpecSpec{
5660
ConfigLayer: network.ConfigPlatform,
5761
}
5862

5963
if err := hostnameSpec.ParseFQDN(metadata.Hostname); err != nil {
60-
return nil, err
64+
return nil, false, err
6165
}
6266

6367
networkConfig.Hostnames = append(networkConfig.Hostnames, hostnameSpec)
@@ -72,7 +76,7 @@ func (o *OpenStack) ParseMetadata(
7276
if ip, err := netip.ParseAddr(netsvc.Address); err == nil {
7377
dnsIPs = append(dnsIPs, ip)
7478
} else {
75-
return nil, fmt.Errorf("failed to parse dns service ip: %w", err)
79+
return nil, false, fmt.Errorf("failed to parse dns service ip: %w", err)
7680
}
7781
}
7882
}
@@ -86,7 +90,7 @@ func (o *OpenStack) ParseMetadata(
8690

8791
hostInterfaces, err := safe.StateListAll[*network.LinkStatus](ctx, st)
8892
if err != nil {
89-
return nil, fmt.Errorf("error listing host interfaces: %w", err)
93+
return nil, false, fmt.Errorf("error listing host interfaces: %w", err)
9094
}
9195

9296
ifaces := make(map[string]string)
@@ -103,12 +107,12 @@ func (o *OpenStack) ParseMetadata(
103107

104108
mode, err := nethelpers.BondModeByName(netLink.BondMode)
105109
if err != nil {
106-
return nil, fmt.Errorf("invalid bond_mode: %w", err)
110+
return nil, false, fmt.Errorf("invalid bond_mode: %w", err)
107111
}
108112

109113
hashPolicy, err := nethelpers.BondXmitHashPolicyByName(netLink.BondHashPolicy)
110114
if err != nil {
111-
return nil, fmt.Errorf("invalid bond_xmit_hash_policy: %w", err)
115+
return nil, false, fmt.Errorf("invalid bond_xmit_hash_policy: %w", err)
112116
}
113117

114118
bondName := fmt.Sprintf("bond%d", bondIndex)
@@ -132,6 +136,15 @@ func (o *OpenStack) ParseMetadata(
132136
},
133137
}
134138

139+
if netLink.Mac != "" {
140+
mac, err := net.ParseMAC(netLink.Mac)
141+
if err != nil {
142+
return nil, false, fmt.Errorf("invalid bond MAC address %q: %w", netLink.Mac, err)
143+
}
144+
145+
bondLink.HardwareAddress = nethelpers.HardwareAddr(mac)
146+
}
147+
135148
if mode == nethelpers.BondMode8023AD {
136149
bondLink.BondMaster.ADLACPActive = nethelpers.ADLACPActiveOn
137150
}
@@ -178,18 +191,27 @@ func (o *OpenStack) ParseMetadata(
178191
case "phy", "vif", "ovs", "bridge", "tap", "vhostuser", "hw_veb":
179192
linkName := ""
180193

181-
for hostInterface := range hostInterfaces.All() {
182-
if strings.EqualFold(hostInterface.TypedSpec().PermanentAddr.String(), netLink.Mac) {
183-
linkName = hostInterface.Metadata().ID()
194+
if netLink.Mac != "" {
195+
for hostInterface := range hostInterfaces.All() {
196+
macAddress := hostInterface.TypedSpec().PermanentAddr.String()
197+
if macAddress == "" {
198+
macAddress = hostInterface.TypedSpec().HardwareAddr.String()
199+
}
200+
201+
if strings.EqualFold(macAddress, netLink.Mac) {
202+
linkName = hostInterface.Metadata().ID()
184203

185-
break
204+
break
205+
}
186206
}
187207
}
188208

189209
if linkName == "" {
190210
linkName = fmt.Sprintf("eth%d", idx)
191211

192212
log.Printf("failed to find interface with MAC %q, using %q", netLink.Mac, linkName)
213+
214+
needsReconcile = true
193215
}
194216

195217
ifaces[netLink.ID] = linkName
@@ -290,7 +312,7 @@ func (o *OpenStack) ParseMetadata(
290312
if ntwrk.Address != "" {
291313
ipPrefix, err := address.IPPrefixFrom(ntwrk.Address, ntwrk.Netmask)
292314
if err != nil {
293-
return nil, fmt.Errorf("failed to parse ip address: %w", err)
315+
return nil, false, fmt.Errorf("failed to parse ip address: %w", err)
294316
}
295317

296318
family := nethelpers.FamilyInet4
@@ -312,7 +334,7 @@ func (o *OpenStack) ParseMetadata(
312334
if ntwrk.Gateway != "" {
313335
gw, err := netip.ParseAddr(ntwrk.Gateway)
314336
if err != nil {
315-
return nil, fmt.Errorf("failed to parse gateway ip: %w", err)
337+
return nil, false, fmt.Errorf("failed to parse gateway ip: %w", err)
316338
}
317339

318340
priority := uint32(network.DefaultRouteMetric)
@@ -341,12 +363,12 @@ func (o *OpenStack) ParseMetadata(
341363
for _, route := range ntwrk.Routes {
342364
gw, err := netip.ParseAddr(route.Gateway)
343365
if err != nil {
344-
return nil, fmt.Errorf("failed to parse route gateway: %w", err)
366+
return nil, false, fmt.Errorf("failed to parse route gateway: %w", err)
345367
}
346368

347369
dest, err := address.IPPrefixFrom(route.Network, route.Netmask)
348370
if err != nil {
349-
return nil, fmt.Errorf("failed to parse route network: %w", err)
371+
return nil, false, fmt.Errorf("failed to parse route network: %w", err)
350372
}
351373

352374
family := nethelpers.FamilyInet4
@@ -386,7 +408,7 @@ func (o *OpenStack) ParseMetadata(
386408
ProviderID: fmt.Sprintf("openstack:///%s", metadata.UUID),
387409
}
388410

389-
return networkConfig, nil
411+
return networkConfig, needsReconcile, nil
390412
}
391413

392414
// Configuration implements the runtime.Platform interface.
@@ -426,7 +448,14 @@ func (o *OpenStack) KernelArgs(string, quirks.Quirks) procfs.Parameters {
426448
}
427449

428450
// NetworkConfiguration implements the runtime.Platform interface.
451+
//
452+
//nolint:gocyclo
429453
func (o *OpenStack) NetworkConfiguration(ctx context.Context, st state.State, ch chan<- *runtime.PlatformNetworkConfig) error {
454+
// wait for devices to be ready before proceeding, otherwise we might not find network interfaces by MAC
455+
if err := netutils.WaitForDevicesReady(ctx, st); err != nil {
456+
return fmt.Errorf("error waiting for devices to be ready: %w", err)
457+
}
458+
430459
networkSource := false
431460

432461
metadataConfigDl, metadataNetworkConfigDl, _, err := o.configFromCD(ctx, st)
@@ -462,16 +491,36 @@ func (o *OpenStack) NetworkConfiguration(ctx context.Context, st state.State, ch
462491
}
463492
}
464493

465-
networkConfig, err := o.ParseMetadata(ctx, &unmarshalledNetworkConfig, extIPs, &meta, st)
466-
if err != nil {
467-
return err
468-
}
494+
// do a loop to retry network config remap in case of missing links
495+
// on each try, export the configuration as it is, and if the network is reconciled next time, export the reconciled configuration
496+
bckoff := backoff.NewExponentialBackOff()
469497

470-
select {
471-
case ch <- networkConfig:
472-
case <-ctx.Done():
473-
return ctx.Err()
474-
}
498+
for {
499+
networkConfig, needsReconcile, err := o.ParseMetadata(ctx, &unmarshalledNetworkConfig, extIPs, &meta, st)
500+
if err != nil {
501+
return err
502+
}
503+
504+
select {
505+
case ch <- networkConfig:
506+
case <-ctx.Done():
507+
return ctx.Err()
508+
}
475509

476-
return nil
510+
if !needsReconcile {
511+
return nil
512+
}
513+
514+
// wait for backoff to retry network config remap
515+
nextBackoff := bckoff.NextBackOff()
516+
if nextBackoff == backoff.Stop {
517+
return nil
518+
}
519+
520+
select {
521+
case <-ctx.Done():
522+
return ctx.Err()
523+
case <-time.After(nextBackoff):
524+
}
525+
}
477526
}

internal/app/machined/pkg/runtime/v1alpha1/platform/openstack/openstack_test.go

Lines changed: 113 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package openstack_test
66

77
import (
8+
"context"
89
_ "embed"
910
"encoding/json"
1011
"net/netip"
@@ -17,6 +18,7 @@ import (
1718
"github.com/stretchr/testify/require"
1819
"go.yaml.in/yaml/v4"
1920

21+
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
2022
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/openstack"
2123
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
2224
"github.com/siderolabs/talos/pkg/machinery/resources/network"
@@ -32,47 +34,115 @@ var rawNetwork []byte
3234
var expectedNetworkConfig string
3335

3436
func TestParseMetadata(t *testing.T) {
35-
o := &openstack.OpenStack{}
36-
37-
var metadata openstack.MetadataConfig
38-
39-
require.NoError(t, json.Unmarshal(rawMetadata, &metadata))
40-
41-
var n openstack.NetworkConfig
42-
43-
require.NoError(t, json.Unmarshal(rawNetwork, &n))
44-
45-
ctx := t.Context()
46-
47-
st := state.WrapCore(namespaced.NewState(inmem.Build))
48-
49-
eth0 := network.NewLinkStatus(network.NamespaceName, "eth0")
50-
eth0.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0xa4, 0xbf, 0x00, 0x10, 0x20, 0x30}
51-
require.NoError(t, st.Create(ctx, eth0))
52-
53-
eth1 := network.NewLinkStatus(network.NamespaceName, "eth1")
54-
eth1.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0xa4, 0xbf, 0x00, 0x10, 0x20, 0x31}
55-
require.NoError(t, st.Create(ctx, eth1))
56-
57-
eth2 := network.NewLinkStatus(network.NamespaceName, "eth2")
58-
eth2.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0xa4, 0xbf, 0x00, 0x10, 0x20, 0x33}
59-
require.NoError(t, st.Create(ctx, eth2))
60-
61-
// Bond slaves
62-
63-
eth3 := network.NewLinkStatus(network.NamespaceName, "eth3")
64-
eth3.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x4c, 0xd9, 0x8f, 0xb3, 0x34, 0xf8}
65-
require.NoError(t, st.Create(ctx, eth3))
66-
67-
eth4 := network.NewLinkStatus(network.NamespaceName, "eth4")
68-
eth4.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x4c, 0xd9, 0x8f, 0xb3, 0x34, 0xf7}
69-
require.NoError(t, st.Create(ctx, eth4))
70-
71-
networkConfig, err := o.ParseMetadata(ctx, &n, []netip.Addr{netip.MustParseAddr("1.2.3.4")}, &metadata, st)
72-
require.NoError(t, err)
73-
74-
marshaled, err := yaml.Marshal(networkConfig)
75-
require.NoError(t, err)
76-
77-
assert.Equal(t, expectedNetworkConfig, string(marshaled))
37+
t.Parallel()
38+
39+
for _, tt := range []struct {
40+
name string
41+
networkJSON []byte
42+
metadataJSON []byte
43+
extIPs []netip.Addr
44+
setupState func(t *testing.T, ctx context.Context, st state.State)
45+
expectedNeedsReconcile bool
46+
expected string
47+
checkResult func(t *testing.T, cfg *runtime.PlatformNetworkConfig)
48+
}{
49+
{
50+
name: "full config",
51+
networkJSON: rawNetwork,
52+
metadataJSON: rawMetadata,
53+
extIPs: []netip.Addr{netip.MustParseAddr("1.2.3.4")},
54+
setupState: func(t *testing.T, ctx context.Context, st state.State) {
55+
eth0 := network.NewLinkStatus(network.NamespaceName, "eth0")
56+
eth0.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0xa4, 0xbf, 0x00, 0x10, 0x20, 0x30}
57+
require.NoError(t, st.Create(ctx, eth0))
58+
59+
eth1 := network.NewLinkStatus(network.NamespaceName, "eth1")
60+
eth1.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0xa4, 0xbf, 0x00, 0x10, 0x20, 0x31}
61+
require.NoError(t, st.Create(ctx, eth1))
62+
63+
eth2 := network.NewLinkStatus(network.NamespaceName, "eth2")
64+
eth2.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0xa4, 0xbf, 0x00, 0x10, 0x20, 0x33}
65+
require.NoError(t, st.Create(ctx, eth2))
66+
67+
eth3 := network.NewLinkStatus(network.NamespaceName, "eth3")
68+
eth3.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x4c, 0xd9, 0x8f, 0xb3, 0x34, 0xf8}
69+
require.NoError(t, st.Create(ctx, eth3))
70+
71+
eth4 := network.NewLinkStatus(network.NamespaceName, "eth4")
72+
eth4.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x4c, 0xd9, 0x8f, 0xb3, 0x34, 0xf7}
73+
require.NoError(t, st.Create(ctx, eth4))
74+
},
75+
expected: expectedNetworkConfig,
76+
},
77+
{
78+
name: "HardwareAddr fallback",
79+
networkJSON: []byte(`{"links":[{"id":"iface1","type":"phy","ethernet_mac_address":"aa:bb:cc:dd:ee:ff","mtu":1500}],"networks":[{"id":"net1","link":"iface1","type":"ipv4_dhcp"}]}`),
80+
setupState: func(t *testing.T, ctx context.Context, st state.State) {
81+
eth0 := network.NewLinkStatus(network.NamespaceName, "eth0")
82+
eth0.TypedSpec().HardwareAddr = nethelpers.HardwareAddr{0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}
83+
require.NoError(t, st.Create(ctx, eth0))
84+
},
85+
checkResult: func(t *testing.T, cfg *runtime.PlatformNetworkConfig) {
86+
require.Len(t, cfg.Links, 1)
87+
assert.Equal(t, "eth0", cfg.Links[0].Name)
88+
},
89+
},
90+
{
91+
name: "empty MAC does not match",
92+
networkJSON: []byte(`{"links":[{"id":"iface1","type":"phy","ethernet_mac_address":"","mtu":1500}],"networks":[{"id":"net1","link":"iface1","type":"ipv4_dhcp"}]}`),
93+
setupState: func(t *testing.T, ctx context.Context, st state.State) {
94+
eth0 := network.NewLinkStatus(network.NamespaceName, "eth0")
95+
require.NoError(t, st.Create(ctx, eth0))
96+
},
97+
expectedNeedsReconcile: true,
98+
},
99+
{
100+
name: "MAC mismatch triggers reconcile",
101+
networkJSON: []byte(`{"links":[{"id":"iface1","type":"phy","ethernet_mac_address":"aa:bb:cc:dd:ee:ff","mtu":1500}],"networks":[{"id":"net1","link":"iface1","type":"ipv4_dhcp"}]}`),
102+
setupState: func(t *testing.T, ctx context.Context, st state.State) {
103+
eth0 := network.NewLinkStatus(network.NamespaceName, "eth0")
104+
eth0.TypedSpec().PermanentAddr = nethelpers.HardwareAddr{0x11, 0x22, 0x33, 0x44, 0x55, 0x66}
105+
require.NoError(t, st.Create(ctx, eth0))
106+
},
107+
expectedNeedsReconcile: true,
108+
},
109+
} {
110+
t.Run(tt.name, func(t *testing.T) {
111+
t.Parallel()
112+
113+
ctx := t.Context()
114+
st := state.WrapCore(namespaced.NewState(inmem.Build))
115+
116+
tt.setupState(t, ctx, st)
117+
118+
var (
119+
metadata openstack.MetadataConfig
120+
n openstack.NetworkConfig
121+
)
122+
123+
if tt.metadataJSON != nil {
124+
require.NoError(t, json.Unmarshal(tt.metadataJSON, &metadata))
125+
}
126+
127+
require.NoError(t, json.Unmarshal(tt.networkJSON, &n))
128+
129+
o := &openstack.OpenStack{}
130+
131+
networkConfig, needsReconcile, err := o.ParseMetadata(ctx, &n, tt.extIPs, &metadata, st)
132+
require.NoError(t, err)
133+
134+
assert.Equal(t, tt.expectedNeedsReconcile, needsReconcile)
135+
136+
if tt.expected != "" {
137+
marshaled, err := yaml.Marshal(networkConfig)
138+
require.NoError(t, err)
139+
140+
assert.Equal(t, tt.expected, string(marshaled))
141+
}
142+
143+
if tt.checkResult != nil {
144+
tt.checkResult(t, networkConfig)
145+
}
146+
})
147+
}
78148
}

0 commit comments

Comments
 (0)