Skip to content

Commit b5046a0

Browse files
Make throttling nmagent fetches for nodesubnet more dynamic (#3023)
* feat(CNS): Early work on better throttling in NMAgent fetch for nodesubnet * feat(CNS): Update NMAgent fetches to be async with binary exponential backoff * chore: check for empty nmagent response * test: update test for empty response * style: make linter happy * chore: fix some comments * fix: Fix bug in refresh * refactor: Address comments * refactor: ignore primary ip * refactor: move refresh out of ipfetcher * test: add ip fetcher tests * fix: remove broken import * fix: fix import * fix: fix linting * fix: fix some failing tests * chore: Remove unused function * test: test updates * fix: address comments * chore: add missed file * chore: add comment about static interval * feat: address Evan's comment to require Equal method on cached results * chore: add missed file * feat: more efficient equality * refactor: address Evan's comment * refactor: address Tim's comment * fix: undo accidental commit * fix: make linter happy * fix: make linter happy
1 parent 3ed0bcd commit b5046a0

File tree

12 files changed

+581
-80
lines changed

12 files changed

+581
-80
lines changed

cns/nodesubnet/helper_for_ip_fetcher_test.go

Lines changed: 0 additions & 9 deletions
This file was deleted.

cns/nodesubnet/ip_fetcher.go

Lines changed: 66 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,49 +7,93 @@ import (
77
"time"
88

99
"github.com/Azure/azure-container-networking/nmagent"
10+
"github.com/Azure/azure-container-networking/refresh"
1011
"github.com/pkg/errors"
1112
)
1213

14+
const (
15+
// Default minimum time between secondary IP fetches
16+
DefaultMinRefreshInterval = 4 * time.Second
17+
// Default maximum time between secondary IP fetches
18+
DefaultMaxRefreshInterval = 1024 * time.Second
19+
)
20+
1321
var ErrRefreshSkipped = errors.New("refresh skipped due to throttling")
1422

1523
// InterfaceRetriever is an interface implemented by the NMAgent client, and also by a mock client for testing.
1624
type InterfaceRetriever interface {
1725
GetInterfaceIPInfo(ctx context.Context) (nmagent.Interfaces, error)
1826
}
1927

20-
type IPFetcher struct {
21-
// Node subnet state
22-
secondaryIPQueryInterval time.Duration // Minimum time between secondary IP fetches
23-
secondaryIPLastRefreshTime time.Time // Time of last secondary IP fetch
28+
// IPConsumer is an interface implemented by whoever consumes the secondary IPs fetched in nodesubnet
29+
type IPConsumer interface {
30+
UpdateIPsForNodeSubnet([]netip.Addr) error
31+
}
2432

25-
ipFectcherClient InterfaceRetriever
33+
// IPFetcher fetches secondary IPs from NMAgent at regular intervals. The
34+
// interval will vary within the range of minRefreshInterval and
35+
// maxRefreshInterval. When no diff is observed after a fetch, the interval
36+
// doubles (subject to the maximum interval). When a diff is observed, the
37+
// interval resets to the minimum.
38+
type IPFetcher struct {
39+
// Node subnet config
40+
intfFetcherClient InterfaceRetriever
41+
consumer IPConsumer
42+
fetcher *refresh.Fetcher[nmagent.Interfaces]
2643
}
2744

28-
func NewIPFetcher(nmaClient InterfaceRetriever, queryInterval time.Duration) *IPFetcher {
29-
return &IPFetcher{
30-
ipFectcherClient: nmaClient,
31-
secondaryIPQueryInterval: queryInterval,
45+
// NewIPFetcher creates a new IPFetcher. If minInterval is 0, it will default to 4 seconds.
46+
// If maxInterval is 0, it will default to 1024 seconds (or minInterval, if it is higher).
47+
func NewIPFetcher(
48+
client InterfaceRetriever,
49+
consumer IPConsumer,
50+
minInterval time.Duration,
51+
maxInterval time.Duration,
52+
logger refresh.Logger,
53+
) *IPFetcher {
54+
if minInterval == 0 {
55+
minInterval = DefaultMinRefreshInterval
56+
}
57+
58+
if maxInterval == 0 {
59+
maxInterval = DefaultMaxRefreshInterval
60+
}
61+
62+
maxInterval = max(maxInterval, minInterval)
63+
64+
newIPFetcher := &IPFetcher{
65+
intfFetcherClient: client,
66+
consumer: consumer,
67+
fetcher: nil,
3268
}
69+
fetcher := refresh.NewFetcher[nmagent.Interfaces](client.GetInterfaceIPInfo, minInterval, maxInterval, newIPFetcher.ProcessInterfaces, logger)
70+
newIPFetcher.fetcher = fetcher
71+
return newIPFetcher
72+
}
73+
74+
// Start the IPFetcher.
75+
func (c *IPFetcher) Start(ctx context.Context) {
76+
c.fetcher.Start(ctx)
3377
}
3478

35-
func (c *IPFetcher) RefreshSecondaryIPsIfNeeded(ctx context.Context) (ips []netip.Addr, err error) {
36-
// If secondaryIPQueryInterval has elapsed since the last fetch, fetch secondary IPs
37-
if time.Since(c.secondaryIPLastRefreshTime) < c.secondaryIPQueryInterval {
38-
return nil, ErrRefreshSkipped
79+
// ProcessInterfaces extracts the secondary IPs from an NMAgent response and passes them to the consumer. An empty response is reported as an error.
80+
func (c *IPFetcher) ProcessInterfaces(response nmagent.Interfaces) error {
81+
if len(response.Entries) == 0 {
82+
return errors.New("no interfaces found in response from NMAgent")
3983
}
4084

41-
c.secondaryIPLastRefreshTime = time.Now()
42-
response, err := c.ipFectcherClient.GetInterfaceIPInfo(ctx)
85+
_, secondaryIPs := flattenIPListFromResponse(&response)
86+
err := c.consumer.UpdateIPsForNodeSubnet(secondaryIPs)
4387
if err != nil {
44-
return nil, errors.Wrap(err, "getting interface IPs")
88+
return errors.Wrap(err, "updating secondary IPs")
4589
}
4690

47-
res := flattenIPListFromResponse(&response)
48-
return res, nil
91+
return nil
4992
}
5093

5194
// Get the primary IP and the list of secondary IPs from fetched Interfaces
52-
func flattenIPListFromResponse(resp *nmagent.Interfaces) (res []netip.Addr) {
95+
func flattenIPListFromResponse(resp *nmagent.Interfaces) (primary netip.Addr, secondaryIPs []netip.Addr) {
96+
var primaryIP netip.Addr
5397
// For each interface...
5498
for _, intf := range resp.Entries {
5599
if !intf.IsPrimary {
@@ -63,15 +107,16 @@ func flattenIPListFromResponse(resp *nmagent.Interfaces) (res []netip.Addr) {
63107
for _, a := range s.IPAddress {
64108
// Primary addresses are reserved for the host.
65109
if a.IsPrimary {
110+
primaryIP = netip.Addr(a.Address)
66111
continue
67112
}
68113

69-
res = append(res, netip.Addr(a.Address))
114+
secondaryIPs = append(secondaryIPs, netip.Addr(a.Address))
70115
addressCount++
71116
}
72117
log.Printf("Got %d addresses from subnet %s", addressCount, s.Prefix)
73118
}
74119
}
75120

76-
return res
121+
return primaryIP, secondaryIPs
77122
}

cns/nodesubnet/ip_fetcher_test.go

Lines changed: 81 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,75 +2,102 @@ package nodesubnet_test
22

33
import (
44
"context"
5-
"errors"
5+
"net/netip"
66
"testing"
7-
"time"
87

8+
"github.com/Azure/azure-container-networking/cns/logger"
99
"github.com/Azure/azure-container-networking/cns/nodesubnet"
1010
"github.com/Azure/azure-container-networking/nmagent"
1111
)
1212

13-
// Mock client that simply tracks if refresh has been called
14-
type TestClient struct {
15-
fetchCalled bool
13+
// TestConsumer is a mock IP consumer. It records how many times it has been
// updated and how many secondary IPs the most recent update carried.
//
// The counters are plain ints with no synchronization, so they must only be
// read and written from a single goroutine.
type TestConsumer struct {
	consumeCount     int // number of updates received so far
	secondaryIPCount int // number of IPs in the most recent update
}

// FetchConsumeCount returns the number of updates received so far.
func (c *TestConsumer) FetchConsumeCount() int {
	return c.consumeCount
}

// FetchSecondaryIPCount returns the number of secondary IPs carried by the
// most recent update.
func (c *TestConsumer) FetchSecondaryIPCount() int {
	// Report the IP count, not the consume count: the two only coincide
	// when exactly one update carrying exactly one IP has been seen.
	return c.secondaryIPCount
}

// updateCounts records one more update that carried ipCount secondary IPs.
func (c *TestConsumer) updateCounts(ipCount int) {
	c.consumeCount++
	c.secondaryIPCount = ipCount
}

// UpdateIPsForNodeSubnet is the mock IP update: it only records the call and
// the number of IPs passed in, and always returns nil.
func (c *TestConsumer) UpdateIPsForNodeSubnet(ips []netip.Addr) error {
	c.updateCounts(len(ips))
	return nil
}
40+
41+
var _ nodesubnet.IPConsumer = &TestConsumer{}
42+
43+
// Mock client that simply satisfies the interface
44+
type TestClient struct{}
45+
1846
// Mock refresh
1947
func (c *TestClient) GetInterfaceIPInfo(_ context.Context) (nmagent.Interfaces, error) {
20-
c.fetchCalled = true
2148
return nmagent.Interfaces{}, nil
2249
}
2350

24-
func TestRefreshSecondaryIPsIfNeeded(t *testing.T) {
25-
getTests := []struct {
26-
name string
27-
shouldCall bool
28-
interval time.Duration
29-
}{
30-
{
31-
"fetch called",
32-
true,
33-
-1 * time.Second, // Negative timeout to force refresh
34-
},
35-
{
36-
"no refresh needed",
37-
false,
38-
10 * time.Hour, // High timeout to avoid refresh
51+
func TestEmptyResponse(t *testing.T) {
52+
consumerPtr := &TestConsumer{}
53+
fetcher := nodesubnet.NewIPFetcher(&TestClient{}, consumerPtr, 0, 0, logger.Log)
54+
err := fetcher.ProcessInterfaces(nmagent.Interfaces{})
55+
checkErr(t, err, true)
56+
57+
// No consumes, since the responses are empty
58+
if consumerPtr.FetchConsumeCount() > 0 {
59+
t.Error("Consume called unexpectedly, shouldn't be called since responses are empty")
60+
}
61+
}
62+
63+
func TestFlatten(t *testing.T) {
64+
interfaces := nmagent.Interfaces{
65+
Entries: []nmagent.Interface{
66+
{
67+
MacAddress: nmagent.MACAddress{0x00, 0x0D, 0x3A, 0xF9, 0xDC, 0xA6},
68+
IsPrimary: true,
69+
InterfaceSubnets: []nmagent.InterfaceSubnet{
70+
{
71+
Prefix: "10.240.0.0/16",
72+
IPAddress: []nmagent.NodeIP{
73+
{
74+
Address: nmagent.IPAddress(netip.AddrFrom4([4]byte{10, 240, 0, 5})),
75+
IsPrimary: true,
76+
},
77+
{
78+
Address: nmagent.IPAddress(netip.AddrFrom4([4]byte{10, 240, 0, 6})),
79+
IsPrimary: false,
80+
},
81+
},
82+
},
83+
},
84+
},
3985
},
4086
}
87+
consumerPtr := &TestConsumer{}
88+
fetcher := nodesubnet.NewIPFetcher(&TestClient{}, consumerPtr, 0, 0, logger.Log)
89+
err := fetcher.ProcessInterfaces(interfaces)
90+
checkErr(t, err, false)
4191

42-
clientPtr := &TestClient{}
43-
fetcher := nodesubnet.NewIPFetcher(clientPtr, 0)
44-
45-
for _, test := range getTests {
46-
test := test
47-
t.Run(test.name, func(t *testing.T) { // Do not parallelize, as we are using a shared client
48-
fetcher.SetSecondaryIPQueryInterval(test.interval)
49-
ctx, cancel := testContext(t)
50-
defer cancel()
51-
clientPtr.fetchCalled = false
52-
_, err := fetcher.RefreshSecondaryIPsIfNeeded(ctx)
53-
54-
if test.shouldCall {
55-
if err != nil && errors.Is(err, nodesubnet.ErrRefreshSkipped) {
56-
t.Error("refresh expected, but didn't happen")
57-
}
58-
59-
checkErr(t, err, false)
60-
} else if err == nil || !errors.Is(err, nodesubnet.ErrRefreshSkipped) {
61-
t.Error("refresh not expected, but happened")
62-
}
63-
})
92+
// 1 consume to be called
93+
if consumerPtr.FetchConsumeCount() != 1 {
94+
t.Error("Consume expected to be called, but not called")
6495
}
65-
}
6696

67-
// testContext creates a context from the provided testing.T that will be
68-
// canceled if the test suite is terminated.
69-
func testContext(t *testing.T) (context.Context, context.CancelFunc) {
70-
if deadline, ok := t.Deadline(); ok {
71-
return context.WithDeadline(context.Background(), deadline)
97+
// 1 secondary IP expected (the primary IP is excluded)
98+
if consumerPtr.FetchSecondaryIPCount() != 1 {
99+
t.Error("Wrong number of secondary IPs ", consumerPtr.FetchSecondaryIPCount())
72100
}
73-
return context.WithCancel(context.Background())
74101
}
75102

76103
// checkErr is an assertion of the presence or absence of an error
@@ -84,3 +111,7 @@ func checkErr(t *testing.T, err error, shouldErr bool) {
84111
t.Fatal("expected error but received none")
85112
}
86113
}
114+
115+
func init() {
116+
logger.InitLogger("testlogs", 0, 0, "./")
117+
}

nmagent/equality.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package nmagent
2+
3+
// Equal compares two Interfaces objects for equality.
4+
func (i Interfaces) Equal(other Interfaces) bool {
5+
if len(i.Entries) != len(other.Entries) {
6+
return false
7+
}
8+
for idx, entry := range i.Entries {
9+
if !entry.Equal(other.Entries[idx]) {
10+
return false
11+
}
12+
}
13+
return true
14+
}
15+
16+
// Equal compares two Interface objects for equality.
17+
func (i Interface) Equal(other Interface) bool {
18+
if len(i.InterfaceSubnets) != len(other.InterfaceSubnets) {
19+
return false
20+
}
21+
for idx, subnet := range i.InterfaceSubnets {
22+
if !subnet.Equal(other.InterfaceSubnets[idx]) {
23+
return false
24+
}
25+
}
26+
if i.IsPrimary != other.IsPrimary || !i.MacAddress.Equal(other.MacAddress) {
27+
return false
28+
}
29+
return true
30+
}
31+
32+
// Equal compares two InterfaceSubnet objects for equality.
33+
func (s InterfaceSubnet) Equal(other InterfaceSubnet) bool {
34+
if len(s.IPAddress) != len(other.IPAddress) {
35+
return false
36+
}
37+
if s.Prefix != other.Prefix {
38+
return false
39+
}
40+
for idx, ip := range s.IPAddress {
41+
if !ip.Equal(other.IPAddress[idx]) {
42+
return false
43+
}
44+
}
45+
return true
46+
}
47+
48+
// Equal compares two NodeIP objects for equality.
49+
func (ip NodeIP) Equal(other NodeIP) bool {
50+
return ip.IsPrimary == other.IsPrimary && ip.Address.Equal(other.Address)
51+
}

nmagent/macaddress.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@ const (
1414

1515
type MACAddress net.HardwareAddr
1616

17+
func (h MACAddress) Equal(other MACAddress) bool {
18+
if len(h) != len(other) {
19+
return false
20+
}
21+
for i := range h {
22+
if h[i] != other[i] {
23+
return false
24+
}
25+
}
26+
return true
27+
}
28+
1729
func (h *MACAddress) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
1830
var macStr string
1931
if err := d.DecodeElement(&macStr, &start); err != nil {

refresh/equaler.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package refresh
2+
3+
type equaler[T any] interface {
4+
Equal(T) bool
5+
}

0 commit comments

Comments
 (0)