Skip to content

Commit 79cb3ef

Browse files
authored
Fix infinite loop in e2e ip generator when cidr exhausted (#10536)
1 parent b10b5a9 commit 79cb3ef

File tree

6 files changed

+227
-47
lines changed

6 files changed

+227
-47
lines changed

cmd/eks-a-tool/cmd/uniqueip.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,5 @@ func preRunUniqueIp(cmd *cobra.Command, args []string) {
4848
func generateUniqueIP(ctx context.Context) (string, error) {
4949
cidr := viper.GetString("cidr")
5050
ipgen := networkutils.NewIPGenerator(&networkutils.DefaultNetClient{})
51-
return ipgen.GenerateUniqueIP(cidr)
51+
return ipgen.GenerateUniqueIP(cidr, nil)
5252
}

internal/test/e2e/ipmanager.go

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,28 @@ func newE2EIPManager(logger logr.Logger, networkCidr string) *E2EIPManager {
2020
}
2121
}
2222

23-
func (ipman *E2EIPManager) reserveIP() string {
23+
func (ipman *E2EIPManager) reserveIP() (string, error) {
2424
return ipman.getUniqueIP(ipman.networkCidr, ipman.networkIPs)
2525
}
2626

27-
func (ipman *E2EIPManager) reserveIPPool(count int) networkutils.IPPool {
27+
func (ipman *E2EIPManager) reserveIPPool(count int) (networkutils.IPPool, error) {
2828
pool := networkutils.NewIPPool()
2929
for i := 0; i < count; i++ {
30-
pool.AddIP(ipman.reserveIP())
30+
ip, err := ipman.reserveIP()
31+
if err != nil {
32+
return networkutils.IPPool{}, err
33+
}
34+
pool.AddIP(ip)
3135
}
32-
return pool
36+
return pool, nil
3337
}
3438

35-
func (ipman *E2EIPManager) getUniqueIP(cidr string, usedIPs map[string]bool) string {
39+
func (ipman *E2EIPManager) getUniqueIP(cidr string, usedIPs map[string]bool) (string, error) {
3640
ipgen := networkutils.NewIPGenerator(&networkutils.DefaultNetClient{})
37-
ip, err := ipgen.GenerateUniqueIP(cidr)
38-
for ; err != nil || usedIPs[ip]; ip, err = ipgen.GenerateUniqueIP(cidr) {
39-
if err != nil {
40-
ipman.logger.V(2).Info("Warning: getting unique IP failed", "error", err)
41-
}
42-
if usedIPs[ip] {
43-
ipman.logger.V(2).Info("Warning: generated IP is already taken", "IP", ip)
44-
}
41+
ip, err := ipgen.GenerateUniqueIP(cidr, usedIPs)
42+
if err != nil {
43+
return "", err
4544
}
4645
usedIPs[ip] = true
47-
return ip
46+
return ip, nil
4847
}

internal/test/e2e/run.go

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ const (
2626
testResultError = "error"
2727
nonAirgappedHardware = "nonAirgappedHardware"
2828
airgappedHardware = "AirgappedHardware"
29-
maxIPPoolSize = 10
29+
maxIPPoolSize = 7
3030
minIPPoolSize = 1
3131
tinkerbellIPPoolSize = 2
3232

@@ -388,24 +388,33 @@ func splitTests(testsList []string, conf ParallelRunConf) ([]instanceRunConf, er
388388
if vsphereTestsRe.MatchString(testName) {
389389
if privateNetworkTestsRe.MatchString(testName) {
390390
if multiClusterTest {
391-
ips = vspherePrivateIPMan.reserveIPPool(maxIPPoolSize)
391+
ips, err = vspherePrivateIPMan.reserveIPPool(maxIPPoolSize)
392392
} else {
393-
ips = vspherePrivateIPMan.reserveIPPool(minIPPoolSize)
393+
ips, err = vspherePrivateIPMan.reserveIPPool(minIPPoolSize)
394394
}
395395
} else {
396396
if multiClusterTest {
397-
ips = vsphereIPMan.reserveIPPool(maxIPPoolSize)
397+
ips, err = vsphereIPMan.reserveIPPool(maxIPPoolSize)
398398
} else {
399-
ips = vsphereIPMan.reserveIPPool(minIPPoolSize)
399+
ips, err = vsphereIPMan.reserveIPPool(minIPPoolSize)
400400
}
401401
}
402+
if err != nil {
403+
return nil, fmt.Errorf("failed to reserve IP pool for test %s: %v", testName, err)
404+
}
402405
} else if nutanixTestsRe.MatchString(testName) {
403-
ips = nutanixIPMan.reserveIPPool(minIPPoolSize)
406+
ips, err = nutanixIPMan.reserveIPPool(minIPPoolSize)
407+
if err != nil {
408+
return nil, fmt.Errorf("failed to reserve IP pool for test %s: %v", testName, err)
409+
}
404410
} else if cloudstackTestRe.MatchString(testName) {
405411
if multiClusterTest {
406-
ips = cloudstackIPMan.reserveIPPool(maxIPPoolSize)
412+
ips, err = cloudstackIPMan.reserveIPPool(maxIPPoolSize)
407413
} else {
408-
ips = cloudstackIPMan.reserveIPPool(minIPPoolSize)
414+
ips, err = cloudstackIPMan.reserveIPPool(minIPPoolSize)
415+
}
416+
if err != nil {
417+
return nil, fmt.Errorf("failed to reserve IP pool for test %s: %v", testName, err)
409418
}
410419
}
411420

@@ -442,13 +451,19 @@ func appendNonAirgappedTinkerbellRunConfs(awsSession *session.Session, testsList
442451
if start > end/2 {
443452
break
444453
}
445-
ipPool := ipManager.reserveIPPool(tinkerbellIPPoolSize)
454+
ipPool, err := ipManager.reserveIPPool(tinkerbellIPPoolSize)
455+
if err != nil {
456+
return nil, fmt.Errorf("failed to reserve IP pool for tinkerbell test %s: %v", nonAirgappedTinkerbellTestsWithCount[start].Name, err)
457+
}
446458
runConfs = append(runConfs, newInstanceRunConf(awsSession, conf, len(runConfs), nonAirgappedTinkerbellTestsWithCount[start].Name, ipPool, []*api.Hardware{}, nonAirgappedTinkerbellTestsWithCount[start].Count, false, VSphereTestRunnerType, testRunnerConfig))
447459

448460
// Pop from both ends to run a longer count tests and shorter count tests together
449461
// to efficiently use the available hardware.
450462
if end-start > start {
451-
ipPool := ipManager.reserveIPPool(tinkerbellIPPoolSize)
463+
ipPool, err := ipManager.reserveIPPool(tinkerbellIPPoolSize)
464+
if err != nil {
465+
return nil, fmt.Errorf("failed to reserve IP pool for tinkerbell test %s: %v", nonAirgappedTinkerbellTestsWithCount[end-start].Name, err)
466+
}
452467
runConfs = append(runConfs, newInstanceRunConf(awsSession, conf, len(runConfs), nonAirgappedTinkerbellTestsWithCount[end-start].Name, ipPool, []*api.Hardware{}, nonAirgappedTinkerbellTestsWithCount[end-start].Count, false, VSphereTestRunnerType, testRunnerConfig))
453468
}
454469
}
@@ -469,7 +484,10 @@ func appendAirgappedTinkerbellRunConfs(awsSession *session.Session, testsList []
469484
return nil, err
470485
}
471486
for _, test := range airgappedTinkerbellTestsWithCount {
472-
ipPool := ipManager.reserveIPPool(tinkerbellIPPoolSize)
487+
ipPool, err := ipManager.reserveIPPool(tinkerbellIPPoolSize)
488+
if err != nil {
489+
return nil, fmt.Errorf("failed to reserve IP pool for airgapped tinkerbell test %s: %v", test.Name, err)
490+
}
473491
runConfs = append(runConfs, newInstanceRunConf(awsSession, conf, len(runConfs), test.Name, ipPool, []*api.Hardware{}, test.Count, true, VSphereTestRunnerType, testRunnerConfig))
474492
}
475493

pkg/networkutils/ipgenerator.go

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,32 +19,57 @@ func NewIPGenerator(netClient NetClient) IPGenerator {
1919
}
2020
}
2121

22-
func (ipgen IPGenerator) GenerateUniqueIP(cidrBlock string) (string, error) {
22+
func (ipgen IPGenerator) GenerateUniqueIP(cidrBlock string, usedIPs map[string]bool) (string, error) {
2323
_, cidr, err := net.ParseCIDR(cidrBlock)
2424
if err != nil {
2525
return "", err
2626
}
27-
uniqueIp, err := ipgen.randIp(cidr)
28-
if err != nil {
29-
return "", err
30-
}
31-
for IsIPInUse(ipgen.netClient, uniqueIp.String()) {
32-
uniqueIp, err = ipgen.randIp(cidr)
33-
if err != nil {
34-
return "", err
27+
28+
// Start from the first IP in the CIDR range
29+
ip := incrementIP(copyIP(cidr.IP))
30+
checked := 0
31+
32+
// Iterate sequentially through all IPs in the range
33+
for cidr.Contains(ip) {
34+
ipStr := ip.String()
35+
checked++
36+
37+
if usedIPs != nil && usedIPs[ipStr] {
38+
ip = incrementIP(ip)
39+
continue
40+
}
41+
42+
if IsIPInUse(ipgen.netClient, ipStr) {
43+
ip = incrementIP(ip)
44+
continue
3545
}
46+
47+
return ipStr, nil
3648
}
37-
return uniqueIp.String(), nil
49+
50+
return "", fmt.Errorf("no available IPs in CIDR %s (checked %d IPs)", cidrBlock, checked)
3851
}
3952

40-
// generates a random ip within the specified cidr block.
41-
func (ipgen IPGenerator) randIp(cidr *net.IPNet) (net.IP, error) {
42-
newIp := *new(net.IP)
43-
for i := 0; i < 4; i++ {
44-
newIp = append(newIp, byte(ipgen.rand.Intn(255))&^cidr.Mask[i]|cidr.IP[i])
45-
}
46-
if !cidr.Contains(newIp) {
47-
return nil, fmt.Errorf("random IP generation failed")
53+
// incrementIP returns a new IP address incremented by 1.
54+
func incrementIP(ip net.IP) net.IP {
55+
// Make a copy to avoid modifying the original
56+
newIP := copyIP(ip)
57+
58+
// Increment from the last byte
59+
for i := len(newIP) - 1; i >= 0; i-- {
60+
newIP[i]++
61+
if newIP[i] != 0 {
62+
// No overflow, we're done
63+
break
64+
}
65+
// Overflow occurred, continue to next byte
4866
}
49-
return newIp, nil
67+
return newIP
68+
}
69+
70+
// copyIP creates a copy of the IP address.
71+
func copyIP(ip net.IP) net.IP {
72+
dup := make(net.IP, len(ip))
73+
copy(dup, ip)
74+
return dup
5075
}

pkg/networkutils/ipgenerator_test.go

Lines changed: 139 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package networkutils_test
33
import (
44
"errors"
55
"net"
6+
"strings"
7+
"syscall"
68
"testing"
79
"time"
810

@@ -19,12 +21,148 @@ func (n *DummyNetClient) DialTimeout(network, address string, timeout time.Durat
1921
return nil, errors.New("")
2022
}
2123

24+
// MockNetClientAllInUse simulates all IPs being in use.
25+
type MockNetClientAllInUse struct{}
26+
27+
func (n *MockNetClientAllInUse) DialTimeout(network, address string, timeout time.Duration) (net.Conn, error) {
28+
// Simulate all IPs are in use by returning ECONNREFUSED
29+
// IsIPInUse checks for: err == nil OR errors.Is(err, syscall.ECONNREFUSED) OR errors.Is(err, syscall.ECONNRESET)
30+
return nil, &net.OpError{
31+
Op: "dial",
32+
Net: "tcp",
33+
Err: syscall.ECONNREFUSED,
34+
}
35+
}
36+
37+
// MockNetClientSomeInUse simulates specific IPs being in use.
38+
type MockNetClientSomeInUse struct {
39+
inUseIPs map[string]bool
40+
}
41+
42+
func (n *MockNetClientSomeInUse) DialTimeout(network, address string, timeout time.Duration) (net.Conn, error) {
43+
// Extract IP from address (format is "IP:port")
44+
ip := strings.Split(address, ":")[0]
45+
if n.inUseIPs[ip] {
46+
// Simulate IP in use - return ECONNREFUSED
47+
return nil, &net.OpError{
48+
Op: "dial",
49+
Net: "tcp",
50+
Err: syscall.ECONNREFUSED,
51+
}
52+
}
53+
// Simulate IP not in use - return generic error
54+
return nil, errors.New("connection timeout")
55+
}
56+
2257
func TestGenerateUniqueIP(t *testing.T) {
2358
cidrBlock := "1.2.3.4/16"
2459

2560
ipgen := networkutils.NewIPGenerator(&DummyNetClient{})
26-
ip, err := ipgen.GenerateUniqueIP(cidrBlock)
61+
ip, err := ipgen.GenerateUniqueIP(cidrBlock, nil)
2762
if err != nil {
2863
t.Fatalf("GenerateUniqueIP() ip = %v error: %v", ip, err)
2964
}
3065
}
66+
67+
func TestGenerateUniqueIPWithUsedIPsMap(t *testing.T) {
68+
cidrBlock := "192.168.1.0/29" // Small range: .0 to .7 (6 usable IPs)
69+
70+
// Mark first 3 IPs as used
71+
usedIPs := map[string]bool{
72+
"192.168.1.1": true,
73+
"192.168.1.2": true,
74+
"192.168.1.3": true,
75+
}
76+
77+
ipgen := networkutils.NewIPGenerator(&DummyNetClient{})
78+
ip, err := ipgen.GenerateUniqueIP(cidrBlock, usedIPs)
79+
if err != nil {
80+
t.Fatalf("GenerateUniqueIP() error = %v", err)
81+
}
82+
83+
// Should skip the used IPs and return .4 or later
84+
if ip == "192.168.1.1" || ip == "192.168.1.2" || ip == "192.168.1.3" {
85+
t.Errorf("GenerateUniqueIP() returned used IP: %v", ip)
86+
}
87+
}
88+
89+
func TestGenerateUniqueIPExhaustion(t *testing.T) {
90+
cidrBlock := "10.0.0.0/30" // Very small range: only .0 to .3 (2 usable IPs)
91+
92+
// Use a client that marks all IPs as in use
93+
ipgen := networkutils.NewIPGenerator(&MockNetClientAllInUse{})
94+
_, err := ipgen.GenerateUniqueIP(cidrBlock, nil)
95+
if err == nil {
96+
t.Fatal("GenerateUniqueIP() expected error for exhausted IP pool, got nil")
97+
}
98+
99+
// Verify error message mentions the CIDR
100+
if !strings.Contains(err.Error(), cidrBlock) {
101+
t.Errorf("Error message should mention CIDR %s, got: %v", cidrBlock, err)
102+
}
103+
}
104+
105+
func TestGenerateUniqueIPWithNetworkInUse(t *testing.T) {
106+
cidrBlock := "10.1.1.0/29" // Small range for testing
107+
108+
// Mark first 2 IPs as in use on network
109+
mockClient := &MockNetClientSomeInUse{
110+
inUseIPs: map[string]bool{
111+
"10.1.1.1": true,
112+
"10.1.1.2": true,
113+
},
114+
}
115+
116+
ipgen := networkutils.NewIPGenerator(mockClient)
117+
ip, err := ipgen.GenerateUniqueIP(cidrBlock, nil)
118+
if err != nil {
119+
t.Fatalf("GenerateUniqueIP() error = %v", err)
120+
}
121+
122+
// Should skip the in-use IPs
123+
if ip == "10.1.1.1" || ip == "10.1.1.2" {
124+
t.Errorf("GenerateUniqueIP() returned in-use IP: %v", ip)
125+
}
126+
}
127+
128+
func TestGenerateUniqueIPCombinedUsedAndNetwork(t *testing.T) {
129+
cidrBlock := "172.16.0.0/29"
130+
131+
// Mark some IPs as used in map
132+
usedIPs := map[string]bool{
133+
"172.16.0.1": true,
134+
"172.16.0.2": true,
135+
}
136+
137+
// Mark some IPs as in use on network
138+
mockClient := &MockNetClientSomeInUse{
139+
inUseIPs: map[string]bool{
140+
"172.16.0.3": true,
141+
"172.16.0.4": true,
142+
},
143+
}
144+
145+
ipgen := networkutils.NewIPGenerator(mockClient)
146+
ip, err := ipgen.GenerateUniqueIP(cidrBlock, usedIPs)
147+
if err != nil {
148+
t.Fatalf("GenerateUniqueIP() error = %v", err)
149+
}
150+
151+
// Should skip all used and in-use IPs, return .5 or later
152+
usedOrInUse := []string{"172.16.0.1", "172.16.0.2", "172.16.0.3", "172.16.0.4"}
153+
for _, badIP := range usedOrInUse {
154+
if ip == badIP {
155+
t.Errorf("GenerateUniqueIP() returned used/in-use IP: %v", ip)
156+
}
157+
}
158+
}
159+
160+
func TestGenerateUniqueIPInvalidCIDR(t *testing.T) {
161+
cidrBlock := "invalid-cidr"
162+
163+
ipgen := networkutils.NewIPGenerator(&DummyNetClient{})
164+
_, err := ipgen.GenerateUniqueIP(cidrBlock, nil)
165+
if err == nil {
166+
t.Fatal("GenerateUniqueIP() expected error for invalid CIDR, got nil")
167+
}
168+
}

test/framework/network.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func PopIPFromEnv(ipPoolEnvVar string) (string, error) {
3131

3232
func GenerateUniqueIp(cidr string) (string, error) {
3333
ipgen := networkutils.NewIPGenerator(&networkutils.DefaultNetClient{})
34-
ip, err := ipgen.GenerateUniqueIP(cidr)
34+
ip, err := ipgen.GenerateUniqueIP(cidr, nil)
3535
if err != nil {
3636
return "", fmt.Errorf("getting unique IP for cidr %s: %v", cidr, err)
3737
}

0 commit comments

Comments
 (0)