Skip to content

Commit b88f30a

Browse files
authored
Merge pull request #114 from leaseweb/fix/ip_orphaning
fix: Prevent public IP orphaning under partial failure conditions
2 parents 07839bb + cb58410 commit b88f30a

File tree

2 files changed

+1065
-9
lines changed

2 files changed

+1065
-9
lines changed

cloudstack/cloudstack_loadbalancer.go

Lines changed: 143 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,49 @@ func (cs *CSCloud) EnsureLoadBalancer(ctx context.Context, clusterName string, s
133133
}
134134

135135
if !lb.hasLoadBalancerIP() { //nolint:nestif
136-
// Create or retrieve the load balancer IP.
137-
if err := lb.getLoadBalancerIP(service.Spec.LoadBalancerIP); err != nil {
138-
return nil, err
136+
// Before allocating a new IP, check the service annotation for a previously assigned IP.
137+
// This handles recovery from partial failures where the IP was allocated and annotated
138+
// but subsequent operations (rule creation, IP switch) failed.
139+
annotatedIP := getStringFromServiceAnnotation(service, ServiceAnnotationLoadBalancerAddress, "")
140+
if annotatedIP != "" {
141+
specIP := service.Spec.LoadBalancerIP
142+
if specIP == "" || specIP == annotatedIP {
143+
// Case 1: Auto-allocated IP recovery — the annotated IP is the one we want.
144+
found, lookupErr := lb.lookupPublicIPAddress(annotatedIP)
145+
if lookupErr != nil {
146+
klog.Warningf("Error looking up annotated IP %v for recovery: %v", annotatedIP, lookupErr)
147+
} else if found {
148+
klog.V(4).Infof("Recovered previously allocated IP %v from annotation", annotatedIP)
149+
}
150+
} else {
151+
// Case 2: IP switch retry — annotation holds old IP that failed to release.
152+
klog.V(4).Infof("Detected IP switch retry: annotation has %v, spec wants %v; attempting cleanup of old IP", annotatedIP, specIP)
153+
oldFound, lookupErr := lb.lookupPublicIPAddress(annotatedIP)
154+
if lookupErr != nil {
155+
klog.Warningf("Error looking up old annotated IP %v during IP switch cleanup: %v", annotatedIP, lookupErr)
156+
} else if oldFound {
157+
shouldRelease, shouldErr := cs.shouldReleaseLoadBalancerIP(lb, service)
158+
if shouldErr != nil {
159+
klog.Warningf("Error checking if old IP %v should be released: %v", annotatedIP, shouldErr)
160+
} else if shouldRelease {
161+
if releaseErr := lb.releaseLoadBalancerIP(); releaseErr != nil {
162+
klog.Warningf("Best-effort release of old IP %v failed: %v", annotatedIP, releaseErr)
163+
} else {
164+
klog.Infof("Released old IP %v during IP switch retry", annotatedIP)
165+
}
166+
}
167+
}
168+
// Reset so we fall through to allocate the new IP
169+
lb.ipAddr = ""
170+
lb.ipAddrID = ""
171+
}
172+
}
173+
174+
if !lb.hasLoadBalancerIP() {
175+
// Create or retrieve the load balancer IP.
176+
if err := lb.getLoadBalancerIP(service.Spec.LoadBalancerIP); err != nil {
177+
return nil, err
178+
}
139179
}
140180

141181
msg := fmt.Sprintf("Created new load balancer for service %s with algorithm '%s' and IP address %s", serviceName, lb.algorithm, lb.ipAddr)
@@ -351,11 +391,12 @@ func (cs *CSCloud) EnsureLoadBalancerDeleted(ctx context.Context, clusterName st
351391
return err
352392
}
353393

354-
// If no rules exist, the load balancer doesn't exist - nothing to delete
394+
// If no rules exist, the load balancer doesn't exist. However, an IP may have been
395+
// orphaned from a previous partial failure. Check the service annotation for cleanup.
355396
if len(lb.rules) == 0 {
356-
klog.V(4).Infof("No load balancer rules found for service, nothing to delete")
397+
klog.V(4).Infof("No load balancer rules found for service, checking annotation for orphaned IP")
357398

358-
return nil
399+
return cs.releaseOrphanedIPIfNeeded(lb, service)
359400
}
360401

361402
serviceName := fmt.Sprintf("%s/%s", service.Namespace, service.Name)
@@ -485,6 +526,50 @@ func (cs *CSCloud) shouldReleaseLoadBalancerIP(lb *loadBalancer, service *corev1
485526
return true, nil
486527
}
487528

529+
// releaseOrphanedIPIfNeeded checks the service annotation for an orphaned IP and releases it if appropriate.
530+
// This handles the case where all LB rules were successfully deleted but IP release failed on a prior attempt.
531+
func (cs *CSCloud) releaseOrphanedIPIfNeeded(lb *loadBalancer, service *corev1.Service) error {
532+
annotatedIP := getStringFromServiceAnnotation(service, ServiceAnnotationLoadBalancerAddress, "")
533+
if annotatedIP == "" {
534+
return nil
535+
}
536+
537+
found, lookupErr := lb.lookupPublicIPAddress(annotatedIP)
538+
if lookupErr != nil {
539+
klog.Warningf("Error looking up annotated IP %v during delete: %v", annotatedIP, lookupErr)
540+
541+
return nil
542+
}
543+
544+
if !found {
545+
return nil
546+
}
547+
548+
shouldRelease, shouldErr := cs.shouldReleaseLoadBalancerIP(lb, service)
549+
if shouldErr != nil {
550+
klog.Warningf("Error checking if annotated IP %v should be released: %v", annotatedIP, shouldErr)
551+
552+
return nil
553+
}
554+
555+
if !shouldRelease {
556+
klog.V(4).Infof("Annotated IP %v should not be released (user-specified or has other rules)", annotatedIP)
557+
558+
return nil
559+
}
560+
561+
if releaseErr := lb.releaseLoadBalancerIP(); releaseErr != nil {
562+
return fmt.Errorf("error releasing orphaned load balancer IP %v: %w", annotatedIP, releaseErr)
563+
}
564+
565+
serviceName := fmt.Sprintf("%s/%s", service.Namespace, service.Name)
566+
msg := fmt.Sprintf("Released orphaned load balancer IP %s for service %s", annotatedIP, serviceName)
567+
cs.eventRecorder.Event(service, corev1.EventTypeNormal, "ReleasedOrphanedIP", msg)
568+
klog.Info(msg)
569+
570+
return nil
571+
}
572+
488573
// GetLoadBalancerName returns the name of the LoadBalancer.
489574
func (cs *CSCloud) GetLoadBalancerName(_ context.Context, clusterName string, service *corev1.Service) string {
490575
return Sprintf255(lbNameFormat, servicePrefix, clusterName, service.Namespace, service.Name)
@@ -495,6 +580,20 @@ func (cs *CSCloud) getLoadBalancerLegacyName(_ context.Context, _ string, servic
495580
return cloudprovider.DefaultLoadBalancerName(service)
496581
}
497582

583+
// filterRulesByPrefix returns only the rules whose Name starts with the given prefix.
584+
// This is needed because CloudStack's SetKeyword uses LIKE %keyword% matching,
585+
// which can return rules belonging to other services with overlapping name substrings.
586+
func filterRulesByPrefix(rules []*cloudstack.LoadBalancerRule, prefix string) []*cloudstack.LoadBalancerRule {
587+
var filtered []*cloudstack.LoadBalancerRule
588+
for _, rule := range rules {
589+
if strings.HasPrefix(rule.Name, prefix) {
590+
filtered = append(filtered, rule)
591+
}
592+
}
593+
594+
return filtered
595+
}
596+
498597
// getLoadBalancerByName retrieves the IP address and ID and all the existing rules it can find.
499598
func (cs *CSCloud) getLoadBalancerByName(name, legacyName string) (*loadBalancer, error) {
500599
lb := &loadBalancer{
@@ -517,23 +616,29 @@ func (cs *CSCloud) getLoadBalancerByName(name, legacyName string) (*loadBalancer
517616
return nil, fmt.Errorf("error retrieving load balancer rules: %w", err)
518617
}
519618

619+
// Filter keyword results to exact prefix matches. CloudStack's SetKeyword uses
620+
// LIKE %keyword% matching, so searching for "foo" can also return "foobar" rules.
621+
filtered := filterRulesByPrefix(l.LoadBalancerRules, lb.name+"-")
622+
520623
// If no rules were found, check the legacy name.
521-
if len(l.LoadBalancerRules) == 0 { //nolint:nestif
624+
if len(filtered) == 0 { //nolint:nestif
522625
if len(legacyName) > 0 {
523626
p.SetKeyword(legacyName)
524627
l, err = cs.client.LoadBalancer.ListLoadBalancerRules(p)
525628
if err != nil {
526629
return nil, fmt.Errorf("error retrieving load balancer rules: %w", err)
527630
}
528-
if len(l.LoadBalancerRules) > 0 {
631+
legacyFiltered := filterRulesByPrefix(l.LoadBalancerRules, legacyName+"-")
632+
if len(legacyFiltered) > 0 {
529633
lb.name = legacyName
634+
filtered = legacyFiltered
530635
}
531636
} else {
532637
return lb, nil
533638
}
534639
}
535640

536-
for _, lbRule := range l.LoadBalancerRules {
641+
for _, lbRule := range filtered {
537642
lb.rules[lbRule.Name] = lbRule
538643

539644
if lb.ipAddr != "" && lb.ipAddr != lbRule.Publicip {
@@ -702,6 +807,35 @@ func (lb *loadBalancer) validatePublicIPAvailable(ip string) error {
702807
return nil
703808
}
704809

810+
// lookupPublicIPAddress checks whether the given IP address is already allocated in CloudStack.
811+
// If found and allocated, it sets lb.ipAddr and lb.ipAddrID and returns (true, nil).
812+
// If not found or not allocated, it returns (false, nil) without modifying lb state.
813+
// Unlike getPublicIPAddress, this method does NOT call associatePublicIPAddress for unallocated IPs.
814+
func (lb *loadBalancer) lookupPublicIPAddress(ip string) (bool, error) {
815+
p := lb.Address.NewListPublicIpAddressesParams()
816+
p.SetIpaddress(ip)
817+
p.SetAllocatedonly(true)
818+
p.SetListall(true)
819+
820+
if lb.projectID != "" {
821+
p.SetProjectid(lb.projectID)
822+
}
823+
824+
l, err := lb.Address.ListPublicIpAddresses(p)
825+
if err != nil {
826+
return false, fmt.Errorf("error looking up IP address %v: %w", ip, err)
827+
}
828+
829+
if l.Count != 1 {
830+
return false, nil
831+
}
832+
833+
lb.ipAddr = l.PublicIpAddresses[0].Ipaddress
834+
lb.ipAddrID = l.PublicIpAddresses[0].Id
835+
836+
return true, nil
837+
}
838+
705839
// getPublicIPAddressID retrieves the ID of the given IP, and sets the address and its ID.
706840
func (lb *loadBalancer) getPublicIPAddress(loadBalancerIP string) error {
707841
klog.V(4).Infof("Retrieve load balancer IP details: %v", loadBalancerIP)

0 commit comments

Comments
 (0)