Skip to content

Commit fa8c306

Browse files
Delete LB that has gone into ERROR state (#2536)
This commit implements deleting the LB when it goes into ERROR state on creation. This makes us less prone to transient Octavia issues. A possible improvement would be to extend this behavior to all cases where we wait for the LB to become ACTIVE. However, attempting to implement that complicates the code a lot, so let's start with this first step. Co-authored-by: Michał Dulko <[email protected]>
1 parent 133871a commit fa8c306

File tree

2 files changed

+88
-71
lines changed

2 files changed

+88
-71
lines changed

pkg/openstack/loadbalancer.go

Lines changed: 87 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,14 @@ func (lbaas *LbaasV2) createOctaviaLoadBalancer(name, clusterName string, servic
545545
}
546546

547547
if loadbalancer, err = openstackutil.WaitActiveAndGetLoadBalancer(lbaas.lb, loadbalancer.ID); err != nil {
548+
if loadbalancer.ProvisioningStatus == errorStatus {
549+
// If LB landed in ERROR state we should delete it and retry the creation later.
550+
if err = lbaas.deleteLoadBalancer(loadbalancer, service, svcConf, true); err != nil {
551+
return nil, fmt.Errorf("loadbalancer %s is in ERROR state and there was an error when removing it: %v", loadbalancer.ID, err)
552+
}
553+
return nil, fmt.Errorf("loadbalancer %s has gone into ERROR state, please check Octavia for details. Load balancer was "+
554+
"deleted and its creation will be retried", loadbalancer.ID)
555+
}
548556
return nil, err
549557
}
550558

@@ -2432,6 +2440,83 @@ func (lbaas *LbaasV2) deleteFIPIfCreatedByProvider(fip *floatingips.FloatingIP,
24322440
return true, nil
24332441
}
24342442

2443+
// deleteLoadBalancer removes the LB and its children either by using Octavia cascade deletion or manually.
2444+
func (lbaas *LbaasV2) deleteLoadBalancer(loadbalancer *loadbalancers.LoadBalancer, service *corev1.Service, svcConf *serviceConfig, needDeleteLB bool) error {
2445+
if needDeleteLB && lbaas.opts.CascadeDelete {
2446+
klog.InfoS("Deleting load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2447+
if err := openstackutil.DeleteLoadbalancer(lbaas.lb, loadbalancer.ID, true); err != nil {
2448+
return err
2449+
}
2450+
klog.InfoS("Deleted load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2451+
} else {
2452+
// get all listeners associated with this loadbalancer
2453+
listenerList, err := openstackutil.GetListenersByLoadBalancerID(lbaas.lb, loadbalancer.ID)
2454+
if err != nil {
2455+
return fmt.Errorf("error getting LB %s listeners: %v", loadbalancer.ID, err)
2456+
}
2457+
2458+
if !needDeleteLB {
2459+
var listenersToDelete []listeners.Listener
2460+
curListenerMapping := make(map[listenerKey]*listeners.Listener)
2461+
for i, l := range listenerList {
2462+
key := listenerKey{Protocol: listeners.Protocol(l.Protocol), Port: l.ProtocolPort}
2463+
curListenerMapping[key] = &listenerList[i]
2464+
}
2465+
2466+
for _, port := range service.Spec.Ports {
2467+
proto := getListenerProtocol(port.Protocol, svcConf)
2468+
listener, isPresent := curListenerMapping[listenerKey{
2469+
Protocol: proto,
2470+
Port: int(port.Port),
2471+
}]
2472+
if isPresent && cpoutil.Contains(listener.Tags, loadbalancer.Name) {
2473+
listenersToDelete = append(listenersToDelete, *listener)
2474+
}
2475+
}
2476+
listenerList = listenersToDelete
2477+
}
2478+
2479+
// get all pools (and health monitors) associated with this loadbalancer
2480+
var monitorIDs []string
2481+
for _, listener := range listenerList {
2482+
pool, err := openstackutil.GetPoolByListener(lbaas.lb, loadbalancer.ID, listener.ID)
2483+
if err != nil && err != cpoerrors.ErrNotFound {
2484+
return fmt.Errorf("error getting pool for listener %s: %v", listener.ID, err)
2485+
}
2486+
if pool != nil {
2487+
if pool.MonitorID != "" {
2488+
monitorIDs = append(monitorIDs, pool.MonitorID)
2489+
}
2490+
}
2491+
}
2492+
2493+
// delete monitors
2494+
for _, monitorID := range monitorIDs {
2495+
klog.InfoS("Deleting health monitor", "monitorID", monitorID, "lbID", loadbalancer.ID)
2496+
if err := openstackutil.DeleteHealthMonitor(lbaas.lb, monitorID, loadbalancer.ID); err != nil {
2497+
return err
2498+
}
2499+
klog.InfoS("Deleted health monitor", "monitorID", monitorID, "lbID", loadbalancer.ID)
2500+
}
2501+
2502+
// delete listeners
2503+
if err := lbaas.deleteListeners(loadbalancer.ID, listenerList); err != nil {
2504+
return err
2505+
}
2506+
2507+
if needDeleteLB {
2508+
// delete the loadbalancer in old way, i.e. no cascading.
2509+
klog.InfoS("Deleting load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2510+
if err := openstackutil.DeleteLoadbalancer(lbaas.lb, loadbalancer.ID, false); err != nil {
2511+
return err
2512+
}
2513+
klog.InfoS("Deleted load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2514+
}
2515+
}
2516+
2517+
return nil
2518+
}
2519+
24352520
func (lbaas *LbaasV2) ensureLoadBalancerDeleted(ctx context.Context, clusterName string, service *corev1.Service) error {
24362521
lbName := lbaas.GetLoadBalancerName(ctx, clusterName, service)
24372522
legacyName := lbaas.getLoadBalancerLegacyName(ctx, clusterName, service)
@@ -2504,76 +2589,8 @@ func (lbaas *LbaasV2) ensureLoadBalancerDeleted(ctx context.Context, clusterName
25042589
}
25052590
}
25062591

2507-
if needDeleteLB && lbaas.opts.CascadeDelete {
2508-
klog.InfoS("Deleting load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2509-
if err := openstackutil.DeleteLoadbalancer(lbaas.lb, loadbalancer.ID, true); err != nil {
2510-
return err
2511-
}
2512-
klog.InfoS("Deleted load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2513-
} else {
2514-
// get all listeners associated with this loadbalancer
2515-
listenerList, err := openstackutil.GetListenersByLoadBalancerID(lbaas.lb, loadbalancer.ID)
2516-
if err != nil {
2517-
return fmt.Errorf("error getting LB %s listeners: %v", loadbalancer.ID, err)
2518-
}
2519-
2520-
if !needDeleteLB {
2521-
var listenersToDelete []listeners.Listener
2522-
curListenerMapping := make(map[listenerKey]*listeners.Listener)
2523-
for i, l := range listenerList {
2524-
key := listenerKey{Protocol: listeners.Protocol(l.Protocol), Port: l.ProtocolPort}
2525-
curListenerMapping[key] = &listenerList[i]
2526-
}
2527-
2528-
for _, port := range service.Spec.Ports {
2529-
proto := getListenerProtocol(port.Protocol, svcConf)
2530-
listener, isPresent := curListenerMapping[listenerKey{
2531-
Protocol: proto,
2532-
Port: int(port.Port),
2533-
}]
2534-
if isPresent && cpoutil.Contains(listener.Tags, lbName) {
2535-
listenersToDelete = append(listenersToDelete, *listener)
2536-
}
2537-
}
2538-
listenerList = listenersToDelete
2539-
}
2540-
2541-
// get all pools (and health monitors) associated with this loadbalancer
2542-
var monitorIDs []string
2543-
for _, listener := range listenerList {
2544-
pool, err := openstackutil.GetPoolByListener(lbaas.lb, loadbalancer.ID, listener.ID)
2545-
if err != nil && err != cpoerrors.ErrNotFound {
2546-
return fmt.Errorf("error getting pool for listener %s: %v", listener.ID, err)
2547-
}
2548-
if pool != nil {
2549-
if pool.MonitorID != "" {
2550-
monitorIDs = append(monitorIDs, pool.MonitorID)
2551-
}
2552-
}
2553-
}
2554-
2555-
// delete monitors
2556-
for _, monitorID := range monitorIDs {
2557-
klog.InfoS("Deleting health monitor", "monitorID", monitorID, "lbID", loadbalancer.ID)
2558-
if err := openstackutil.DeleteHealthMonitor(lbaas.lb, monitorID, loadbalancer.ID); err != nil {
2559-
return err
2560-
}
2561-
klog.InfoS("Deleted health monitor", "monitorID", monitorID, "lbID", loadbalancer.ID)
2562-
}
2563-
2564-
// delete listeners
2565-
if err := lbaas.deleteListeners(loadbalancer.ID, listenerList); err != nil {
2566-
return err
2567-
}
2568-
2569-
if needDeleteLB {
2570-
// delete the loadbalancer in old way, i.e. no cascading.
2571-
klog.InfoS("Deleting load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2572-
if err := openstackutil.DeleteLoadbalancer(lbaas.lb, loadbalancer.ID, false); err != nil {
2573-
return err
2574-
}
2575-
klog.InfoS("Deleted load balancer", "lbID", loadbalancer.ID, "service", klog.KObj(service))
2576-
}
2592+
if err = lbaas.deleteLoadBalancer(loadbalancer, service, svcConf, needDeleteLB); err != nil {
2593+
return err
25772594
}
25782595

25792596
// Remove the Service's tag from the load balancer.

pkg/util/openstack/loadbalancer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func WaitActiveAndGetLoadBalancer(client *gophercloud.ServiceClient, loadbalance
193193
klog.InfoS("Load balancer ACTIVE", "lbID", loadbalancerID)
194194
return true, nil
195195
} else if loadbalancer.ProvisioningStatus == errorStatus {
196-
return true, fmt.Errorf("loadbalancer has gone into ERROR state")
196+
return true, fmt.Errorf("loadbalancer %s has gone into ERROR state", loadbalancerID)
197197
} else {
198198
return false, nil
199199
}

0 commit comments

Comments
 (0)