Skip to content

Commit 747e473

Browse files
add max_retries_down support for octavia health monitors (#2379)
Co-authored-by: kayrus <[email protected]>
1 parent 0cf5884 commit 747e473

File tree

5 files changed

+83
-51
lines changed

5 files changed

+83
-51
lines changed

docs/openstack-cloud-controller-manager/expose-applications-using-loadbalancer-type-service.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,11 @@ Request Body:
192192

193193
- `loadbalancer.openstack.org/health-monitor-max-retries`
194194

195-
Defines the health monitor retry count for the loadbalancer pools.
195+
Defines the health monitor retry count for the loadbalancer pool members.
196+
197+
- `loadbalancer.openstack.org/health-monitor-max-retries-down`
198+
199+
Defines the health monitor retry count for the loadbalancer pool members to be marked down.
196200

197201
- `loadbalancer.openstack.org/flavor-id`
198202

@@ -255,7 +259,8 @@ subnet-id="fa6a4e6c-6ae4-4dde-ae86-3e2f452c1f03"
255259
create-monitor=true
256260
monitor-delay=60s
257261
monitor-timeout=30s
258-
monitor-max-retries=5
262+
monitor-max-retries=1
263+
monitor-max-retries-down=3
259264

260265
[LoadBalancerClass "internetFacing"]
261266
floating-network-id="c57af0a0-da92-49be-a98a-345ceca004b3"

docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,9 @@ Although the openstack-cloud-controller-manager was initially implemented with N
228228
* `monitor-max-retries`
229229
The number of successful checks before changing the operating status of the load balancer member to ONLINE. A valid value is from 1 to 10. Default: 1
230230
231+
* `monitor-max-retries-down`
232+
The number of unsuccessful checks before changing the operating status of the load balancer member to ERROR. A valid value is from 1 to 10. Default: 3
233+
231234
* `monitor-timeout`
232235
The maximum time, in seconds, that a monitor waits to connect backend before it times out. Default: 3
233236

pkg/openstack/loadbalancer.go

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,13 @@ const (
8787
ServiceAnnotationLoadBalancerAvailabilityZone = "loadbalancer.openstack.org/availability-zone"
8888
// ServiceAnnotationLoadBalancerEnableHealthMonitor defines whether to create health monitor for the load balancer
8989
// pool, if not specified, use 'create-monitor' config. The health monitor can be created or deleted dynamically.
90-
ServiceAnnotationLoadBalancerEnableHealthMonitor = "loadbalancer.openstack.org/enable-health-monitor"
91-
ServiceAnnotationLoadBalancerHealthMonitorDelay = "loadbalancer.openstack.org/health-monitor-delay"
92-
ServiceAnnotationLoadBalancerHealthMonitorTimeout = "loadbalancer.openstack.org/health-monitor-timeout"
93-
ServiceAnnotationLoadBalancerHealthMonitorMaxRetries = "loadbalancer.openstack.org/health-monitor-max-retries"
94-
ServiceAnnotationLoadBalancerLoadbalancerHostname = "loadbalancer.openstack.org/hostname"
95-
ServiceAnnotationLoadBalancerAddress = "loadbalancer.openstack.org/load-balancer-address"
90+
ServiceAnnotationLoadBalancerEnableHealthMonitor = "loadbalancer.openstack.org/enable-health-monitor"
91+
ServiceAnnotationLoadBalancerHealthMonitorDelay = "loadbalancer.openstack.org/health-monitor-delay"
92+
ServiceAnnotationLoadBalancerHealthMonitorTimeout = "loadbalancer.openstack.org/health-monitor-timeout"
93+
ServiceAnnotationLoadBalancerHealthMonitorMaxRetries = "loadbalancer.openstack.org/health-monitor-max-retries"
94+
ServiceAnnotationLoadBalancerHealthMonitorMaxRetriesDown = "loadbalancer.openstack.org/health-monitor-max-retries-down"
95+
ServiceAnnotationLoadBalancerLoadbalancerHostname = "loadbalancer.openstack.org/hostname"
96+
ServiceAnnotationLoadBalancerAddress = "loadbalancer.openstack.org/load-balancer-address"
9697
// revive:disable:var-naming
9798
ServiceAnnotationTlsContainerRef = "loadbalancer.openstack.org/default-tls-container-ref"
9899
// revive:enable:var-naming
@@ -324,33 +325,34 @@ func tagList(tags string) ([]string, bool, bool) {
324325

325326
// serviceConfig contains configurations for creating a Service.
326327
type serviceConfig struct {
327-
internal bool
328-
connLimit int
329-
configClassName string
330-
lbNetworkID string
331-
lbSubnetID string
332-
lbMemberSubnetID string
333-
lbPublicNetworkID string
334-
lbPublicSubnetSpec *floatingSubnetSpec
335-
keepClientIP bool
336-
enableProxyProtocol bool
337-
timeoutClientData int
338-
timeoutMemberConnect int
339-
timeoutMemberData int
340-
timeoutTCPInspect int
341-
allowedCIDR []string
342-
enableMonitor bool
343-
flavorID string
344-
availabilityZone string
345-
tlsContainerRef string
346-
lbID string
347-
lbName string
348-
supportLBTags bool
349-
healthCheckNodePort int
350-
healthMonitorDelay int
351-
healthMonitorTimeout int
352-
healthMonitorMaxRetries int
353-
preferredIPFamily corev1.IPFamily // preferred (the first) IP family indicated in service's `spec.ipFamilies`
328+
internal bool
329+
connLimit int
330+
configClassName string
331+
lbNetworkID string
332+
lbSubnetID string
333+
lbMemberSubnetID string
334+
lbPublicNetworkID string
335+
lbPublicSubnetSpec *floatingSubnetSpec
336+
keepClientIP bool
337+
enableProxyProtocol bool
338+
timeoutClientData int
339+
timeoutMemberConnect int
340+
timeoutMemberData int
341+
timeoutTCPInspect int
342+
allowedCIDR []string
343+
enableMonitor bool
344+
flavorID string
345+
availabilityZone string
346+
tlsContainerRef string
347+
lbID string
348+
lbName string
349+
supportLBTags bool
350+
healthCheckNodePort int
351+
healthMonitorDelay int
352+
healthMonitorTimeout int
353+
healthMonitorMaxRetries int
354+
healthMonitorMaxRetriesDown int
355+
preferredIPFamily corev1.IPFamily // preferred (the first) IP family indicated in service's `spec.ipFamilies`
354356
}
355357

356358
type listenerKey struct {
@@ -1051,11 +1053,15 @@ func (lbaas *LbaasV2) ensureOctaviaHealthMonitor(lbID string, name string, pool
10511053
}
10521054
monitorID = ""
10531055
}
1054-
if svcConf.healthMonitorDelay != monitor.Delay || svcConf.healthMonitorTimeout != monitor.Timeout || svcConf.healthMonitorMaxRetries != monitor.MaxRetries {
1056+
if svcConf.healthMonitorDelay != monitor.Delay ||
1057+
svcConf.healthMonitorTimeout != monitor.Timeout ||
1058+
svcConf.healthMonitorMaxRetries != monitor.MaxRetries ||
1059+
svcConf.healthMonitorMaxRetriesDown != monitor.MaxRetriesDown {
10551060
updateOpts := v2monitors.UpdateOpts{
1056-
Delay: svcConf.healthMonitorDelay,
1057-
Timeout: svcConf.healthMonitorTimeout,
1058-
MaxRetries: svcConf.healthMonitorMaxRetries,
1061+
Delay: svcConf.healthMonitorDelay,
1062+
Timeout: svcConf.healthMonitorTimeout,
1063+
MaxRetries: svcConf.healthMonitorMaxRetries,
1064+
MaxRetriesDown: svcConf.healthMonitorMaxRetriesDown,
10591065
}
10601066
klog.Infof("Updating health monitor %s updateOpts %+v", monitorID, updateOpts)
10611067
if err := openstackutil.UpdateHealthMonitor(lbaas.lb, monitorID, updateOpts, lbID); err != nil {
@@ -1102,10 +1108,11 @@ func (lbaas *LbaasV2) canUseHTTPMonitor(port corev1.ServicePort) bool {
11021108
// buildMonitorCreateOpts returns a v2monitors.CreateOpts without PoolID for consumption of both fully populated Loadbalancers and Monitors.
11031109
func (lbaas *LbaasV2) buildMonitorCreateOpts(svcConf *serviceConfig, port corev1.ServicePort) v2monitors.CreateOpts {
11041110
opts := v2monitors.CreateOpts{
1105-
Type: string(port.Protocol),
1106-
Delay: svcConf.healthMonitorDelay,
1107-
Timeout: svcConf.healthMonitorTimeout,
1108-
MaxRetries: svcConf.healthMonitorMaxRetries,
1111+
Type: string(port.Protocol),
1112+
Delay: svcConf.healthMonitorDelay,
1113+
Timeout: svcConf.healthMonitorTimeout,
1114+
MaxRetries: svcConf.healthMonitorMaxRetries,
1115+
MaxRetriesDown: svcConf.healthMonitorMaxRetriesDown,
11091116
}
11101117
if port.Protocol == corev1.ProtocolUDP {
11111118
opts.Type = "UDP-CONNECT"
@@ -1552,6 +1559,7 @@ func (lbaas *LbaasV2) checkServiceUpdate(service *corev1.Service, nodes []*corev
15521559
svcConf.healthMonitorDelay = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorDelay, int(lbaas.opts.MonitorDelay.Duration.Seconds()))
15531560
svcConf.healthMonitorTimeout = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorTimeout, int(lbaas.opts.MonitorTimeout.Duration.Seconds()))
15541561
svcConf.healthMonitorMaxRetries = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetries, int(lbaas.opts.MonitorMaxRetries))
1562+
svcConf.healthMonitorMaxRetriesDown = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetriesDown, int(lbaas.opts.MonitorMaxRetriesDown))
15551563
return nil
15561564
}
15571565

@@ -1786,6 +1794,7 @@ func (lbaas *LbaasV2) checkService(service *corev1.Service, nodes []*corev1.Node
17861794
svcConf.healthMonitorDelay = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorDelay, int(lbaas.opts.MonitorDelay.Duration.Seconds()))
17871795
svcConf.healthMonitorTimeout = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorTimeout, int(lbaas.opts.MonitorTimeout.Duration.Seconds()))
17881796
svcConf.healthMonitorMaxRetries = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetries, int(lbaas.opts.MonitorMaxRetries))
1797+
svcConf.healthMonitorMaxRetriesDown = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetriesDown, int(lbaas.opts.MonitorMaxRetriesDown))
17891798
return nil
17901799
}
17911800

pkg/openstack/openstack.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ type LoadBalancerOpts struct {
101101
MonitorDelay util.MyDuration `gcfg:"monitor-delay"`
102102
MonitorTimeout util.MyDuration `gcfg:"monitor-timeout"`
103103
MonitorMaxRetries uint `gcfg:"monitor-max-retries"`
104+
MonitorMaxRetriesDown uint `gcfg:"monitor-max-retries-down"`
104105
ManageSecurityGroups bool `gcfg:"manage-security-groups"`
105106
InternalLB bool `gcfg:"internal-lb"` // default false
106107
CascadeDelete bool `gcfg:"cascade-delete"`
@@ -211,6 +212,7 @@ func ReadConfig(config io.Reader) (Config, error) {
211212
cfg.LoadBalancer.MonitorDelay = util.MyDuration{Duration: 5 * time.Second}
212213
cfg.LoadBalancer.MonitorTimeout = util.MyDuration{Duration: 3 * time.Second}
213214
cfg.LoadBalancer.MonitorMaxRetries = 1
215+
cfg.LoadBalancer.MonitorMaxRetriesDown = 3
214216
cfg.LoadBalancer.CascadeDelete = true
215217
cfg.LoadBalancer.EnableIngressHostname = false
216218
cfg.LoadBalancer.IngressHostnameSuffix = defaultProxyHostnameSuffix

pkg/openstack/openstack_test.go

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ func TestReadConfig(t *testing.T) {
9898
create-monitor = yes
9999
monitor-delay = 1m
100100
monitor-timeout = 30s
101-
monitor-max-retries = 3
101+
monitor-max-retries = 1
102+
monitor-max-retries-down = 3
102103
[Metadata]
103104
search-order = configDrive, metadataService
104105
`))
@@ -131,16 +132,19 @@ func TestReadConfig(t *testing.T) {
131132
}
132133

133134
if !cfg.LoadBalancer.CreateMonitor {
134-
t.Errorf("incorrect lb.createmonitor: %t", cfg.LoadBalancer.CreateMonitor)
135+
t.Errorf("incorrect lb.create-monitor: %t", cfg.LoadBalancer.CreateMonitor)
135136
}
136137
if cfg.LoadBalancer.MonitorDelay.Duration != 1*time.Minute {
137-
t.Errorf("incorrect lb.monitordelay: %s", cfg.LoadBalancer.MonitorDelay)
138+
t.Errorf("incorrect lb.monitor-delay: %s", cfg.LoadBalancer.MonitorDelay)
138139
}
139140
if cfg.LoadBalancer.MonitorTimeout.Duration != 30*time.Second {
140-
t.Errorf("incorrect lb.monitortimeout: %s", cfg.LoadBalancer.MonitorTimeout)
141+
t.Errorf("incorrect lb.monitor-timeout: %s", cfg.LoadBalancer.MonitorTimeout)
141142
}
142-
if cfg.LoadBalancer.MonitorMaxRetries != 3 {
143-
t.Errorf("incorrect lb.monitormaxretries: %d", cfg.LoadBalancer.MonitorMaxRetries)
143+
if cfg.LoadBalancer.MonitorMaxRetries != 1 {
144+
t.Errorf("incorrect lb.monitor-max-retries: %d", cfg.LoadBalancer.MonitorMaxRetries)
145+
}
146+
if cfg.LoadBalancer.MonitorMaxRetriesDown != 3 {
147+
t.Errorf("incorrect lb.monitor-max-retries-down: %d", cfg.LoadBalancer.MonitorMaxRetriesDown)
144148
}
145149
if cfg.Metadata.SearchOrder != "configDrive, metadataService" {
146150
t.Errorf("incorrect md.search-order: %v", cfg.Metadata.SearchOrder)
@@ -187,7 +191,8 @@ clouds:
187191
create-monitor = yes
188192
monitor-delay = 1m
189193
monitor-timeout = 30s
190-
monitor-max-retries = 3
194+
monitor-max-retries = 1
195+
monitor-max-retries-down = 3
191196
[Metadata]
192197
search-order = configDrive, metadataService
193198
`))
@@ -227,7 +232,15 @@ clouds:
227232

228233
// Make sure non-global sections don't get overwritten
229234
if !cfg.LoadBalancer.CreateMonitor {
230-
t.Errorf("incorrect lb.createmonitor: %t", cfg.LoadBalancer.CreateMonitor)
235+
t.Errorf("incorrect lb.create-monitor: %t", cfg.LoadBalancer.CreateMonitor)
236+
}
237+
238+
if cfg.LoadBalancer.MonitorMaxRetries != 1 {
239+
t.Errorf("incorrect lb.monitor-max-retries: %d", cfg.LoadBalancer.MonitorMaxRetries)
240+
}
241+
242+
if cfg.LoadBalancer.MonitorMaxRetriesDown != 3 {
243+
t.Errorf("incorrect lb.monitor-max-retries-down: %d", cfg.LoadBalancer.MonitorMaxRetriesDown)
231244
}
232245
}
233246

0 commit comments

Comments
 (0)