Skip to content

Commit 0b4ceeb

Browse files
authored
[occm] add max_retries_down support for octavia health monitors (kubernetes#2372) (kubernetes#2380)
1 parent bd78906 commit 0b4ceeb

File tree

5 files changed

+83
-51
lines changed

5 files changed

+83
-51
lines changed

docs/openstack-cloud-controller-manager/expose-applications-using-loadbalancer-type-service.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,11 @@ Request Body:
192192

193193
- `loadbalancer.openstack.org/health-monitor-max-retries`
194194

195-
Defines the health monitor retry count for the loadbalancer pools.
195+
Defines the health monitor retry count for the loadbalancer pool members.
196+
197+
- `loadbalancer.openstack.org/health-monitor-max-retries-down`
198+
199+
Defines the health monitor retry count for the loadbalancer pool members to be marked down.
196200

197201
- `loadbalancer.openstack.org/flavor-id`
198202

@@ -255,7 +259,8 @@ subnet-id="fa6a4e6c-6ae4-4dde-ae86-3e2f452c1f03"
255259
create-monitor=true
256260
monitor-delay=60s
257261
monitor-timeout=30s
258-
monitor-max-retries=5
262+
monitor-max-retries=1
263+
monitor-max-retries-down=3
259264

260265
[LoadBalancerClass "internetFacing"]
261266
floating-network-id="c57af0a0-da92-49be-a98a-345ceca004b3"

docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,9 @@ Although the openstack-cloud-controller-manager was initially implemented with N
229229
* `monitor-max-retries`
230230
The number of successful checks before changing the operating status of the load balancer member to ONLINE. A valid value is from 1 to 10. Default: 1
231231
232+
* `monitor-max-retries-down`
233+
The number of unsuccessful checks before changing the operating status of the load balancer member to ERROR. A valid value is from 1 to 10. Default: 3
234+
232235
* `monitor-timeout`
233236
The maximum time, in seconds, that a monitor waits to connect backend before it times out. Default: 3
234237

pkg/openstack/loadbalancer.go

Lines changed: 50 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,13 @@ const (
8888
ServiceAnnotationLoadBalancerAvailabilityZone = "loadbalancer.openstack.org/availability-zone"
8989
// ServiceAnnotationLoadBalancerEnableHealthMonitor defines whether to create health monitor for the load balancer
9090
// pool, if not specified, use 'create-monitor' config. The health monitor can be created or deleted dynamically.
91-
ServiceAnnotationLoadBalancerEnableHealthMonitor = "loadbalancer.openstack.org/enable-health-monitor"
92-
ServiceAnnotationLoadBalancerHealthMonitorDelay = "loadbalancer.openstack.org/health-monitor-delay"
93-
ServiceAnnotationLoadBalancerHealthMonitorTimeout = "loadbalancer.openstack.org/health-monitor-timeout"
94-
ServiceAnnotationLoadBalancerHealthMonitorMaxRetries = "loadbalancer.openstack.org/health-monitor-max-retries"
95-
ServiceAnnotationLoadBalancerLoadbalancerHostname = "loadbalancer.openstack.org/hostname"
96-
ServiceAnnotationLoadBalancerAddress = "loadbalancer.openstack.org/load-balancer-address"
91+
ServiceAnnotationLoadBalancerEnableHealthMonitor = "loadbalancer.openstack.org/enable-health-monitor"
92+
ServiceAnnotationLoadBalancerHealthMonitorDelay = "loadbalancer.openstack.org/health-monitor-delay"
93+
ServiceAnnotationLoadBalancerHealthMonitorTimeout = "loadbalancer.openstack.org/health-monitor-timeout"
94+
ServiceAnnotationLoadBalancerHealthMonitorMaxRetries = "loadbalancer.openstack.org/health-monitor-max-retries"
95+
ServiceAnnotationLoadBalancerHealthMonitorMaxRetriesDown = "loadbalancer.openstack.org/health-monitor-max-retries-down"
96+
ServiceAnnotationLoadBalancerLoadbalancerHostname = "loadbalancer.openstack.org/hostname"
97+
ServiceAnnotationLoadBalancerAddress = "loadbalancer.openstack.org/load-balancer-address"
9798
// revive:disable:var-naming
9899
ServiceAnnotationTlsContainerRef = "loadbalancer.openstack.org/default-tls-container-ref"
99100
// revive:enable:var-naming
@@ -325,33 +326,34 @@ func tagList(tags string) ([]string, bool, bool) {
325326

326327
// serviceConfig contains configurations for creating a Service.
327328
type serviceConfig struct {
328-
internal bool
329-
connLimit int
330-
configClassName string
331-
lbNetworkID string
332-
lbSubnetID string
333-
lbMemberSubnetID string
334-
lbPublicNetworkID string
335-
lbPublicSubnetSpec *floatingSubnetSpec
336-
keepClientIP bool
337-
enableProxyProtocol bool
338-
timeoutClientData int
339-
timeoutMemberConnect int
340-
timeoutMemberData int
341-
timeoutTCPInspect int
342-
allowedCIDR []string
343-
enableMonitor bool
344-
flavorID string
345-
availabilityZone string
346-
tlsContainerRef string
347-
lbID string
348-
lbName string
349-
supportLBTags bool
350-
healthCheckNodePort int
351-
healthMonitorDelay int
352-
healthMonitorTimeout int
353-
healthMonitorMaxRetries int
354-
preferredIPFamily corev1.IPFamily // preferred (the first) IP family indicated in service's `spec.ipFamilies`
329+
internal bool
330+
connLimit int
331+
configClassName string
332+
lbNetworkID string
333+
lbSubnetID string
334+
lbMemberSubnetID string
335+
lbPublicNetworkID string
336+
lbPublicSubnetSpec *floatingSubnetSpec
337+
keepClientIP bool
338+
enableProxyProtocol bool
339+
timeoutClientData int
340+
timeoutMemberConnect int
341+
timeoutMemberData int
342+
timeoutTCPInspect int
343+
allowedCIDR []string
344+
enableMonitor bool
345+
flavorID string
346+
availabilityZone string
347+
tlsContainerRef string
348+
lbID string
349+
lbName string
350+
supportLBTags bool
351+
healthCheckNodePort int
352+
healthMonitorDelay int
353+
healthMonitorTimeout int
354+
healthMonitorMaxRetries int
355+
healthMonitorMaxRetriesDown int
356+
preferredIPFamily corev1.IPFamily // preferred (the first) IP family indicated in service's `spec.ipFamilies`
355357
}
356358

357359
type listenerKey struct {
@@ -1091,11 +1093,15 @@ func (lbaas *LbaasV2) ensureOctaviaHealthMonitor(lbID string, name string, pool
10911093
}
10921094
monitorID = ""
10931095
}
1094-
if svcConf.healthMonitorDelay != monitor.Delay || svcConf.healthMonitorTimeout != monitor.Timeout || svcConf.healthMonitorMaxRetries != monitor.MaxRetries {
1096+
if svcConf.healthMonitorDelay != monitor.Delay ||
1097+
svcConf.healthMonitorTimeout != monitor.Timeout ||
1098+
svcConf.healthMonitorMaxRetries != monitor.MaxRetries ||
1099+
svcConf.healthMonitorMaxRetriesDown != monitor.MaxRetriesDown {
10951100
updateOpts := v2monitors.UpdateOpts{
1096-
Delay: svcConf.healthMonitorDelay,
1097-
Timeout: svcConf.healthMonitorTimeout,
1098-
MaxRetries: svcConf.healthMonitorMaxRetries,
1101+
Delay: svcConf.healthMonitorDelay,
1102+
Timeout: svcConf.healthMonitorTimeout,
1103+
MaxRetries: svcConf.healthMonitorMaxRetries,
1104+
MaxRetriesDown: svcConf.healthMonitorMaxRetriesDown,
10991105
}
11001106
klog.Infof("Updating health monitor %s updateOpts %+v", monitorID, updateOpts)
11011107
if err := openstackutil.UpdateHealthMonitor(lbaas.lb, monitorID, updateOpts); err != nil {
@@ -1142,10 +1148,11 @@ func (lbaas *LbaasV2) canUseHTTPMonitor(port corev1.ServicePort) bool {
11421148
// buildMonitorCreateOpts returns a v2monitors.CreateOpts without PoolID for consumption of both, fully populated Loadbalancers and Monitors.
11431149
func (lbaas *LbaasV2) buildMonitorCreateOpts(svcConf *serviceConfig, port corev1.ServicePort) v2monitors.CreateOpts {
11441150
opts := v2monitors.CreateOpts{
1145-
Type: string(port.Protocol),
1146-
Delay: svcConf.healthMonitorDelay,
1147-
Timeout: svcConf.healthMonitorTimeout,
1148-
MaxRetries: svcConf.healthMonitorMaxRetries,
1151+
Type: string(port.Protocol),
1152+
Delay: svcConf.healthMonitorDelay,
1153+
Timeout: svcConf.healthMonitorTimeout,
1154+
MaxRetries: svcConf.healthMonitorMaxRetries,
1155+
MaxRetriesDown: svcConf.healthMonitorMaxRetriesDown,
11491156
}
11501157
if port.Protocol == corev1.ProtocolUDP {
11511158
opts.Type = "UDP-CONNECT"
@@ -1580,6 +1587,7 @@ func (lbaas *LbaasV2) checkServiceUpdate(service *corev1.Service, nodes []*corev
15801587
svcConf.healthMonitorDelay = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorDelay, int(lbaas.opts.MonitorDelay.Duration.Seconds()))
15811588
svcConf.healthMonitorTimeout = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorTimeout, int(lbaas.opts.MonitorTimeout.Duration.Seconds()))
15821589
svcConf.healthMonitorMaxRetries = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetries, int(lbaas.opts.MonitorMaxRetries))
1590+
svcConf.healthMonitorMaxRetriesDown = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetriesDown, int(lbaas.opts.MonitorMaxRetriesDown))
15831591
return nil
15841592
}
15851593

@@ -1815,6 +1823,7 @@ func (lbaas *LbaasV2) checkService(service *corev1.Service, nodes []*corev1.Node
18151823
svcConf.healthMonitorDelay = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorDelay, int(lbaas.opts.MonitorDelay.Duration.Seconds()))
18161824
svcConf.healthMonitorTimeout = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorTimeout, int(lbaas.opts.MonitorTimeout.Duration.Seconds()))
18171825
svcConf.healthMonitorMaxRetries = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetries, int(lbaas.opts.MonitorMaxRetries))
1826+
svcConf.healthMonitorMaxRetriesDown = getIntFromServiceAnnotation(service, ServiceAnnotationLoadBalancerHealthMonitorMaxRetriesDown, int(lbaas.opts.MonitorMaxRetriesDown))
18181827
return nil
18191828
}
18201829

pkg/openstack/openstack.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ type LoadBalancerOpts struct {
9595
MonitorDelay util.MyDuration `gcfg:"monitor-delay"`
9696
MonitorTimeout util.MyDuration `gcfg:"monitor-timeout"`
9797
MonitorMaxRetries uint `gcfg:"monitor-max-retries"`
98+
MonitorMaxRetriesDown uint `gcfg:"monitor-max-retries-down"`
9899
ManageSecurityGroups bool `gcfg:"manage-security-groups"`
99100
NodeSecurityGroupIDs []string // Do not specify, get it automatically when enable manage-security-groups. TODO(FengyunPan): move it into cache
100101
InternalLB bool `gcfg:"internal-lb"` // default false
@@ -205,6 +206,7 @@ func ReadConfig(config io.Reader) (Config, error) {
205206
cfg.LoadBalancer.MonitorDelay = util.MyDuration{Duration: 5 * time.Second}
206207
cfg.LoadBalancer.MonitorTimeout = util.MyDuration{Duration: 3 * time.Second}
207208
cfg.LoadBalancer.MonitorMaxRetries = 1
209+
cfg.LoadBalancer.MonitorMaxRetriesDown = 3
208210
cfg.LoadBalancer.CascadeDelete = true
209211
cfg.LoadBalancer.EnableIngressHostname = false
210212
cfg.LoadBalancer.IngressHostnameSuffix = defaultProxyHostnameSuffix

pkg/openstack/openstack_test.go

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ func TestReadConfig(t *testing.T) {
9898
create-monitor = yes
9999
monitor-delay = 1m
100100
monitor-timeout = 30s
101-
monitor-max-retries = 3
101+
monitor-max-retries = 1
102+
monitor-max-retries-down = 3
102103
[Metadata]
103104
search-order = configDrive, metadataService
104105
`))
@@ -131,16 +132,19 @@ func TestReadConfig(t *testing.T) {
131132
}
132133

133134
if !cfg.LoadBalancer.CreateMonitor {
134-
t.Errorf("incorrect lb.createmonitor: %t", cfg.LoadBalancer.CreateMonitor)
135+
t.Errorf("incorrect lb.create-monitor: %t", cfg.LoadBalancer.CreateMonitor)
135136
}
136137
if cfg.LoadBalancer.MonitorDelay.Duration != 1*time.Minute {
137-
t.Errorf("incorrect lb.monitordelay: %s", cfg.LoadBalancer.MonitorDelay)
138+
t.Errorf("incorrect lb.monitor-delay: %s", cfg.LoadBalancer.MonitorDelay)
138139
}
139140
if cfg.LoadBalancer.MonitorTimeout.Duration != 30*time.Second {
140-
t.Errorf("incorrect lb.monitortimeout: %s", cfg.LoadBalancer.MonitorTimeout)
141+
t.Errorf("incorrect lb.monitor-timeout: %s", cfg.LoadBalancer.MonitorTimeout)
141142
}
142-
if cfg.LoadBalancer.MonitorMaxRetries != 3 {
143-
t.Errorf("incorrect lb.monitormaxretries: %d", cfg.LoadBalancer.MonitorMaxRetries)
143+
if cfg.LoadBalancer.MonitorMaxRetries != 1 {
144+
t.Errorf("incorrect lb.monitor-max-retries: %d", cfg.LoadBalancer.MonitorMaxRetries)
145+
}
146+
if cfg.LoadBalancer.MonitorMaxRetriesDown != 3 {
147+
t.Errorf("incorrect lb.monitor-max-retries-down: %d", cfg.LoadBalancer.MonitorMaxRetriesDown)
144148
}
145149
if cfg.Metadata.SearchOrder != "configDrive, metadataService" {
146150
t.Errorf("incorrect md.search-order: %v", cfg.Metadata.SearchOrder)
@@ -187,7 +191,8 @@ clouds:
187191
create-monitor = yes
188192
monitor-delay = 1m
189193
monitor-timeout = 30s
190-
monitor-max-retries = 3
194+
monitor-max-retries = 1
195+
monitor-max-retries-down = 3
191196
[Metadata]
192197
search-order = configDrive, metadataService
193198
`))
@@ -227,7 +232,15 @@ clouds:
227232

228233
// Make sure non-global sections don't get overwritten
229234
if !cfg.LoadBalancer.CreateMonitor {
230-
t.Errorf("incorrect lb.createmonitor: %t", cfg.LoadBalancer.CreateMonitor)
235+
t.Errorf("incorrect lb.create-monitor: %t", cfg.LoadBalancer.CreateMonitor)
236+
}
237+
238+
if cfg.LoadBalancer.MonitorMaxRetries != 1 {
239+
t.Errorf("incorrect lb.monitor-max-retries: %d", cfg.LoadBalancer.MonitorMaxRetries)
240+
}
241+
242+
if cfg.LoadBalancer.MonitorMaxRetriesDown != 3 {
243+
t.Errorf("incorrect lb.monitor-max-retries-down: %d", cfg.LoadBalancer.MonitorMaxRetriesDown)
231244
}
232245
}
233246

0 commit comments

Comments
 (0)