File tree Expand file tree Collapse file tree 1 file changed +12
-5
lines changed
neutron/agent/ovn/metadata Expand file tree Collapse file tree 1 file changed +12
-5
lines changed Original file line number Diff line number Diff line change @@ -202,14 +202,21 @@ def _update_chassis(self, row):
202
202
ovn_const .OVN_AGENT_METADATA_SB_CFG_KEY :
203
203
str (row .nb_cfg )})).execute ()
204
204
205
+ delay = 0
205
206
if self .first_run :
206
- interval = 0
207
207
self .first_run = False
208
208
else :
209
- interval = randint (0 , cfg .CONF .agent_down_time // 2 )
210
-
211
- LOG .debug ("Delaying updating chassis table for %s seconds" , interval )
212
- timer = threading .Timer (interval , _update_chassis , [self , row ])
209
+ # We occasionally see port binding failed errors due to
210
+ # the ml2 driver refusing to bind the port to a dead agent.
211
+ # If all agents heartbeat at the same time, they will all
212
+ # cause a load spike on the server. To mitigate that we
213
+ # need to spread out the load by introducing a random delay.
214
+ # Clamp the max delay to between 3 and 10 seconds.
215
+ max_delay = max (min (cfg .CONF .agent_down_time // 3 , 10 ), 3 )
216
+ delay = randint (0 , max_delay )
217
+
218
+ LOG .debug ("Delaying updating chassis table for %s seconds" , delay )
219
+ timer = threading .Timer (delay , _update_chassis , [self , row ])
213
220
timer .start ()
214
221
215
222
You can’t perform that action at this time.
0 commit comments