File tree Expand file tree Collapse file tree 1 file changed +12
-5
lines changed
neutron/agent/ovn/metadata Expand file tree Collapse file tree 1 file changed +12
-5
lines changed Original file line number Diff line number Diff line change @@ -202,14 +202,21 @@ def _update_chassis(self, row):
202202 ovn_const .OVN_AGENT_METADATA_SB_CFG_KEY :
203203 str (row .nb_cfg )})).execute ()
204204
205+ delay = 0
205206 if self .first_run :
206- interval = 0
207207 self .first_run = False
208208 else :
209- interval = randint (0 , cfg .CONF .agent_down_time // 2 )
210-
211- LOG .debug ("Delaying updating chassis table for %s seconds" , interval )
212- timer = threading .Timer (interval , _update_chassis , [self , row ])
209+ # We occasionally see port binding failed errors due to
210+ # the ml2 driver refusing to bind the port to a dead agent.
211+ # If all agents heartbeat at the same time, they will all
212+ # cause a load spike on the server. To mitigate that we
213+ # need to spread out the load by introducing a random delay.
214+ # Clamp the max delay to between 3 and 10 seconds.
215+ max_delay = max (min (cfg .CONF .agent_down_time // 3 , 10 ), 3 )
216+ delay = randint (0 , max_delay )
217+
218+ LOG .debug ("Delaying updating chassis table for %s seconds" , delay )
219+ timer = threading .Timer (delay , _update_chassis , [self , row ])
213220 timer .start ()
214221
215222
You can’t perform that action at this time.
0 commit comments