Skip to content

Commit 0d3fe4f

Browse files
committed
Fix handling the restart of ovn-controllers
The previous `getattr(old, 'nb_cfg', False)` would evaluate to a falsy value if the `old` row either did not contain an `nb_cfg` value or contained the value 0. As 0 is the value set on startup of the ovn-controller, this causes the neutron-api to ignore any event an ovn-controller sends directly after startup. In turn, this causes us to miss the information that the agent is synchronized, causing the agent to appear as down until something bumps the `nb_cfg` value globally. Closes-Bug: #1997982 Change-Id: Icec8fee93e64b871999f38674e305238e9705fd4 (cherry picked from commit 4cc611d)
1 parent deb0153 commit 0d3fe4f

File tree

2 files changed

+61
-1
lines changed

2 files changed

+61
-1
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ def match_fn(self, event, row, old=None):
336336
# don't update the AgentCache. We use chassis_private.chassis to return
337337
# data about the agent.
338338
return event == self.ROW_CREATE or (
339-
getattr(old, 'nb_cfg', False) and not
339+
hasattr(old, 'nb_cfg') and not
340340
(self.table == 'Chassis_Private' and not row.chassis))
341341

342342
def run(self, event, row, old):

neutron/tests/functional/plugins/ml2/drivers/ovn/mech_driver/ovsdb/test_ovsdb_monitor.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,66 @@ def check_agent_ts():
480480
self.fail('Chassis timestamp: %s, agent updated_at: %s' %
481481
(chassis_ts, str(agent.updated_at)))
482482

483+
def test_agent_restart(self):
484+
def check_agent_up():
485+
agent = neutron_agent.AgentCache()[self.chassis_name]
486+
return agent.alive
487+
488+
def check_agent_down():
489+
return not check_agent_up()
490+
491+
def check_nb_cfg_timestamp_is_not_null():
492+
agent = neutron_agent.AgentCache()[self.chassis_name]
493+
return agent.updated_at != 0
494+
495+
if not self.sb_api.is_table_present('Chassis_Private'):
496+
self.skipTest('Ovn sb not support Chassis_Private')
497+
498+
# Set nb_cfg to some realistic value, so that the alive check can
499+
# actually work
500+
self.nb_api.db_set(
501+
'NB_Global', '.', ('nb_cfg', 1337)).execute(check_error=True)
502+
self.sb_api.db_set(
503+
'Chassis_Private', self.chassis_name, ('nb_cfg', 1337)
504+
).execute(check_error=True)
505+
506+
chassis_uuid = self.sb_api.db_get(
507+
'Chassis', self.chassis_name, 'uuid').execute(check_error=True)
508+
509+
self.assertTrue(check_agent_up())
510+
n_utils.wait_until_true(check_nb_cfg_timestamp_is_not_null, timeout=5)
511+
512+
# Let's start by shutting down the ovn-controller
513+
# (where it will remove the Chassis_Private table entry)
514+
self.sb_api.db_destroy(
515+
'Chassis_Private', self.chassis_name).execute(check_error=True)
516+
try:
517+
n_utils.wait_until_true(check_agent_down, timeout=5)
518+
except n_utils.WaitTimeout:
519+
self.fail('Agent did not go down after Chassis_Private removal')
520+
521+
# Now the ovn-controller starts up again and has not yet synced with
522+
# the southbound database
523+
self.sb_api.db_create(
524+
'Chassis_Private', name=self.chassis_name,
525+
external_ids={}, chassis=chassis_uuid,
526+
nb_cfg_timestamp=0, nb_cfg=0
527+
).execute(check_error=True)
528+
self.assertTrue(check_agent_down())
529+
530+
# Now the ovn-controller has synced with the southbound database
531+
nb_cfg_timestamp = timeutils.utcnow_ts() * 1000
532+
with self.sb_api.transaction() as txn:
533+
txn.add(self.sb_api.db_set('Chassis_Private', self.chassis_name,
534+
('nb_cfg_timestamp', nb_cfg_timestamp)))
535+
txn.add(self.sb_api.db_set('Chassis_Private', self.chassis_name,
536+
('nb_cfg', 1337)))
537+
try:
538+
n_utils.wait_until_true(check_agent_up, timeout=5)
539+
except n_utils.WaitTimeout:
540+
self.fail('Agent did not go up after sync is done')
541+
self.assertTrue(check_nb_cfg_timestamp_is_not_null())
542+
483543

484544
class TestOvnIdlProbeInterval(base.TestOVNFunctionalBase):
485545
def setUp(self):

0 commit comments

Comments
 (0)