Skip to content

Commit 5cb3428

Browse files
otherwiseguy authored and karelyatin committed
Never raise an exception in notify()
notify() is called from python-ovs code, which is not built to recover from an exception in this user-overridden code. If there is an exception (e.g. the DB server is down when we process the hash ring), this exception can cause an unrecoverable error in processing OVSDB messages, rendering the neutron worker useless.

Change-Id: I5f703d82175d71a222c76df37a82b5ccad890d14
(cherry picked from commit 67e616b)
1 parent 254d3d0 commit 5cb3428

File tree

1 file changed

+35
-31
lines changed

1 file changed

+35
-31
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py

Lines changed: 35 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -718,39 +718,43 @@ def handle_db_schema_changes(self, event, row):
718718
self.driver.agent_chassis_table = 'Chassis_Private'
719719

720720
def notify(self, event, row, updates=None):
721-
self.handle_db_schema_changes(event, row)
722-
self.notify_handler.notify(event, row, updates, global_=True)
723721
try:
724-
target_node = self._hash_ring.get_node(str(row.uuid))
725-
except exceptions.HashRingIsEmpty as e:
726-
LOG.error('HashRing is empty, error: %s', e)
727-
return
728-
if target_node != self._node_uuid:
729-
return
730-
731-
# If the worker hasn't been health checked by the maintenance
732-
# thread (see bug #1834498), indicate that it's alive here
733-
time_now = timeutils.utcnow()
734-
touch_timeout = time_now - datetime.timedelta(
735-
seconds=ovn_const.HASH_RING_TOUCH_INTERVAL)
736-
if not self._last_touch or touch_timeout >= self._last_touch:
737-
# NOTE(lucasagomes): Guard the db operation with an exception
738-
# handler. If heartbeating fails for whatever reason, log
739-
# the error and continue with processing the event
722+
self.handle_db_schema_changes(event, row)
723+
self.notify_handler.notify(event, row, updates, global_=True)
740724
try:
741-
ctx = neutron_context.get_admin_context()
742-
ovn_hash_ring_db.touch_node(ctx, self._node_uuid)
743-
self._last_touch = time_now
744-
except Exception:
745-
LOG.exception('Hash Ring node %s failed to heartbeat',
746-
self._node_uuid)
747-
748-
LOG.debug('Hash Ring: Node %(node)s (host: %(hostname)s) '
749-
'handling event "%(event)s" for row %(row)s '
750-
'(table: %(table)s)',
751-
{'node': self._node_uuid, 'hostname': CONF.host,
752-
'event': event, 'row': row.uuid, 'table': row._table.name})
753-
self.notify_handler.notify(event, row, updates)
725+
target_node = self._hash_ring.get_node(str(row.uuid))
726+
except exceptions.HashRingIsEmpty as e:
727+
LOG.error('HashRing is empty, error: %s', e)
728+
return
729+
if target_node != self._node_uuid:
730+
return
731+
732+
# If the worker hasn't been health checked by the maintenance
733+
# thread (see bug #1834498), indicate that it's alive here
734+
time_now = timeutils.utcnow()
735+
touch_timeout = time_now - datetime.timedelta(
736+
seconds=ovn_const.HASH_RING_TOUCH_INTERVAL)
737+
if not self._last_touch or touch_timeout >= self._last_touch:
738+
# NOTE(lucasagomes): Guard the db operation with an exception
739+
# handler. If heartbeating fails for whatever reason, log
740+
# the error and continue with processing the event
741+
try:
742+
ctx = neutron_context.get_admin_context()
743+
ovn_hash_ring_db.touch_node(ctx, self._node_uuid)
744+
self._last_touch = time_now
745+
except Exception:
746+
LOG.exception('Hash Ring node %s failed to heartbeat',
747+
self._node_uuid)
748+
749+
LOG.debug('Hash Ring: Node %(node)s (host: %(hostname)s) '
750+
'handling event "%(event)s" for row %(row)s '
751+
'(table: %(table)s)',
752+
{'node': self._node_uuid, 'hostname': CONF.host,
753+
'event': event, 'row': row.uuid,
754+
'table': row._table.name})
755+
self.notify_handler.notify(event, row, updates)
756+
except Exception as e:
757+
LOG.exception(e)
754758

755759
@abc.abstractmethod
756760
def post_connect(self):

0 commit comments

Comments
 (0)