Skip to content

Commit 3785eac

Browse files
committed
[OVN] Hash Ring: Set nodes as offline upon exit
This patch implements the proposed solution from LP #2024205 where upon Neutron being killed, it could trigger the deletion of the entries from the ovn_hash_ring table that match the server hostname. When this happens on all controllers this could lead to the ovn_hash_ring being rendered empty which will result in ML2/OVN not processing any OVSDB events. Instead of removing the nodes from the ovn_hash_ring table at exit, this patch changes the code to just mark them as offline instead. That way, the nodes will remain registered in the table and the heartbeat thread will set them as online again on the next beat. If the service is stopped properly there won't be any heartbeat anymore and the nodes will be seen as offline by the Hash Ring Manager (same as if they were deleted). For more info see LP #2024205. Conflicts: neutron/cmd/ovn/neutron_ovn_db_sync_util.py Closes-Bug: #2024205 Change-Id: I052841c87651773c4988fcf39f9f978094297704 Signed-off-by: Lucas Alvares Gomes <[email protected]> (cherry picked from commit f2e3ab3)
1 parent 73ba302 commit 3785eac

File tree

5 files changed

+41
-14
lines changed

5 files changed

+41
-14
lines changed

neutron/cmd/ovn/neutron_ovn_db_sync_util.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,12 @@ def post_fork_initialize(self, resource, event, trigger, **kwargs):
5757
def ovn_client(self):
5858
return self._ovn_client
5959

60-
def _clean_hash_ring(self):
61-
"""Don't clean the hash ring.
60+
def _set_hash_ring_nodes_offline(self):
61+
"""Don't set hash ring nodes as offline.
6262
63-
If this method was not overriden, cleanup would be performed when
64-
calling the db sync and running neutron server would lose all the nodes
65-
from the ring.
63+
If this method was not overridden, cleanup would be performed when
64+
calling the db sync and running neutron server would mark all the
65+
nodes from the ring as offline.
6666
"""
6767

6868
# Since we are not using the ovn mechanism driver while syncing,

neutron/db/ovn_hash_ring_db.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,12 @@ def remove_nodes_from_host(context, group_name):
5050
CONF.host, group_name)
5151

5252

53-
def _touch(context, **filter_args):
53+
def _touch(context, updated_at=None, **filter_args):
54+
if updated_at is None:
55+
updated_at = timeutils.utcnow()
5456
with db_api.CONTEXT_WRITER.using(context):
5557
context.session.query(ovn_models.OVNHashRing).filter_by(
56-
**filter_args).update({'updated_at': timeutils.utcnow()})
58+
**filter_args).update({'updated_at': updated_at})
5759

5860

5961
def touch_nodes_from_host(context, group_name):
@@ -92,3 +94,9 @@ def get_active_nodes(context, interval, group_name, from_host=False):
9294
def count_offline_nodes(context, interval, group_name):
9395
query = _get_nodes_query(context, interval, group_name, offline=True)
9496
return query.count()
97+
98+
99+
def set_nodes_from_host_as_offline(context, group_name):
100+
timestamp = datetime.datetime(day=26, month=10, year=1985, hour=9)
101+
_touch(context, updated_at=timestamp, hostname=CONF.host,
102+
group_name=group_name)

neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -270,15 +270,17 @@ def subscribe(self):
270270
resources.SECURITY_GROUP_RULE,
271271
events.BEFORE_DELETE)
272272

273-
def _clean_hash_ring(self, *args, **kwargs):
273+
def _set_hash_ring_nodes_offline(self, *args, **kwargs):
274274
admin_context = n_context.get_admin_context()
275-
ovn_hash_ring_db.remove_nodes_from_host(admin_context,
276-
self.hash_ring_group)
275+
ovn_hash_ring_db.set_nodes_from_host_as_offline(
276+
admin_context, self.hash_ring_group)
277+
LOG.info('Hash Ring nodes from host "%s" marked as offline',
278+
cfg.CONF.host)
277279

278280
def pre_fork_initialize(self, resource, event, trigger, payload=None):
279281
"""Pre-initialize the ML2/OVN driver."""
280-
atexit.register(self._clean_hash_ring)
281-
signal.signal(signal.SIGTERM, self._clean_hash_ring)
282+
atexit.register(self._set_hash_ring_nodes_offline)
283+
signal.signal(signal.SIGTERM, self._set_hash_ring_nodes_offline)
282284
ovn_utils.create_neutron_pg_drop()
283285

284286
@staticmethod
@@ -298,7 +300,9 @@ def _setup_hash_ring(self):
298300
"""
299301
admin_context = n_context.get_admin_context()
300302
if not self._hash_ring_probe_event.is_set():
301-
self._clean_hash_ring()
303+
# Clear existing entries
304+
ovn_hash_ring_db.remove_nodes_from_host(admin_context,
305+
self.hash_ring_group)
302306
self.node_uuid = ovn_hash_ring_db.add_node(admin_context,
303307
self.hash_ring_group)
304308
self._hash_ring_thread = maintenance.MaintenanceThread()

neutron/tests/functional/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,8 @@ def trigger(self):
362362
self.addCleanup(self.stop)
363363
# NOTE(ralonsoh): do not access to the DB at exit when the SQL
364364
# connection is already closed, to avoid useless exception messages.
365-
mock.patch.object(self.mech_driver, '_clean_hash_ring').start()
365+
mock.patch.object(
366+
self.mech_driver, '_set_hash_ring_nodes_offline').start()
366367
self.mech_driver.pre_fork_initialize(
367368
mock.ANY, mock.ANY, trigger_cls.trigger)
368369

neutron/tests/unit/db/test_ovn_hash_ring_db.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,17 @@ def test_count_offline_nodes(self):
269269
# Assert no nodes are considered offline
270270
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
271271
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
272+
273+
def test_set_nodes_from_host_as_offline(self):
274+
self._add_nodes_and_assert_exists(count=3)
275+
276+
active_nodes = ovn_hash_ring_db.get_active_nodes(
277+
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP)
278+
self.assertEqual(3, len(active_nodes))
279+
280+
ovn_hash_ring_db.set_nodes_from_host_as_offline(
281+
self.admin_ctx, HASH_RING_TEST_GROUP)
282+
283+
active_nodes = ovn_hash_ring_db.get_active_nodes(
284+
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP)
285+
self.assertEqual(0, len(active_nodes))

0 commit comments

Comments
 (0)