Skip to content

Commit df41288

Browse files
committed
[OVN] Hash Ring: Set nodes as offline upon exit
This patch implements the solution proposed in LP #2024205, where killing a Neutron server could trigger the deletion of the entries from the ovn_hash_ring table that match the server hostname. When this happens on all controllers this could lead to the ovn_hash_ring being rendered empty which will result in ML2/OVN not processing any OVSDB events. Instead of removing the nodes from the ovn_hash_ring table at exit, this patch changes the code to just mark them as offline instead. That way, the nodes will remain registered in the table and the heartbeat thread will set them as online again on the next beat. If the service is stopped properly there won't be any heartbeat anymore and the nodes will be seen as offline by the Hash Ring Manager (same as if they were deleted). For more info see LP #2024205. Closes-Bug: #2024205 Change-Id: I052841c87651773c4988fcf39f9f978094297704 Signed-off-by: Lucas Alvares Gomes <[email protected]> (cherry picked from commit f2e3ab3)
1 parent 52cc015 commit df41288

File tree

5 files changed

+40
-13
lines changed

5 files changed

+40
-13
lines changed

neutron/cmd/ovn/neutron_ovn_db_sync_util.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,12 @@ def post_fork_initialize(self, resource, event, trigger, **kwargs):
5858
def ovn_client(self):
5959
return self._ovn_client
6060

61-
def _clean_hash_ring(self):
62-
"""Don't clean the hash ring.
61+
def _set_hash_ring_nodes_offline(self):
62+
"""Don't set hash ring nodes as offline.
6363
6464
If this method was not overridden, cleanup would be performed when
65-
calling the db sync and running neutron server would lose all the nodes
66-
from the ring.
65+
calling the db sync and running neutron server would mark all the
66+
nodes from the ring as offline.
6767
"""
6868

6969
# Since we are not using the ovn mechanism driver while syncing,

neutron/db/ovn_hash_ring_db.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,12 @@ def remove_nodes_from_host(context, group_name):
5050
CONF.host, group_name)
5151

5252

53-
def _touch(context, **filter_args):
53+
def _touch(context, updated_at=None, **filter_args):
54+
if updated_at is None:
55+
updated_at = timeutils.utcnow()
5456
with db_api.CONTEXT_WRITER.using(context):
5557
context.session.query(ovn_models.OVNHashRing).filter_by(
56-
**filter_args).update({'updated_at': timeutils.utcnow()})
58+
**filter_args).update({'updated_at': updated_at})
5759

5860

5961
def touch_nodes_from_host(context, group_name):
@@ -92,3 +94,9 @@ def get_active_nodes(context, interval, group_name, from_host=False):
9294
def count_offline_nodes(context, interval, group_name):
9395
query = _get_nodes_query(context, interval, group_name, offline=True)
9496
return query.count()
97+
98+
99+
def set_nodes_from_host_as_offline(context, group_name):
100+
timestamp = datetime.datetime(day=26, month=10, year=1985, hour=9)
101+
_touch(context, updated_at=timestamp, hostname=CONF.host,
102+
group_name=group_name)

neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -282,15 +282,17 @@ def subscribe(self):
282282
resources.SECURITY_GROUP_RULE,
283283
events.BEFORE_DELETE)
284284

285-
def _clean_hash_ring(self, *args, **kwargs):
285+
def _set_hash_ring_nodes_offline(self, *args, **kwargs):
286286
admin_context = n_context.get_admin_context()
287-
ovn_hash_ring_db.remove_nodes_from_host(admin_context,
288-
self.hash_ring_group)
287+
ovn_hash_ring_db.set_nodes_from_host_as_offline(
288+
admin_context, self.hash_ring_group)
289+
LOG.info('Hash Ring nodes from host "%s" marked as offline',
290+
cfg.CONF.host)
289291

290292
def pre_fork_initialize(self, resource, event, trigger, payload=None):
291293
"""Pre-initialize the ML2/OVN driver."""
292-
atexit.register(self._clean_hash_ring)
293-
signal.signal(signal.SIGTERM, self._clean_hash_ring)
294+
atexit.register(self._set_hash_ring_nodes_offline)
295+
signal.signal(signal.SIGTERM, self._set_hash_ring_nodes_offline)
294296
ovn_utils.create_neutron_pg_drop()
295297

296298
@staticmethod
@@ -310,7 +312,9 @@ def _setup_hash_ring(self):
310312
"""
311313
admin_context = n_context.get_admin_context()
312314
if not self._hash_ring_probe_event.is_set():
313-
self._clean_hash_ring()
315+
# Clear existing entries
316+
ovn_hash_ring_db.remove_nodes_from_host(admin_context,
317+
self.hash_ring_group)
314318
self.node_uuid = ovn_hash_ring_db.add_node(admin_context,
315319
self.hash_ring_group)
316320
self._hash_ring_thread = maintenance.MaintenanceThread()

neutron/tests/functional/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,8 @@ def trigger(self):
362362
self.addCleanup(self.stop)
363363
# NOTE(ralonsoh): do not access to the DB at exit when the SQL
364364
# connection is already closed, to avoid useless exception messages.
365-
mock.patch.object(self.mech_driver, '_clean_hash_ring').start()
365+
mock.patch.object(
366+
self.mech_driver, '_set_hash_ring_nodes_offline').start()
366367
self.mech_driver.pre_fork_initialize(
367368
mock.ANY, mock.ANY, trigger_cls.trigger)
368369

neutron/tests/unit/db/test_ovn_hash_ring_db.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,17 @@ def test_count_offline_nodes(self):
269269
# Assert no nodes are considered offline
270270
self.assertEqual(0, ovn_hash_ring_db.count_offline_nodes(
271271
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP))
272+
273+
def test_set_nodes_from_host_as_offline(self):
274+
self._add_nodes_and_assert_exists(count=3)
275+
276+
active_nodes = ovn_hash_ring_db.get_active_nodes(
277+
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP)
278+
self.assertEqual(3, len(active_nodes))
279+
280+
ovn_hash_ring_db.set_nodes_from_host_as_offline(
281+
self.admin_ctx, HASH_RING_TEST_GROUP)
282+
283+
active_nodes = ovn_hash_ring_db.get_active_nodes(
284+
self.admin_ctx, interval=60, group_name=HASH_RING_TEST_GROUP)
285+
self.assertEqual(0, len(active_nodes))

0 commit comments

Comments
 (0)