Skip to content

Commit dd7fc47

Browse files
committed
[OVN] Avoid deadlock when cleaning hash ring nodes
This patch avoids the clash between the hash ring cleaning operation and the API workers by ensuring that the cleaning happens before the nodes for that host are added to the ring and before the connections to the OVSDBs are established (meaning no events, and therefore no SELECTs on the hash ring table for that hostname). This patch does this by re-using the same hash ring lock that starts the probing thread. Now, the first worker that acquires the lock is responsible for cleaning the hash ring for its own host as well as starting the probing thread. Subsequent workers only need to register themselves to the hash ring. Conflicts: neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py Change-Id: Iba73f7944592a003232eb397ba1d4da3dcba5c3a Closes-Bug: #1990174 Signed-off-by: Lucas Alvares Gomes <[email protected]> (cherry picked from commit b7b8f7c)
1 parent 025f3e3 commit dd7fc47

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,6 @@ def initialize(self):
117117
self.node_uuid = None
118118
self.hash_ring_group = ovn_const.HASH_RING_ML2_GROUP
119119
self.sg_enabled = ovn_acl.is_sg_enabled()
120-
# NOTE(lucasagomes): _clean_hash_ring() must be called before
121-
# self.subscribe() to avoid processes racing when adding or
122-
# deleting nodes from the Hash Ring during service initialization
123-
self._clean_hash_ring()
124120
self._post_fork_event = threading.Event()
125121
if cfg.CONF.SECURITYGROUP.firewall_driver:
126122
LOG.warning('Firewall driver configuration is ignored')
@@ -331,15 +327,29 @@ def should_post_fork_initialize(worker_class):
331327
service.RpcWorker)
332328

333329
@lockutils.synchronized('hash_ring_probe_lock', external=True)
334-
def _start_hash_ring_probe(self):
330+
def _setup_hash_ring(self):
331+
"""Setup the hash ring.
332+
333+
The first worker to acquire the lock is responsible for cleaning
334+
the hash ring from previous runs as well as start the probing
335+
thread for this host. Subsequently workers just need to register
336+
themselves to the hash ring.
337+
"""
338+
admin_context = n_context.get_admin_context()
335339
if not self._hash_ring_probe_event.is_set():
340+
self._clean_hash_ring()
341+
self.node_uuid = ovn_hash_ring_db.add_node(admin_context,
342+
self.hash_ring_group)
336343
self._hash_ring_thread = maintenance.MaintenanceThread()
337344
self._hash_ring_thread.add_periodics(
338345
maintenance.HashRingHealthCheckPeriodics(
339346
self.hash_ring_group))
340347
self._hash_ring_thread.start()
341348
LOG.info("Hash Ring probing thread has started")
342349
self._hash_ring_probe_event.set()
350+
else:
351+
self.node_uuid = ovn_hash_ring_db.add_node(admin_context,
352+
self.hash_ring_group)
343353

344354
def post_fork_initialize(self, resource, event, trigger, payload=None):
345355
# Initialize API/Maintenance workers with OVN IDL connections
@@ -351,10 +361,7 @@ def post_fork_initialize(self, resource, event, trigger, payload=None):
351361
self._ovn_client_inst = None
352362

353363
if worker_class == neutron.wsgi.WorkerService:
354-
admin_context = n_context.get_admin_context()
355-
self.node_uuid = ovn_hash_ring_db.add_node(admin_context,
356-
self.hash_ring_group)
357-
self._start_hash_ring_probe()
364+
self._setup_hash_ring()
358365

359366
n_agent.AgentCache(self) # Initialize singleton agent cache
360367
self.nb_ovn, self.sb_ovn = impl_idl_ovn.get_ovn_idls(self, trigger)

0 commit comments

Comments
 (0)