Skip to content

Commit abb31b6

Browse files
felixhuettnermiguellavalle
authored andcommitted
fix netns deletion of broken namespaces
normal network namespaces are bind-mounted to files under /var/run/netns. If a process deleting a network namespace gets killed during that operation there is the chance that the bind mount to the netns has been removed, but the file under /var/run/netns still exists. When the neutron-ovn-metadata-agent tries to clean up such network namespaces it first tires to validate that the network namespace is empty. For the cases described above this fails, as this network namespace no longer really exists, but is just a stray file laying around. To fix this we treat network namespaces where we get an `OSError` with errno 22 (Invalid Argument) as empty. The calls to pyroute2 to delete the namespace will then clean up the file. Additionally we add a guard to teardown_datapath to continue even if this fails. failing to remove a datapath is not critical and leaves in the worst case a process and a network namespace running, however previously it would have also prevented the creation of new datapaths which is critical for VM startup. Closes-Bug: #2037102 Change-Id: I7c43812fed5903f98a2e491076c24a8d926a59b4 (cherry picked from commit 566fea3)
1 parent 2903975 commit abb31b6

File tree

4 files changed

+60
-2
lines changed

4 files changed

+60
-2
lines changed

neutron/agent/linux/ip_lib.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,22 @@ def ensure_namespace(self, name):
260260
return ip
261261

262262
def namespace_is_empty(self):
263-
return not self.get_devices()
263+
try:
264+
return not self.get_devices()
265+
except OSError as e:
266+
# This can happen if we previously got terminated in the middle of
267+
# removing this namespace. In this case the bind mount of the
268+
# namespace under /var/run/netns will be removed, but the namespace
269+
# file is still there. As the bind mount is gone we can no longer
270+
# access the namespace to validate that it is empty. But since it
271+
# should have already been removed we are sure that the check has
272+
# passed the last time and since the namespace is unuseable that
273+
# can not have changed.
274+
# Future calls to pyroute2 to remove that namespace will clean up
275+
# the leftover file.
276+
if e.errno == errno.EINVAL:
277+
return True
278+
raise e
264279

265280
def garbage_collect_namespace(self):
266281
"""Conditionally destroy the namespace if it is empty."""

neutron/agent/ovn/metadata/agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,10 @@ def sync(self):
371371
ns.startswith(NS_PREFIX) and
372372
ns not in metadata_namespaces]
373373
for ns in unused_namespaces:
374-
self.teardown_datapath(self._get_datapath_name(ns))
374+
try:
375+
self.teardown_datapath(self._get_datapath_name(ns))
376+
except Exception:
377+
LOG.exception('Error unable to destroy namespace: %s', ns)
375378

376379
# resync all network namespaces based on the associated datapaths,
377380
# even those that are already running. This is to make sure

neutron/tests/unit/agent/linux/test_ip_lib.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,21 @@ def test_garbage_collect_namespace_existing_not_empty(self):
356356
self.assertNotIn(mock.call().delete('ns'),
357357
ip_ns_cmd_cls.mock_calls)
358358

359+
def test_garbage_collect_namespace_existing_broken(self):
360+
with mock.patch.object(ip_lib, 'IpNetnsCommand') as ip_ns_cmd_cls:
361+
ip_ns_cmd_cls.return_value.exists.return_value = True
362+
363+
ip = ip_lib.IPWrapper(namespace='ns')
364+
365+
with mock.patch.object(ip, 'get_devices',
366+
side_effect=OSError(errno.EINVAL, None)
367+
) as mock_get_devices:
368+
self.assertTrue(ip.garbage_collect_namespace())
369+
370+
mock_get_devices.assert_called_once_with()
371+
expected = [mock.call().delete('ns')]
372+
ip_ns_cmd_cls.assert_has_calls(expected)
373+
359374
@mock.patch.object(priv_lib, 'create_interface')
360375
def test_add_vlan(self, create):
361376
retval = ip_lib.IPWrapper().add_vlan('eth0.1', 'eth0', '1')

neutron/tests/unit/agent/ovn/metadata/test_agent.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,31 @@ def test_sync_teardown_namespace(self):
134134
lnn.assert_called_once_with()
135135
tdp.assert_called_once_with('3')
136136

137+
def test_sync_teardown_namespace_does_not_crash_on_error(self):
138+
"""Test that sync tears down unneeded metadata namespaces.
139+
Even if that fails it continues to provision other datapaths
140+
"""
141+
with mock.patch.object(
142+
self.agent, 'provision_datapath') as pdp,\
143+
mock.patch.object(
144+
ip_lib, 'list_network_namespaces',
145+
return_value=['ovnmeta-1', 'ovnmeta-2', 'ovnmeta-3',
146+
'ns1', 'ns2']) as lnn,\
147+
mock.patch.object(
148+
self.agent, 'teardown_datapath',
149+
side_effect=Exception()) as tdp:
150+
self.agent.sync()
151+
152+
pdp.assert_has_calls(
153+
[
154+
mock.call(p.datapath)
155+
for p in self.ports
156+
],
157+
any_order=True
158+
)
159+
lnn.assert_called_once_with()
160+
tdp.assert_called_once_with('3')
161+
137162
def test_get_networks_datapaths(self):
138163
"""Test get_networks_datapaths returns only datapath objects for the
139164
networks containing vif ports of type ''(blank) and 'external'.

0 commit comments

Comments
 (0)