Skip to content

Commit 69c49c4

Browse files
committed
fix netns deletion of broken namespaces
normal network namespaces are bind-mounted to files under /var/run/netns. If a process deleting a network namespace gets killed during that operation there is the chance that the bind mount to the netns has been removed, but the file under /var/run/netns still exists. When the neutron-ovn-metadata-agent tries to clean up such network namespaces it first tires to validate that the network namespace is empty. For the cases described above this fails, as this network namespace no longer really exists, but is just a stray file laying around. To fix this we treat network namespaces where we get an `OSError` with errno 22 (Invalid Argument) as empty. The calls to pyroute2 to delete the namespace will then clean up the file. Additionally we add a guard to teardown_datapath to continue even if this fails. failing to remove a datapath is not critical and leaves in the worst case a process and a network namespace running, however previously it would have also prevented the creation of new datapaths which is critical for VM startup. Closes-Bug: #2037102 Change-Id: I7c43812fed5903f98a2e491076c24a8d926a59b4 (cherry picked from commit 566fea3)
1 parent 94cf7a4 commit 69c49c4

File tree

4 files changed

+60
-2
lines changed

4 files changed

+60
-2
lines changed

neutron/agent/linux/ip_lib.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,22 @@ def ensure_namespace(self, name):
259259
return ip
260260

261261
def namespace_is_empty(self):
262-
return not self.get_devices()
262+
try:
263+
return not self.get_devices()
264+
except OSError as e:
265+
# This can happen if we previously got terminated in the middle of
266+
# removing this namespace. In this case the bind mount of the
267+
# namespace under /var/run/netns will be removed, but the namespace
268+
# file is still there. As the bind mount is gone we can no longer
269+
# access the namespace to validate that it is empty. But since it
270+
# should have already been removed we are sure that the check has
271+
# passed the last time and since the namespace is unuseable that
272+
# can not have changed.
273+
# Future calls to pyroute2 to remove that namespace will clean up
274+
# the leftover file.
275+
if e.errno == errno.EINVAL:
276+
return True
277+
raise e
263278

264279
def garbage_collect_namespace(self):
265280
"""Conditionally destroy the namespace if it is empty."""

neutron/agent/ovn/metadata/agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,10 @@ def sync(self, provision=True):
430430
ns.startswith(NS_PREFIX) and
431431
ns not in metadata_namespaces]
432432
for ns in unused_namespaces:
433-
self.teardown_datapath(self._get_datapath_name(ns))
433+
try:
434+
self.teardown_datapath(self._get_datapath_name(ns))
435+
except Exception:
436+
LOG.exception('Error unable to destroy namespace: %s', ns)
434437

435438
# resync all network namespaces based on the associated datapaths,
436439
# even those that are already running. This is to make sure

neutron/tests/unit/agent/linux/test_ip_lib.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,21 @@ def test_garbage_collect_namespace_existing_not_empty(self):
357357
self.assertNotIn(mock.call().delete('ns'),
358358
ip_ns_cmd_cls.mock_calls)
359359

360+
def test_garbage_collect_namespace_existing_broken(self):
361+
with mock.patch.object(ip_lib, 'IpNetnsCommand') as ip_ns_cmd_cls:
362+
ip_ns_cmd_cls.return_value.exists.return_value = True
363+
364+
ip = ip_lib.IPWrapper(namespace='ns')
365+
366+
with mock.patch.object(ip, 'get_devices',
367+
side_effect=OSError(errno.EINVAL, None)
368+
) as mock_get_devices:
369+
self.assertTrue(ip.garbage_collect_namespace())
370+
371+
mock_get_devices.assert_called_once_with()
372+
expected = [mock.call().delete('ns')]
373+
ip_ns_cmd_cls.assert_has_calls(expected)
374+
360375
@mock.patch.object(priv_lib, 'create_interface')
361376
def test_add_vlan(self, create):
362377
retval = ip_lib.IPWrapper().add_vlan('eth0.1', 'eth0', '1')

neutron/tests/unit/agent/ovn/metadata/test_agent.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,31 @@ def test_sync_teardown_namespace(self):
134134
lnn.assert_called_once_with()
135135
tdp.assert_called_once_with('3')
136136

137+
def test_sync_teardown_namespace_does_not_crash_on_error(self):
138+
"""Test that sync tears down unneeded metadata namespaces.
139+
Even if that fails it continues to provision other datapaths
140+
"""
141+
with mock.patch.object(
142+
self.agent, 'provision_datapath') as pdp,\
143+
mock.patch.object(
144+
ip_lib, 'list_network_namespaces',
145+
return_value=['ovnmeta-1', 'ovnmeta-2', 'ovnmeta-3',
146+
'ns1', 'ns2']) as lnn,\
147+
mock.patch.object(
148+
self.agent, 'teardown_datapath',
149+
side_effect=Exception()) as tdp:
150+
self.agent.sync()
151+
152+
pdp.assert_has_calls(
153+
[
154+
mock.call(p.datapath)
155+
for p in self.ports
156+
],
157+
any_order=True
158+
)
159+
lnn.assert_called_once_with()
160+
tdp.assert_called_once_with('3')
161+
137162
def test_get_networks_datapaths(self):
138163
"""Test get_networks_datapaths returns only datapath objects for the
139164
networks containing vif ports of type ''(blank) and 'external'.

0 commit comments

Comments
 (0)