Skip to content

Commit 38ac223

Browse files
felixhuettners10
authored andcommitted
fix netns deletion of broken namespaces
normal network namespaces are bind-mounted to files under /var/run/netns. If a process deleting a network namespace gets killed during that operation there is the chance that the bind mount to the netns has been removed, but the file under /var/run/netns still exists. When the neutron-ovn-metadata-agent tries to clean up such network namespaces it first tires to validate that the network namespace is empty. For the cases described above this fails, as this network namespace no longer really exists, but is just a stray file laying around. To fix this we treat network namespaces where we get an `OSError` with errno 22 (Invalid Argument) as empty. The calls to pyroute2 to delete the namespace will then clean up the file. Additionally we add a guard to teardown_datapath to continue even if this fails. failing to remove a datapath is not critical and leaves in the worst case a process and a network namespace running, however previously it would have also prevented the creation of new datapaths which is critical for VM startup. Conflicts: neutron/tests/unit/agent/ovn/metadata/test_agent.py Closes-Bug: #2037102 Change-Id: I7c43812fed5903f98a2e491076c24a8d926a59b4 (cherry picked from commit 566fea3) (cherry picked from commit 69c49c4)
1 parent cf21a4d commit 38ac223

File tree

4 files changed

+55
-2
lines changed

4 files changed

+55
-2
lines changed

neutron/agent/linux/ip_lib.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,22 @@ def ensure_namespace(self, name):
260260
return ip
261261

262262
def namespace_is_empty(self):
263-
return not self.get_devices()
263+
try:
264+
return not self.get_devices()
265+
except OSError as e:
266+
# This can happen if we previously got terminated in the middle of
267+
# removing this namespace. In this case the bind mount of the
268+
# namespace under /var/run/netns will be removed, but the namespace
269+
# file is still there. As the bind mount is gone we can no longer
270+
# access the namespace to validate that it is empty. But since it
271+
# should have already been removed we are sure that the check has
272+
# passed the last time and since the namespace is unuseable that
273+
# can not have changed.
274+
# Future calls to pyroute2 to remove that namespace will clean up
275+
# the leftover file.
276+
if e.errno == errno.EINVAL:
277+
return True
278+
raise e
264279

265280
def garbage_collect_namespace(self):
266281
"""Conditionally destroy the namespace if it is empty."""

neutron/agent/ovn/metadata/agent.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,10 @@ def sync(self):
371371
ns.startswith(NS_PREFIX) and
372372
ns not in metadata_namespaces]
373373
for ns in unused_namespaces:
374-
self.teardown_datapath(self._get_datapath_name(ns))
374+
try:
375+
self.teardown_datapath(self._get_datapath_name(ns))
376+
except Exception:
377+
LOG.exception('Error unable to destroy namespace: %s', ns)
375378

376379
# resync all network namespaces based on the associated datapaths,
377380
# even those that are already running. This is to make sure

neutron/tests/unit/agent/linux/test_ip_lib.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,21 @@ def test_garbage_collect_namespace_existing_not_empty(self):
357357
self.assertNotIn(mock.call().delete('ns'),
358358
ip_ns_cmd_cls.mock_calls)
359359

360+
def test_garbage_collect_namespace_existing_broken(self):
361+
with mock.patch.object(ip_lib, 'IpNetnsCommand') as ip_ns_cmd_cls:
362+
ip_ns_cmd_cls.return_value.exists.return_value = True
363+
364+
ip = ip_lib.IPWrapper(namespace='ns')
365+
366+
with mock.patch.object(ip, 'get_devices',
367+
side_effect=OSError(errno.EINVAL, None)
368+
) as mock_get_devices:
369+
self.assertTrue(ip.garbage_collect_namespace())
370+
371+
mock_get_devices.assert_called_once_with()
372+
expected = [mock.call().delete('ns')]
373+
ip_ns_cmd_cls.assert_has_calls(expected)
374+
360375
@mock.patch.object(priv_lib, 'create_interface')
361376
def test_add_vlan(self, create):
362377
retval = ip_lib.IPWrapper().add_vlan('eth0.1', 'eth0', '1')

neutron/tests/unit/agent/ovn/metadata/test_agent.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,26 @@ def test_sync_teardown_namespace(self):
124124
lnn.assert_called_once_with()
125125
tdp.assert_called_once_with('3')
126126

127+
def test_sync_teardown_namespace_does_not_crash_on_error(self):
128+
"""Test that sync tears down unneeded metadata namespaces.
129+
Even if that fails it continues to provision other datapaths
130+
"""
131+
with mock.patch.object(
132+
self.agent, 'provision_datapath') as pdp,\
133+
mock.patch.object(
134+
ip_lib, 'list_network_namespaces',
135+
return_value=['ovnmeta-1', 'ovnmeta-2', 'ovnmeta-3',
136+
'ns1', 'ns2']) as lnn,\
137+
mock.patch.object(
138+
self.agent, 'teardown_datapath',
139+
side_effect=Exception()) as tdp:
140+
self.agent.sync()
141+
142+
pdp.assert_has_calls([mock.call(p) for p in self.ports],
143+
any_order=True)
144+
lnn.assert_called_once_with()
145+
tdp.assert_called_once_with('3')
146+
127147
def test_get_networks_port_bindings(self):
128148
"""Test get_networks_port_bindings returns only the port binding
129149
objects for ports with VIF type empty ('') or 'external'.

0 commit comments

Comments
 (0)