Skip to content

Commit defb601

Browse files
committed
Suppress IPv6 metadata DAD failure and delete address
IPv4 DAD is non-existent in Linux or its failure is silent, so we never needed to catch and ignore it. On the other hand IPv6 DAD failure is explicit, hence comes this change. This of course leaves the metadata service dead on hosts where duplicate address detection failed. But if we catch the DADFailed exception and delete the address, at least other functions of the dhcp-agent should not be affected. With this the IPv6 isolated metadata service is not redundant, which is the best we can do without a redesign. Also document the promised service level of isolated metadata. Added additional tests for the metadata driver as well. Change-Id: I6b544c5528cb22e5e8846fc47dfb8b05f70f975c Partial-Bug: #1953165 (cherry picked from commit 2aee961) (cherry picked from commit 071255f) (cherry picked from commit 1c61528)
1 parent 28dd08a commit defb601

File tree

11 files changed

+139
-27
lines changed

11 files changed

+139
-27
lines changed

doc/source/admin/config-dhcp-ha.rst

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,38 @@ To test the HA of DHCP agent:
442442

443443
#. Start DHCP agent on HostB. The VM gets the wanted IP again.
444444

445+
No HA for metadata service on isolated networks
446+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
447+
448+
All Neutron backends using the DHCP agent can also provide `metadata service
449+
<https://docs.openstack.org/nova/latest/user/metadata.html>`_ in isolated
450+
networks (i.e. networks without a router). In this case the DHCP agent manages
451+
the metadata service (see config option `enable_isolated_metadata
452+
<https://docs.openstack.org/neutron/latest/configuration/dhcp-agent.html#DEFAULT.enable_isolated_metadata>`_).
453+
454+
Note however that the metadata service is only redundant for IPv4, and not
455+
IPv6, even when the DHCP service is configured to be highly available
456+
(config option `dhcp_agents_per_network
457+
<https://docs.openstack.org/neutron/latest/configuration/neutron.html#DEFAULT.dhcp_agents_per_network>`_
458+
> 1). This is because the DHCP agent will insert a route to the well known
459+
metadata IPv4 address (`169.254.169.254`) via its own IP address, so it will
460+
be reachable as long as the DHCP service is available at that IP address.
461+
This also means that recovery after a failure is tied to the renewal of the
462+
DHCP lease, since that route will only change if the DHCP server for a VM
463+
changes.
464+
465+
With IPv6, the well known metadata IPv6 address (`fe80::a9fe:a9fe`) is used,
466+
but directly configured in the DHCP agent network namespace.
467+
Due to the enforcement of duplicate address detection (DAD), this address
468+
can only be configured in at most one DHCP network namespaces at any time.
469+
See `RFC 4862 <https://www.rfc-editor.org/rfc/rfc4862#section-5.4>`_ for
470+
details on the DAD process.
471+
472+
For this reason, even when you have multiple DHCP agents, an arbitrary one
473+
(where the metadata IPv6 address is not in `dadfailed` state) will serve all
474+
metadata requests over IPv6. When that metadata service instance becomes
475+
unreachable there is no failover and the service will become unreachable.
476+
445477
Disabling and removing an agent
446478
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
447479

neutron/agent/linux/dhcp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from neutron.agent.linux import ip_lib
4141
from neutron.agent.linux import iptables_manager
4242
from neutron.cmd import runtime_checks as checks
43+
from neutron.common import _constants as common_constants
4344
from neutron.common import utils as common_utils
4445
from neutron.ipam import utils as ipam_utils
4546
from neutron.privileged.agent.linux import dhcp as priv_dhcp
@@ -1773,7 +1774,7 @@ def setup(self, network):
17731774
if self.conf.force_metadata or self.conf.enable_isolated_metadata:
17741775
ip_cidrs.append(constants.METADATA_CIDR)
17751776
if netutils.is_ipv6_enabled():
1776-
ip_cidrs.append(constants.METADATA_V6_CIDR)
1777+
ip_cidrs.append(common_constants.METADATA_V6_CIDR)
17771778

17781779
self.driver.init_l3(interface_name, ip_cidrs,
17791780
namespace=network.namespace)

neutron/agent/linux/ip_lib.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ class AddressNotReady(exceptions.NeutronException):
103103
"become ready: %(reason)s")
104104

105105

106+
class DADFailed(AddressNotReady):
107+
pass
108+
109+
106110
InvalidArgument = privileged.InvalidArgument
107111

108112

@@ -581,7 +585,7 @@ def wait_until_address_ready(self, address, wait_time=30):
581585
"""Wait until an address is no longer marked 'tentative' or 'dadfailed'
582586
583587
raises AddressNotReady if times out, address not present on interface
584-
or DAD fails
588+
raises DADFailed if Duplicate Address Detection fails
585589
"""
586590
def is_address_ready():
587591
try:
@@ -593,7 +597,7 @@ def is_address_ready():
593597
# Since both 'dadfailed' and 'tentative' will be set if DAD fails,
594598
# check 'dadfailed' first just to be explicit
595599
if addr_info['dadfailed']:
596-
raise AddressNotReady(
600+
raise DADFailed(
597601
address=address, reason=_('Duplicate address detected'))
598602
if addr_info['tentative']:
599603
return False

neutron/agent/metadata/driver.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from neutron.agent.linux import external_process
3434
from neutron.agent.linux import ip_lib
3535
from neutron.agent.linux import utils as linux_utils
36+
from neutron.common import _constants as common_constants
3637
from neutron.common import coordination
3738
from neutron.common import utils as common_utils
3839

@@ -266,9 +267,30 @@ def spawn_monitored_metadata_proxy(cls, monitor, ns_name, port, conf,
266267
# HAProxy cannot bind() until IPv6 Duplicate Address Detection
267268
# completes. We must wait until the address leaves its 'tentative'
268269
# state.
269-
ip_lib.IpAddrCommand(
270-
parent=ip_lib.IPDevice(name=bind_interface, namespace=ns_name)
271-
).wait_until_address_ready(address=bind_address_v6)
270+
try:
271+
ip_lib.IpAddrCommand(
272+
parent=ip_lib.IPDevice(name=bind_interface,
273+
namespace=ns_name)
274+
).wait_until_address_ready(address=bind_address_v6)
275+
except ip_lib.DADFailed as exc:
276+
# This failure means that another DHCP agent has already
277+
# configured this metadata address, so all requests will
278+
# be via that single agent.
279+
LOG.info('DAD failed for address %(address)s on interface '
280+
'%(interface)s in namespace %(namespace)s on network '
281+
'%(network)s, deleting it. Exception: %(exception)s',
282+
{'address': bind_address_v6,
283+
'interface': bind_interface,
284+
'namespace': ns_name,
285+
'network': network_id,
286+
'exception': str(exc)})
287+
try:
288+
ip_lib.delete_ip_address(bind_address_v6, bind_interface,
289+
namespace=ns_name)
290+
except Exception as exc:
291+
# do not re-raise a delete failure, just log
292+
LOG.info('Address deletion failure: %s', str(exc))
293+
return
272294
pm.enable()
273295
monitor.register(uuid, METADATA_SERVICE_NAME, pm)
274296
cls.monitors[router_id] = pm
@@ -363,6 +385,6 @@ def apply_metadata_nat_rules(router, proxy):
363385
if netutils.is_ipv6_enabled():
364386
for c, r in proxy.metadata_nat_rules(
365387
proxy.metadata_port,
366-
metadata_address=(constants.METADATA_V6_IP + '/128')):
388+
metadata_address=(common_constants.METADATA_V6_CIDR)):
367389
router.iptables_manager.ipv6['nat'].add_rule(c, r)
368390
router.iptables_manager.apply()

neutron/common/_constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,6 @@
8181

8282
# The lowest binding index for L3 agents and DHCP agents.
8383
LOWEST_AGENT_BINDING_INDEX = 1
84+
85+
# Neutron-lib defines this with a /64 but it should be /128
86+
METADATA_V6_CIDR = constants.METADATA_V6_IP + '/128'

neutron/conf/agent/database/agentschedulers_db.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
'network. If this number is greater than 1, the '
3333
'scheduler automatically assigns multiple DHCP agents '
3434
'for a given tenant network, providing high '
35-
'availability for DHCP service.')),
35+
'availability for the DHCP service. However this does '
36+
'not provide high availability for the IPv6 metadata '
37+
'service in isolated networks.')),
3638
cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
3739
default=False,
3840
help=_('Enable services on an agent with admin_state_up '

neutron/tests/unit/agent/dhcp/test_agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from neutron.agent.linux import interface
3838
from neutron.agent.linux import utils as linux_utils
3939
from neutron.agent.metadata import driver as metadata_driver
40+
from neutron.common import _constants as common_constants
4041
from neutron.common import config as common_config
4142
from neutron.common import utils
4243
from neutron.conf.agent import common as config
@@ -1970,7 +1971,7 @@ def _test_setup_helper(self, device_is_ready, ipv6_enabled=True,
19701971
expected_ips = ['172.9.9.9/24', const.METADATA_CIDR]
19711972

19721973
if ipv6_enabled:
1973-
expected_ips.append(const.METADATA_V6_CIDR)
1974+
expected_ips.append(common_constants.METADATA_V6_CIDR)
19741975

19751976
expected = [mock.call.get_device_name(port)]
19761977

neutron/tests/unit/agent/linux/test_dhcp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from neutron.agent.linux import dhcp
3333
from neutron.agent.linux import ip_lib
3434
from neutron.cmd import runtime_checks as checks
35+
from neutron.common import _constants as common_constants
3536
from neutron.conf.agent import common as config
3637
from neutron.conf.agent import dhcp as dhcp_config
3738
from neutron.conf import common as base_config
@@ -3258,7 +3259,7 @@ def mock_update(port_id, dict):
32583259
if enable_isolated_metadata or force_metadata:
32593260
expect_ips.extend([
32603261
constants.METADATA_CIDR,
3261-
constants.METADATA_V6_CIDR])
3262+
common_constants.METADATA_V6_CIDR])
32623263
mgr.driver.init_l3.assert_called_with('ns-XXX',
32633264
expect_ips,
32643265
namespace='qdhcp-ns')

neutron/tests/unit/agent/linux/test_ip_lib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,7 @@ def test_wait_until_address_ready_non_existent_address(self):
790790
def test_wait_until_address_dadfailed(self):
791791
self.addr_cmd.list = mock.Mock(
792792
return_value=[{'tentative': True, 'dadfailed': True}])
793-
with testtools.ExpectedException(ip_lib.AddressNotReady):
793+
with testtools.ExpectedException(ip_lib.DADFailed):
794794
self.addr_cmd.wait_until_address_ready('abcd::1234')
795795

796796
@mock.patch.object(common_utils, 'wait_until_true')

neutron/tests/unit/agent/metadata/test_driver.py

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from neutron.agent.l3 import agent as l3_agent
2626
from neutron.agent.l3 import router_info
27+
from neutron.agent.linux import ip_lib
2728
from neutron.agent.linux import iptables_manager
2829
from neutron.agent.linux import utils as linux_utils
2930
from neutron.agent.metadata import driver as metadata_driver
@@ -74,6 +75,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
7475
EUNAME = 'neutron'
7576
EGNAME = 'neutron'
7677
METADATA_DEFAULT_IP = '169.254.169.254'
78+
METADATA_DEFAULT_IPV6 = 'fe80::a9fe:a9fe'
7779
METADATA_PORT = 8080
7880
METADATA_SOCKET = '/socket/path'
7981
PIDFILE = 'pidfile'
@@ -138,7 +140,7 @@ def test_after_router_updated_should_not_call_add_metadata_rules(self):
138140
agent._process_updated_router(router)
139141
f.assert_not_called()
140142

141-
def test_spawn_metadata_proxy(self):
143+
def _test_spawn_metadata_proxy(self, dad_failed=False):
142144
router_id = _uuid()
143145
router_ns = 'qrouter-%s' % router_id
144146
ip_class_path = 'neutron.agent.linux.ip_lib.IPWrapper'
@@ -162,29 +164,41 @@ def test_spawn_metadata_proxy(self):
162164
'NamespaceManager.list_all', return_value={}),\
163165
mock.patch(
164166
'neutron.agent.linux.ip_lib.'
165-
'IpAddrCommand.wait_until_address_ready') as mock_wait:
167+
'IpAddrCommand.wait_until_address_ready') as mock_wait,\
168+
mock.patch(
169+
'neutron.agent.linux.ip_lib.'
170+
'delete_ip_address') as mock_del:
166171
agent = l3_agent.L3NATAgent('localhost')
172+
agent.process_monitor = mock.Mock()
167173
cfg_file = os.path.join(
168174
metadata_driver.HaproxyConfigurator.get_config_path(
169175
agent.conf.state_path),
170176
"%s.conf" % router_id)
171177
mock_open = self.useFixture(
172178
lib_fixtures.OpenFixture(cfg_file)).mock_open
173-
mock_wait.return_value = True
179+
if dad_failed:
180+
mock_wait.side_effect = ip_lib.DADFailed(
181+
address=self.METADATA_DEFAULT_IP, reason='DAD failed')
182+
else:
183+
mock_wait.return_value = True
174184
agent.metadata_driver.spawn_monitored_metadata_proxy(
175185
agent.process_monitor,
176186
router_ns,
177187
self.METADATA_PORT,
178188
agent.conf,
179189
bind_address=self.METADATA_DEFAULT_IP,
180-
router_id=router_id)
190+
router_id=router_id,
191+
bind_address_v6=self.METADATA_DEFAULT_IPV6,
192+
bind_interface='fake-if')
181193

182194
netns_execute_args = [
183195
'haproxy',
184196
'-f', cfg_file]
185197

186198
log_tag = ("haproxy-" + metadata_driver.METADATA_SERVICE_NAME +
187199
"-" + router_id)
200+
bind_v6_line = 'bind %s:%s interface %s' % (
201+
self.METADATA_DEFAULT_IPV6, self.METADATA_PORT, 'fake-if')
188202
cfg_contents = metadata_driver._HAPROXY_CONFIG_TEMPLATE % {
189203
'user': self.EUNAME,
190204
'group': self.EGNAME,
@@ -197,18 +211,34 @@ def test_spawn_metadata_proxy(self):
197211
'pidfile': self.PIDFILE,
198212
'log_level': 'debug',
199213
'log_tag': log_tag,
200-
'bind_v6_line': ''}
201-
202-
mock_open.assert_has_calls([
203-
mock.call(cfg_file, 'w'),
204-
mock.call().write(cfg_contents)],
205-
any_order=True)
206-
207-
ip_mock.assert_has_calls([
208-
mock.call(namespace=router_ns),
209-
mock.call().netns.execute(netns_execute_args, addl_env=None,
210-
run_as_root=True)
211-
])
214+
'bind_v6_line': bind_v6_line}
215+
216+
if dad_failed:
217+
agent.process_monitor.register.assert_not_called()
218+
mock_del.assert_called_once_with(self.METADATA_DEFAULT_IPV6,
219+
'fake-if',
220+
namespace=router_ns)
221+
else:
222+
mock_open.assert_has_calls([
223+
mock.call(cfg_file, 'w'),
224+
mock.call().write(cfg_contents)], any_order=True)
225+
226+
ip_mock.assert_has_calls([
227+
mock.call(namespace=router_ns),
228+
mock.call().netns.execute(netns_execute_args,
229+
addl_env=None, run_as_root=True)
230+
])
231+
232+
agent.process_monitor.register.assert_called_once_with(
233+
router_id, metadata_driver.METADATA_SERVICE_NAME,
234+
mock.ANY)
235+
mock_del.assert_not_called()
236+
237+
def test_spawn_metadata_proxy(self):
238+
self._test_spawn_metadata_proxy()
239+
240+
def test_spawn_metadata_proxy_dad_failed(self):
241+
self._test_spawn_metadata_proxy(dad_failed=True)
212242

213243
def test_create_config_file_wrong_user(self):
214244
with mock.patch('pwd.getpwnam', side_effect=KeyError):

0 commit comments

Comments
 (0)