Skip to content

Commit 059ee1b

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "Suppress IPv6 metadata DAD failure and delete address" into stable/yoga
2 parents 15e649c + defb601 commit 059ee1b

File tree

11 files changed

+139
-27
lines changed

11 files changed

+139
-27
lines changed

doc/source/admin/config-dhcp-ha.rst

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,38 @@ To test the HA of DHCP agent:
442442

443443
#. Start DHCP agent on HostB. The VM gets the wanted IP again.
444444

445+
No HA for metadata service on isolated networks
446+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
447+
448+
All Neutron backends using the DHCP agent can also provide `metadata service
449+
<https://docs.openstack.org/nova/latest/user/metadata.html>`_ in isolated
450+
networks (i.e. networks without a router). In this case the DHCP agent manages
451+
the metadata service (see config option `enable_isolated_metadata
452+
<https://docs.openstack.org/neutron/latest/configuration/dhcp-agent.html#DEFAULT.enable_isolated_metadata>`_).
453+
454+
Note however that the metadata service is only redundant for IPv4, and not
455+
IPv6, even when the DHCP service is configured to be highly available
456+
(config option `dhcp_agents_per_network
457+
<https://docs.openstack.org/neutron/latest/configuration/neutron.html#DEFAULT.dhcp_agents_per_network>`_
458+
> 1). This is because the DHCP agent will insert a route to the well known
459+
metadata IPv4 address (`169.254.169.254`) via its own IP address, so it will
460+
be reachable as long as the DHCP service is available at that IP address.
461+
This also means that recovery after a failure is tied to the renewal of the
462+
DHCP lease, since that route will only change if the DHCP server for a VM
463+
changes.
464+
465+
With IPv6, the well known metadata IPv6 address (`fe80::a9fe:a9fe`) is used,
466+
but directly configured in the DHCP agent network namespace.
467+
Due to the enforcement of duplicate address detection (DAD), this address
468+
can only be configured in at most one DHCP network namespaces at any time.
469+
See `RFC 4862 <https://www.rfc-editor.org/rfc/rfc4862#section-5.4>`_ for
470+
details on the DAD process.
471+
472+
For this reason, even when you have multiple DHCP agents, an arbitrary one
473+
(where the metadata IPv6 address is not in `dadfailed` state) will serve all
474+
metadata requests over IPv6. When that metadata service instance becomes
475+
unreachable there is no failover and the service will become unreachable.
476+
445477
Disabling and removing an agent
446478
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
447479

neutron/agent/linux/dhcp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from neutron.agent.linux import ip_lib
4141
from neutron.agent.linux import iptables_manager
4242
from neutron.cmd import runtime_checks as checks
43+
from neutron.common import _constants as common_constants
4344
from neutron.common import utils as common_utils
4445
from neutron.ipam import utils as ipam_utils
4546
from neutron.privileged.agent.linux import dhcp as priv_dhcp
@@ -1773,7 +1774,7 @@ def setup(self, network):
17731774
if self.conf.force_metadata or self.conf.enable_isolated_metadata:
17741775
ip_cidrs.append(constants.METADATA_CIDR)
17751776
if netutils.is_ipv6_enabled():
1776-
ip_cidrs.append(constants.METADATA_V6_CIDR)
1777+
ip_cidrs.append(common_constants.METADATA_V6_CIDR)
17771778

17781779
self.driver.init_l3(interface_name, ip_cidrs,
17791780
namespace=network.namespace)

neutron/agent/linux/ip_lib.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ class AddressNotReady(exceptions.NeutronException):
103103
"become ready: %(reason)s")
104104

105105

106+
class DADFailed(AddressNotReady):
107+
pass
108+
109+
106110
InvalidArgument = privileged.InvalidArgument
107111

108112

@@ -581,7 +585,7 @@ def wait_until_address_ready(self, address, wait_time=30):
581585
"""Wait until an address is no longer marked 'tentative' or 'dadfailed'
582586
583587
raises AddressNotReady if times out, address not present on interface
584-
or DAD fails
588+
raises DADFailed if Duplicate Address Detection fails
585589
"""
586590
def is_address_ready():
587591
try:
@@ -593,7 +597,7 @@ def is_address_ready():
593597
# Since both 'dadfailed' and 'tentative' will be set if DAD fails,
594598
# check 'dadfailed' first just to be explicit
595599
if addr_info['dadfailed']:
596-
raise AddressNotReady(
600+
raise DADFailed(
597601
address=address, reason=_('Duplicate address detected'))
598602
if addr_info['tentative']:
599603
return False

neutron/agent/metadata/driver.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from neutron.agent.linux import external_process
3434
from neutron.agent.linux import ip_lib
3535
from neutron.agent.linux import utils as linux_utils
36+
from neutron.common import _constants as common_constants
3637
from neutron.common import coordination
3738
from neutron.common import utils as common_utils
3839

@@ -266,9 +267,30 @@ def spawn_monitored_metadata_proxy(cls, monitor, ns_name, port, conf,
266267
# HAProxy cannot bind() until IPv6 Duplicate Address Detection
267268
# completes. We must wait until the address leaves its 'tentative'
268269
# state.
269-
ip_lib.IpAddrCommand(
270-
parent=ip_lib.IPDevice(name=bind_interface, namespace=ns_name)
271-
).wait_until_address_ready(address=bind_address_v6)
270+
try:
271+
ip_lib.IpAddrCommand(
272+
parent=ip_lib.IPDevice(name=bind_interface,
273+
namespace=ns_name)
274+
).wait_until_address_ready(address=bind_address_v6)
275+
except ip_lib.DADFailed as exc:
276+
# This failure means that another DHCP agent has already
277+
# configured this metadata address, so all requests will
278+
# be via that single agent.
279+
LOG.info('DAD failed for address %(address)s on interface '
280+
'%(interface)s in namespace %(namespace)s on network '
281+
'%(network)s, deleting it. Exception: %(exception)s',
282+
{'address': bind_address_v6,
283+
'interface': bind_interface,
284+
'namespace': ns_name,
285+
'network': network_id,
286+
'exception': str(exc)})
287+
try:
288+
ip_lib.delete_ip_address(bind_address_v6, bind_interface,
289+
namespace=ns_name)
290+
except Exception as exc:
291+
# do not re-raise a delete failure, just log
292+
LOG.info('Address deletion failure: %s', str(exc))
293+
return
272294
pm.enable()
273295
monitor.register(uuid, METADATA_SERVICE_NAME, pm)
274296
cls.monitors[router_id] = pm
@@ -363,6 +385,6 @@ def apply_metadata_nat_rules(router, proxy):
363385
if netutils.is_ipv6_enabled():
364386
for c, r in proxy.metadata_nat_rules(
365387
proxy.metadata_port,
366-
metadata_address=(constants.METADATA_V6_IP + '/128')):
388+
metadata_address=(common_constants.METADATA_V6_CIDR)):
367389
router.iptables_manager.ipv6['nat'].add_rule(c, r)
368390
router.iptables_manager.apply()

neutron/common/_constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,6 @@
8181

8282
# The lowest binding index for L3 agents and DHCP agents.
8383
LOWEST_AGENT_BINDING_INDEX = 1
84+
85+
# Neutron-lib defines this with a /64 but it should be /128
86+
METADATA_V6_CIDR = constants.METADATA_V6_IP + '/128'

neutron/conf/agent/database/agentschedulers_db.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
'network. If this number is greater than 1, the '
3333
'scheduler automatically assigns multiple DHCP agents '
3434
'for a given tenant network, providing high '
35-
'availability for DHCP service.')),
35+
'availability for the DHCP service. However this does '
36+
'not provide high availability for the IPv6 metadata '
37+
'service in isolated networks.')),
3638
cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
3739
default=False,
3840
help=_('Enable services on an agent with admin_state_up '

neutron/tests/unit/agent/dhcp/test_agent.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from neutron.agent.linux import interface
3838
from neutron.agent.linux import utils as linux_utils
3939
from neutron.agent.metadata import driver as metadata_driver
40+
from neutron.common import _constants as common_constants
4041
from neutron.common import config as common_config
4142
from neutron.common import utils
4243
from neutron.conf.agent import common as config
@@ -1970,7 +1971,7 @@ def _test_setup_helper(self, device_is_ready, ipv6_enabled=True,
19701971
expected_ips = ['172.9.9.9/24', const.METADATA_CIDR]
19711972

19721973
if ipv6_enabled:
1973-
expected_ips.append(const.METADATA_V6_CIDR)
1974+
expected_ips.append(common_constants.METADATA_V6_CIDR)
19741975

19751976
expected = [mock.call.get_device_name(port)]
19761977

neutron/tests/unit/agent/linux/test_dhcp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from neutron.agent.linux import dhcp
3333
from neutron.agent.linux import ip_lib
3434
from neutron.cmd import runtime_checks as checks
35+
from neutron.common import _constants as common_constants
3536
from neutron.conf.agent import common as config
3637
from neutron.conf.agent import dhcp as dhcp_config
3738
from neutron.conf import common as base_config
@@ -3258,7 +3259,7 @@ def mock_update(port_id, dict):
32583259
if enable_isolated_metadata or force_metadata:
32593260
expect_ips.extend([
32603261
constants.METADATA_CIDR,
3261-
constants.METADATA_V6_CIDR])
3262+
common_constants.METADATA_V6_CIDR])
32623263
mgr.driver.init_l3.assert_called_with('ns-XXX',
32633264
expect_ips,
32643265
namespace='qdhcp-ns')

neutron/tests/unit/agent/linux/test_ip_lib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,7 @@ def test_wait_until_address_ready_non_existent_address(self):
790790
def test_wait_until_address_dadfailed(self):
791791
self.addr_cmd.list = mock.Mock(
792792
return_value=[{'tentative': True, 'dadfailed': True}])
793-
with testtools.ExpectedException(ip_lib.AddressNotReady):
793+
with testtools.ExpectedException(ip_lib.DADFailed):
794794
self.addr_cmd.wait_until_address_ready('abcd::1234')
795795

796796
@mock.patch.object(common_utils, 'wait_until_true')

neutron/tests/unit/agent/metadata/test_driver.py

Lines changed: 46 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from neutron.agent.l3 import agent as l3_agent
2626
from neutron.agent.l3 import router_info
27+
from neutron.agent.linux import ip_lib
2728
from neutron.agent.linux import iptables_manager
2829
from neutron.agent.linux import utils as linux_utils
2930
from neutron.agent.metadata import driver as metadata_driver
@@ -74,6 +75,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
7475
EUNAME = 'neutron'
7576
EGNAME = 'neutron'
7677
METADATA_DEFAULT_IP = '169.254.169.254'
78+
METADATA_DEFAULT_IPV6 = 'fe80::a9fe:a9fe'
7779
METADATA_PORT = 8080
7880
METADATA_SOCKET = '/socket/path'
7981
PIDFILE = 'pidfile'
@@ -138,7 +140,7 @@ def test_after_router_updated_should_not_call_add_metadata_rules(self):
138140
agent._process_updated_router(router)
139141
f.assert_not_called()
140142

141-
def test_spawn_metadata_proxy(self):
143+
def _test_spawn_metadata_proxy(self, dad_failed=False):
142144
router_id = _uuid()
143145
router_ns = 'qrouter-%s' % router_id
144146
ip_class_path = 'neutron.agent.linux.ip_lib.IPWrapper'
@@ -162,29 +164,41 @@ def test_spawn_metadata_proxy(self):
162164
'NamespaceManager.list_all', return_value={}),\
163165
mock.patch(
164166
'neutron.agent.linux.ip_lib.'
165-
'IpAddrCommand.wait_until_address_ready') as mock_wait:
167+
'IpAddrCommand.wait_until_address_ready') as mock_wait,\
168+
mock.patch(
169+
'neutron.agent.linux.ip_lib.'
170+
'delete_ip_address') as mock_del:
166171
agent = l3_agent.L3NATAgent('localhost')
172+
agent.process_monitor = mock.Mock()
167173
cfg_file = os.path.join(
168174
metadata_driver.HaproxyConfigurator.get_config_path(
169175
agent.conf.state_path),
170176
"%s.conf" % router_id)
171177
mock_open = self.useFixture(
172178
lib_fixtures.OpenFixture(cfg_file)).mock_open
173-
mock_wait.return_value = True
179+
if dad_failed:
180+
mock_wait.side_effect = ip_lib.DADFailed(
181+
address=self.METADATA_DEFAULT_IP, reason='DAD failed')
182+
else:
183+
mock_wait.return_value = True
174184
agent.metadata_driver.spawn_monitored_metadata_proxy(
175185
agent.process_monitor,
176186
router_ns,
177187
self.METADATA_PORT,
178188
agent.conf,
179189
bind_address=self.METADATA_DEFAULT_IP,
180-
router_id=router_id)
190+
router_id=router_id,
191+
bind_address_v6=self.METADATA_DEFAULT_IPV6,
192+
bind_interface='fake-if')
181193

182194
netns_execute_args = [
183195
'haproxy',
184196
'-f', cfg_file]
185197

186198
log_tag = ("haproxy-" + metadata_driver.METADATA_SERVICE_NAME +
187199
"-" + router_id)
200+
bind_v6_line = 'bind %s:%s interface %s' % (
201+
self.METADATA_DEFAULT_IPV6, self.METADATA_PORT, 'fake-if')
188202
cfg_contents = metadata_driver._HAPROXY_CONFIG_TEMPLATE % {
189203
'user': self.EUNAME,
190204
'group': self.EGNAME,
@@ -197,18 +211,34 @@ def test_spawn_metadata_proxy(self):
197211
'pidfile': self.PIDFILE,
198212
'log_level': 'debug',
199213
'log_tag': log_tag,
200-
'bind_v6_line': ''}
201-
202-
mock_open.assert_has_calls([
203-
mock.call(cfg_file, 'w'),
204-
mock.call().write(cfg_contents)],
205-
any_order=True)
206-
207-
ip_mock.assert_has_calls([
208-
mock.call(namespace=router_ns),
209-
mock.call().netns.execute(netns_execute_args, addl_env=None,
210-
run_as_root=True)
211-
])
214+
'bind_v6_line': bind_v6_line}
215+
216+
if dad_failed:
217+
agent.process_monitor.register.assert_not_called()
218+
mock_del.assert_called_once_with(self.METADATA_DEFAULT_IPV6,
219+
'fake-if',
220+
namespace=router_ns)
221+
else:
222+
mock_open.assert_has_calls([
223+
mock.call(cfg_file, 'w'),
224+
mock.call().write(cfg_contents)], any_order=True)
225+
226+
ip_mock.assert_has_calls([
227+
mock.call(namespace=router_ns),
228+
mock.call().netns.execute(netns_execute_args,
229+
addl_env=None, run_as_root=True)
230+
])
231+
232+
agent.process_monitor.register.assert_called_once_with(
233+
router_id, metadata_driver.METADATA_SERVICE_NAME,
234+
mock.ANY)
235+
mock_del.assert_not_called()
236+
237+
def test_spawn_metadata_proxy(self):
238+
self._test_spawn_metadata_proxy()
239+
240+
def test_spawn_metadata_proxy_dad_failed(self):
241+
self._test_spawn_metadata_proxy(dad_failed=True)
212242

213243
def test_create_config_file_wrong_user(self):
214244
with mock.patch('pwd.getpwnam', side_effect=KeyError):

0 commit comments

Comments
 (0)