Skip to content

Commit 3a5f892

Browse files
kk7ds authored and gibizer committed
Add service version check workaround for FFU
We recently added a hard failure to nova service startup for the case where computes were more than one version old (as indicated by their service record). This helps to prevent starting up new control services when a very old compute is still running. However, during a fast-forward upgrade (FFU), control services that have skipped multiple versions will be started and find the older compute records (which could not be updated yet due to their reliance on the control services being up) and refuse to start. This creates a cross-dependency which is not resolvable without hacking the database.

This patch adds a workaround flag to allow turning that hard fail into a warning to proceed past the issue. This less-than-ideal solution is simple and backportable, but perhaps a better solution can be implemented in the future.

Related-Bug: #1958883
Change-Id: Iddbc9b2a13f19cea9a996aeadfe891f4ef3b0264
(cherry picked from commit 7d2e481)
1 parent 90c5190 commit 3a5f892

File tree

5 files changed

+64
-2
lines changed

5 files changed

+64
-2
lines changed

nova/api/openstack/wsgi_app.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,13 @@ def _get_config_files(env=None):
4444

4545

4646
def _setup_service(host, name):
47-
utils.raise_if_old_compute()
47+
try:
48+
utils.raise_if_old_compute()
49+
except exception.TooOldComputeService as e:
50+
if CONF.workarounds.disable_compute_service_check_for_ffu:
51+
LOG.warning(str(e))
52+
else:
53+
raise
4854

4955
binary = name if name.startswith('nova-') else "nova-%s" % name
5056

nova/conf/workarounds.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -371,6 +371,16 @@
371371
Related options:
372372
373373
* :oslo.config:option:`DEFAULT.compute_driver` (libvirt)
374+
"""),
375+
cfg.BoolOpt('disable_compute_service_check_for_ffu',
376+
default=False,
377+
help="""
378+
If this is set, the normal safety check for old compute services will be
379+
treated as a warning instead of an error. This is only to be enabled to
380+
facilitate a Fast-Forward upgrade where new control services are being started
381+
before compute nodes have been able to update their service record. In an FFU,
382+
the service records in the database will be more than one version old until
383+
the compute nodes start up, but control services need to be online first.
374384
"""),
375385
]
376386

nova/service.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -261,7 +261,13 @@ def create(cls, host=None, binary=None, topic=None, manager=None,
261261
# up before it allows the service to be created. The
262262
# raise_if_old_compute() depends on the RPC to be up and does not
263263
# implement its own retry mechanism to connect to the conductor.
264-
utils.raise_if_old_compute()
264+
try:
265+
utils.raise_if_old_compute()
266+
except exception.TooOldComputeService as e:
267+
if CONF.workarounds.disable_compute_service_check_for_ffu:
268+
LOG.warning(str(e))
269+
else:
270+
raise
265271

266272
return service_obj
267273

nova/tests/unit/api/openstack/test_wsgi_app.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from oslotest import base
1919

2020
from nova.api.openstack import wsgi_app
21+
from nova import exception
2122
from nova import test
2223
from nova.tests import fixtures as nova_fixtures
2324

@@ -87,3 +88,19 @@ def test_init_application_called_twice(
8788
wsgi_app.init_application('nova-api')
8889
self.assertIn('Global data already initialized, not re-initializing.',
8990
self.stdlog.logger.output)
91+
92+
@mock.patch('nova.objects.Service.get_by_host_and_binary')
93+
@mock.patch('nova.utils.raise_if_old_compute')
94+
def test_setup_service_version_workaround(self, mock_check_old, mock_get):
95+
mock_check_old.side_effect = exception.TooOldComputeService(
96+
oldest_supported_version='2',
97+
scope='scope',
98+
min_service_level=2,
99+
oldest_supported_service=1)
100+
101+
self.assertRaises(exception.TooOldComputeService,
102+
wsgi_app._setup_service, 'myhost', 'api')
103+
wsgi_app.CONF.set_override(
104+
'disable_compute_service_check_for_ffu', True,
105+
group='workarounds')
106+
wsgi_app._setup_service('myhost', 'api')

nova/tests/unit/test_service.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -286,6 +286,29 @@ def fake_wait(*args, **kwargs):
286286
mock_check_old.assert_called_once_with()
287287
mock_wait.assert_called_once_with(mock.ANY)
288288

289+
@mock.patch('nova.utils.raise_if_old_compute')
290+
def test_old_compute_version_check_workaround(
291+
self, mock_check_old):
292+
293+
mock_check_old.side_effect = exception.TooOldComputeService(
294+
oldest_supported_version='2',
295+
scope='scope',
296+
min_service_level=2,
297+
oldest_supported_service=1)
298+
299+
self.assertRaises(exception.TooOldComputeService,
300+
service.Service.create,
301+
self.host, 'nova-conductor', self.topic,
302+
'nova.tests.unit.test_service.FakeManager')
303+
304+
CONF.set_override('disable_compute_service_check_for_ffu', True,
305+
group='workarounds')
306+
307+
service.Service.create(self.host, 'nova-conductor', self.topic,
308+
'nova.tests.unit.test_service.FakeManager')
309+
310+
mock_check_old.assert_has_calls([mock.call(), mock.call()])
311+
289312

290313
class TestWSGIService(test.NoDBTestCase):
291314

0 commit comments

Comments
 (0)