
Commit 1e75373

Zuul authored and openstack-gerrit committed
Merge "Sanity check that new hosts have no instances"
2 parents aba83b3 + abbac59

3 files changed: +44 −8 lines

nova/compute/manager.py

Lines changed: 16 additions & 0 deletions

@@ -1570,6 +1570,18 @@ def _check_for_host_rename(self, nodes_by_uuid):
             LOG.debug('Verified node %s matches my host %s',
                       node.uuid, self.host)
 
+    def _sanity_check_new_host(self):
+        instances_on_hv = self.driver.list_instance_uuids()
+        if len(instances_on_hv) > 0:
+            # This means we have instances on our hypervisor, but we think
+            # we are a new host (i.e. we created a new service record). That
+            # likely means we're pointed at an empty database or the wrong
+            # cell.
+            raise exception.InvalidConfiguration(
+                'My hypervisor has existing instances, but I appear to be '
+                'a new service in this database. Possible database '
+                'configuration error, refusing to start!')
+
     def init_host(self, service_ref):
         """Initialization for a standalone compute service."""

@@ -1578,6 +1590,10 @@ def init_host(self, service_ref):
             # to record a locally-persistent node identity because
             # we have upgraded from a previous version.
             self._ensure_existing_node_identity(service_ref)
+        else:
+            # If we are a new service (in the database), make sure we have no
+            # instances on our hypervisor as we would expect.
+            self._sanity_check_new_host()
 
         if CONF.pci.device_spec:
             # Simply loading the PCI passthrough spec will do a bunch of
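Note on the change above: the new check runs only on the else path of init_host, i.e. when no service_ref was found in the database and the service would otherwise register itself as brand new. Below is a minimal, self-contained sketch of that control flow; FakeDriver and ComputeManagerSketch are invented stand-ins for illustration, not Nova's real classes.

# A minimal sketch of the control flow this commit adds. FakeDriver and
# ComputeManagerSketch are invented stand-ins, not Nova's actual API.

class InvalidConfiguration(Exception):
    """Stand-in for nova.exception.InvalidConfiguration."""


class FakeDriver:
    def __init__(self, instance_uuids):
        self._instance_uuids = instance_uuids

    def list_instance_uuids(self):
        # The real driver asks the hypervisor which instances exist.
        return list(self._instance_uuids)


class ComputeManagerSketch:
    def __init__(self, driver):
        self.driver = driver

    def _sanity_check_new_host(self):
        # Instances on the hypervisor plus a missing service record
        # suggest an empty or wrong database/cell: refuse to start.
        if self.driver.list_instance_uuids():
            raise InvalidConfiguration(
                'Hypervisor has existing instances, but this looks like '
                'a new service record. Refusing to start!')

    def init_host(self, service_ref):
        if service_ref:
            # Existing service: the real code verifies the persisted
            # node identity here instead.
            pass
        else:
            self._sanity_check_new_host()


mgr = ComputeManagerSketch(FakeDriver(['some-instance-uuid']))
try:
    mgr.init_host(None)  # no service record found in the database
except InvalidConfiguration as exc:
    print('Refused to start:', exc)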

nova/tests/functional/wsgi/test_services.py

Lines changed: 12 additions & 7 deletions

@@ -120,7 +120,8 @@ def test_evacuate_then_delete_compute_service(self):
         """Tests a scenario where a server is created on a host, the host
         goes down, the server is evacuated to another host, and then the
         source host compute service is deleted. After that the deleted
-        compute service is restarted and starts successfully.
+        compute service is restarted and refuses to run because it finds its
+        service record deleted even though it has instances.
         """
         # Create our source host that we will evacuate *from* later.
         host1 = self._start_compute('host1')

@@ -154,12 +155,16 @@ def test_evacuate_then_delete_compute_service(self):
         # Then the resource provider is also deleted.
         resp = self.placement.get('/resource_providers/%s' % rp_uuid)
         self.assertEqual(404, resp.status)
-        # Try to restart the host1 compute service to create a new service
-        # and a new resource provider.
-        self.restart_compute_service(host1)
-        # Make sure the compute service record for host1 is recreated.
-        service = self.admin_api.get_services(
-            binary='nova-compute', host='host1')[0]
+        # Try to restart the host1 compute service and make sure it recognizes
+        # that its service record has been deleted even though it still has
+        # instances running.
+        self.assertRaises(exception.InvalidConfiguration,
+                          self.restart_compute_service, host1)
+        # Make sure the compute service record for host1 is not recreated
+        # since we aborted startup.
+        services = self.admin_api.get_services(
+            binary='nova-compute', host='host1')
+        self.assertEqual([], services)
 
     def test_migrate_confirm_after_deleted_source_compute(self):
         """Tests a scenario where a server is cold migrated and while in

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 16 additions & 1 deletion

@@ -916,6 +916,8 @@ def _make_instance_list(db_list):
             return instance_obj._make_instance_list(
                 self.context, objects.InstanceList(), db_list, None)
 
+        @mock.patch.object(manager.ComputeManager,
+                           '_sanity_check_new_host')
         @mock.patch.object(manager.ComputeManager,
                            '_ensure_existing_node_identity')
         @mock.patch.object(manager.ComputeManager, '_get_nodes')

@@ -937,7 +939,7 @@ def _do_mock_calls(mock_update_scheduler, mock_inst_init,
                            mock_destroy, mock_admin_ctxt, mock_host_get,
                            mock_init_host,
                            mock_error_interrupted, mock_get_nodes,
-                           mock_existing_node):
+                           mock_existing_node, mock_check_new):
            mock_admin_ctxt.return_value = self.context
            inst_list = _make_instance_list(startup_instances)
            mock_host_get.return_value = inst_list

@@ -948,6 +950,7 @@ def _do_mock_calls(mock_update_scheduler, mock_inst_init,
 
            self.compute.init_host(None)
 
+           mock_check_new.assert_called_once_with()
            mock_existing_node.assert_not_called()
            mock_validate_pinning.assert_called_once_with(inst_list)
            mock_validate_vtpm.assert_called_once_with(inst_list)

@@ -1001,6 +1004,18 @@ def test_init_host_no_instances(
        mock_get_nodes.assert_called_once_with(
            test.MatchType(nova.context.RequestContext))
 
+    def test_init_host_new_with_instances(self):
+        """Tests the case where we start up without an existing service_ref,
+        indicating that we are a new service, but our hypervisor reports
+        existing instances. This indicates we were moved to another cell,
+        our database got wiped, etc.
+        """
+        with mock.patch.object(self.compute.driver,
+                               'list_instance_uuids') as mock_insts:
+            mock_insts.return_value = ['foo']
+            self.assertRaises(exception.InvalidConfiguration,
+                              self.compute.init_host, None)
+
     @mock.patch('nova.objects.InstanceList')
     @mock.patch('nova.objects.MigrationList.get_by_filters')
     @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids')
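The new unit test stubs the driver with mock.patch.object used as a context manager, so list_instance_uuids returns the canned value only inside the with block and the original method is restored afterwards. A small self-contained sketch of that pattern; the Driver and Manager classes here are invented for the example:

from unittest import mock


class Driver:
    def list_instance_uuids(self):
        return []  # the real method would query the hypervisor


class Manager:
    def __init__(self):
        self.driver = Driver()

    def has_instances(self):
        return bool(self.driver.list_instance_uuids())


manager = Manager()
# Inside the block, list_instance_uuids is replaced by a MagicMock that
# returns the canned value; patch.object restores the original on exit.
with mock.patch.object(manager.driver, 'list_instance_uuids') as mock_list:
    mock_list.return_value = ['foo']
    assert manager.has_instances()
    mock_list.assert_called_once_with()

# Outside the block the real (empty) implementation is back.
assert not manager.has_instances()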
