
Commit 72370a1

Check our nodes for hypervisor_hostname changes
When we are loading our ComputeNode objects by UUID according to what the virt driver reported, we can sanity-check the DB records against the virt driver's hostname. This covers the case where our CONF.host has not changed but the hostname reported by the virt driver has, assuming we can find the ComputeNode object(s) that match our persistent node uuid.

Related to blueprint stable-compute-uuid

Change-Id: I41635210d7d6f46b437b06d2570a26a80ed8676a
1 parent f01a90c commit 72370a1

File tree

6 files changed, +65 -24 lines changed


nova/compute/manager.py

Lines changed: 13 additions & 3 deletions
@@ -1473,25 +1473,35 @@ def _get_nodes(self, context):
             node.
         """
         try:
-            node_uuids = self.driver.get_available_node_uuids()
+            node_ids = self.driver.get_nodenames_by_uuid()
         except exception.VirtDriverNotReady:
             LOG.warning(
                 "Virt driver is not ready. If this is the first time this "
                 "service is starting on this host, then you can ignore "
                 "this warning.")
             return {}
 
-        nodes = objects.ComputeNodeList.get_all_by_uuids(context, node_uuids)
+        nodes = objects.ComputeNodeList.get_all_by_uuids(context,
+                                                         list(node_ids.keys()))
         if not nodes:
             # NOTE(danms): This should only happen if the compute_id is
             # pre-provisioned on a host that has never started.
             LOG.warning('Compute nodes %s for host %s were not found in the '
                         'database. If this is the first time this service is '
                         'starting on this host, then you can ignore this '
                         'warning.',
-                        node_uuids, self.host)
+                        list(node_ids.keys()), self.host)
             return {}
 
+        for node in nodes:
+            if node.hypervisor_hostname != node_ids.get(node.uuid):
+                raise exception.InvalidConfiguration(
+                    ('My compute node %s has hypervisor_hostname %s '
+                     'but virt driver reports it should be %s. Possible '
+                     'rename detected, refusing to start!') % (
+                        node.uuid, node.hypervisor_hostname,
+                        node_ids.get(node.uuid)))
+
         return {n.uuid: n for n in nodes}
 
     def _ensure_existing_node_identity(self, service_ref):
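
To make the new behavior easier to follow in isolation, here is a minimal sketch of the same uuid-to-hostname comparison outside of Nova. The ComputeNode record is stood in by a namedtuple and InvalidConfiguration by a local exception class; only the comparison logic mirrors the change above, everything else is illustrative.

from collections import namedtuple


class InvalidConfiguration(Exception):
    """Stand-in for nova.exception.InvalidConfiguration."""


# Stand-in for the DB-backed ComputeNode object.
Node = namedtuple('Node', ['uuid', 'hypervisor_hostname'])


def check_node_identity(db_nodes, node_ids):
    """Refuse to proceed if a DB record disagrees with the virt driver.

    :param db_nodes: ComputeNode-like records loaded from the DB by UUID
    :param node_ids: {uuid: nodename} as reported by get_nodenames_by_uuid()
    """
    for node in db_nodes:
        reported = node_ids.get(node.uuid)
        if node.hypervisor_hostname != reported:
            # Same persistent node uuid, different hostname: likely a
            # hypervisor rename, so abort instead of continuing.
            raise InvalidConfiguration(
                'My compute node %s has hypervisor_hostname %s but virt '
                'driver reports it should be %s. Possible rename detected, '
                'refusing to start!' % (node.uuid, node.hypervisor_hostname,
                                        reported))


# The driver now reports 'host1' for a node the DB knows as 'host2'.
db_nodes = [Node(uuid='uuid-1', hypervisor_hostname='host2')]
node_ids = {'uuid-1': 'host1'}
try:
    check_node_identity(db_nodes, node_ids)
except InvalidConfiguration as exc:
    print(exc)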

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 36 additions & 12 deletions
@@ -1162,24 +1162,48 @@ def test_init_host_with_in_progress_evacuations(self, mock_destroy_evac,
             mock_get_nodes.return_value.keys())
 
     @mock.patch.object(objects.ComputeNodeList, 'get_all_by_uuids')
-    @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
+    @mock.patch.object(fake_driver.FakeDriver, 'get_nodenames_by_uuid')
     def test_get_nodes(self, mock_driver_get_nodes, mock_get_by_uuid):
-        mock_driver_get_nodes.return_value = ['fake_node1', 'fake_node2']
+        mock_driver_get_nodes.return_value = {uuids.node_fake_node1: 'host',
+                                              uuids.node_fake_node2: 'host'}
         # NOTE(danms): The fake driver, by default, uses
-        # uuidsentinel.$node_name, so we can predict the uuids it will
+        # uuidsentinel.node_$node_name, so we can predict the uuids it will
         # return here.
-        cn1 = objects.ComputeNode(uuid=uuids.fake_node1)
-        cn2 = objects.ComputeNode(uuid=uuids.fake_node2)
+        cn1 = objects.ComputeNode(uuid=uuids.node_fake_node1,
+                                  hypervisor_hostname='host')
+        cn2 = objects.ComputeNode(uuid=uuids.node_fake_node2,
+                                  hypervisor_hostname='host')
         mock_get_by_uuid.return_value = [cn1, cn2]
 
         nodes = self.compute._get_nodes(self.context)
 
-        self.assertEqual({uuids.fake_node1: cn1, uuids.fake_node2: cn2}, nodes)
+        self.assertEqual({uuids.node_fake_node1: cn1,
+                          uuids.node_fake_node2: cn2}, nodes)
 
         mock_driver_get_nodes.assert_called_once_with()
         mock_get_by_uuid.assert_called_once_with(self.context,
-                                                 [uuids.fake_node1,
-                                                  uuids.fake_node2])
+                                                 [uuids.node_fake_node1,
+                                                  uuids.node_fake_node2])
+
+    @mock.patch.object(objects.ComputeNodeList, 'get_all_by_uuids')
+    @mock.patch.object(fake_driver.FakeDriver, 'get_nodenames_by_uuid')
+    def test_get_nodes_mismatch(self, mock_driver_get_nodes, mock_get_by_uuid):
+        # Virt driver reports a (hypervisor_) hostname of 'host1'
+        mock_driver_get_nodes.return_value = {uuids.node_fake_node1: 'host1',
+                                              uuids.node_fake_node2: 'host1'}
+
+        # The database records for our compute nodes (by UUID) show a
+        # hypervisor_hostname of 'host2'
+        cn1 = objects.ComputeNode(uuid=uuids.node_fake_node1,
+                                  hypervisor_hostname='host2')
+        cn2 = objects.ComputeNode(uuid=uuids.node_fake_node2,
+                                  hypervisor_hostname='host2')
+        mock_get_by_uuid.return_value = [cn1, cn2]
+
+        # Possible hostname (as reported by the virt driver) rename,
+        # which should abort our startup
+        self.assertRaises(exception.InvalidConfiguration,
+                          self.compute._get_nodes, self.context)
 
     @mock.patch.object(manager.LOG, 'warning')
     @mock.patch.object(
@@ -1202,11 +1226,11 @@ def test_get_nodes_driver_not_ready(
 
     @mock.patch.object(manager.LOG, 'warning')
     @mock.patch.object(objects.ComputeNodeList, 'get_all_by_uuids')
-    @mock.patch.object(fake_driver.FakeDriver, 'get_available_node_uuids')
+    @mock.patch.object(fake_driver.FakeDriver, 'get_nodenames_by_uuid')
     def test_get_nodes_node_not_found(
             self, mock_driver_get_nodes, mock_get_all_by_uuids,
             mock_log_warning):
-        mock_driver_get_nodes.return_value = ['fake-node1']
+        mock_driver_get_nodes.return_value = {uuids.node_1: 'fake-node1'}
         mock_get_all_by_uuids.return_value = []
 
         nodes = self.compute._get_nodes(self.context)
@@ -1215,11 +1239,11 @@ def test_get_nodes_node_not_found(
 
         mock_driver_get_nodes.assert_called_once_with()
         mock_get_all_by_uuids.assert_called_once_with(self.context,
-                                                      ['fake-node1'])
+                                                      [uuids.node_1])
         mock_log_warning.assert_called_once_with(
             "Compute nodes %s for host %s were not found in the database. "
             "If this is the first time this service is starting on this host, "
-            "then you can ignore this warning.", ['fake-node1'], 'fake-mini')
+            "then you can ignore this warning.", [uuids.node_1], 'fake-mini')
 
     def test_init_host_disk_devices_configuration_failure(self):
         self.flags(max_disk_devices_to_attach=0, group='compute')

nova/virt/driver.py

Lines changed: 4 additions & 2 deletions
@@ -1596,8 +1596,10 @@ def get_available_nodes(self, refresh=False):
         """
         raise NotImplementedError()
 
-    def get_available_node_uuids(self, refresh=False):
-        return [nova.virt.node.get_local_node_uuid()]
+    def get_nodenames_by_uuid(self, refresh=False):
+        """Returns a dict of {uuid: nodename} for all managed nodes."""
+        nodename = self.get_available_nodes()[0]
+        return {nova.virt.node.get_local_node_uuid(): nodename}
 
     def node_is_available(self, nodename):
         """Return whether this compute service manages a particular node."""

nova/virt/fake.py

Lines changed: 4 additions & 3 deletions
@@ -510,7 +510,7 @@ def get_available_resource(self, nodename):
         # control our node uuids. Make sure we return a unique and
         # consistent uuid for each node we are responsible for to
         # avoid the persistent local node identity from taking over.
-        host_status['uuid'] = str(getattr(uuids, nodename))
+        host_status['uuid'] = str(getattr(uuids, 'node_%s' % nodename))
         return host_status
 
     def update_provider_tree(self, provider_tree, nodename, allocations=None):
@@ -653,8 +653,9 @@ def get_volume_connector(self, instance):
     def get_available_nodes(self, refresh=False):
         return self._nodes
 
-    def get_available_node_uuids(self, refresh=False):
-        return [str(getattr(uuids, n)) for n in self.get_available_nodes()]
+    def get_nodenames_by_uuid(self, refresh=False):
+        return {str(getattr(uuids, 'node_%s' % n)): n
+                for n in self.get_available_nodes()}
 
     def instance_on_disk(self, instance):
         return False
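
The 'node_%s' prefix works because the uuidsentinel fixture from oslo.utils hands back a stable UUID string for any attribute name, which is what makes the fake driver's node uuids predictable in the tests above. A quick illustration, assuming oslo.utils is installed:

from oslo_utils.fixture import uuidsentinel as uuids

# The same attribute name always maps to the same generated UUID within
# a process, and different names map to different UUIDs.
first = getattr(uuids, 'node_fake_node1')
again = getattr(uuids, 'node_fake_node1')
other = getattr(uuids, 'node_fake_node2')

assert first == again
assert first != other
print(first, other)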

nova/virt/ironic/driver.py

Lines changed: 6 additions & 2 deletions
@@ -839,8 +839,12 @@ def get_available_nodes(self, refresh=False):
 
         return node_uuids
 
-    def get_available_node_uuids(self, refresh=False):
-        return self.get_available_nodes(refresh=refresh)
+    def get_nodenames_by_uuid(self, refresh=False):
+        nodes = self.get_available_nodes(refresh=refresh)
+        # We use the uuid for compute_node.uuid and
+        # compute_node.hypervisor_hostname, so the dict keys and values are
+        # the same.
+        return dict(zip(nodes, nodes))
 
     def update_provider_tree(self, provider_tree, nodename, allocations=None):
         """Update a ProviderTree object with current resource provider and

nova/virt/libvirt/driver.py

Lines changed: 2 additions & 2 deletions
@@ -11332,8 +11332,8 @@ def _get_disk_over_committed_size_total(self):
     def get_available_nodes(self, refresh=False):
         return [self._host.get_hostname()]
 
-    def get_available_node_uuids(self, refresh=False):
-        return [self._host.get_node_uuid()]
+    def get_nodenames_by_uuid(self, refresh=False):
+        return {self._host.get_node_uuid(): self._host.get_hostname()}
 
     def get_host_cpu_stats(self):
         """Return the current CPU state of the host."""
