From 3fe0217b9c4981d077e32fdafc24b2b8814db618 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 19 Nov 2019 14:45:02 +0000 Subject: [PATCH 1/8] Add functional regression test for bug 1853009 Bug 1853009 describes a race condition involving multiple nova-compute services with ironic. As the compute services start up, the hash ring rebalances, and the compute services have an inconsistent view of which is responsible for a compute node. The sequence of actions here is adapted from a real world log [1], where multiple nova-compute services were started simultaneously. In some cases mocks are used to simulate race conditions. There are three main issues with the behaviour: * host2 deletes the orphan node compute node after host1 has taken ownership of it. * host1 assumes that another compute service will not delete its nodes. Once a node is in rt.compute_nodes, it is not removed again unless the node is orphaned. This prevents host1 from recreating the compute node. * host1 assumes that another compute service will not delete its resource providers. Once an RP is in the provider tree, it is not removed. This functional test documents the current behaviour, with the idea that it can be updated as this behaviour is fixed. [1] http://paste.openstack.org/show/786272/ Co-Authored-By: Matt Riedemann Change-Id: Ice4071722de54e8d20bb8c3795be22f1995940cd Related-Bug: #1853009 Related-Bug: #1853159 (cherry picked from commit 59d9871e8a0672538f8ffc43ae99b3d1c4b08909) (cherry picked from commit c260e75d012cc4fae596d5de185afad6fb24068c) (cherry picked from commit 34d5bca6bc612f41ac3fc33447cd2ba83bf446ba) (cherry picked from commit f57c4965eef8e25656635897f46dccfe23c3fa5c) --- .../regressions/test_bug_1853009.py | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 nova/tests/functional/regressions/test_bug_1853009.py diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py new file mode 100644 index 00000000000..ff6369b710c --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -0,0 +1,190 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock + +from nova import context +from nova import objects +from nova.tests.functional import integrated_helpers + + +class NodeRebalanceDeletedComputeNodeRaceTestCase( + integrated_helpers.ProviderUsageBaseTestCase, +): + """Regression test for bug 1853009 observed in Rocky & later. + + When an ironic node re-balances from one host to another, there can be a + race where the old host deletes the orphan compute node after the new host + has taken ownership of it which results in the new host failing to create + the compute node and resource provider because the ResourceTracker does not + detect a change. + """ + # Make sure we're using the fake driver that has predictable uuids + # for each node. 
+ compute_driver = 'fake.PredictableNodeUUIDDriver' + + def _assert_hypervisor_api(self, nodename, expected_host): + # We should have one compute node shown by the API. + hypervisors = self.api.api_get( + '/os-hypervisors/detail' + ).body['hypervisors'] + self.assertEqual(1, len(hypervisors), hypervisors) + hypervisor = hypervisors[0] + self.assertEqual(nodename, hypervisor['hypervisor_hostname']) + self.assertEqual(expected_host, hypervisor['service']['host']) + + def _start_compute(self, host): + host = self.start_service('compute', host) + # Ironic compute driver has rebalances_nodes = True. + host.manager.driver.rebalances_nodes = True + return host + + def setUp(self): + super(NodeRebalanceDeletedComputeNodeRaceTestCase, self).setUp() + + self.nodename = 'fake-node' + self.ctxt = context.get_admin_context() + + def test_node_rebalance_deleted_compute_node_race(self): + # Simulate a service running and then stopping. host_a runs, creates + # fake-node, then is stopped. The fake-node compute node is destroyed. + # This leaves a soft-deleted node in the DB. + host_a = self._start_compute('host_a') + host_a.manager.driver._set_nodes([self.nodename]) + host_a.manager.update_available_resource(self.ctxt) + host_a.stop() + cn = objects.ComputeNode.get_by_host_and_nodename( + self.ctxt, 'host_a', self.nodename, + ) + cn.destroy() + + self.assertEqual(0, len(objects.ComputeNodeList.get_all(self.ctxt))) + + # Now we create a new compute service to manage our node. + host_b = self._start_compute('host_b') + host_b.manager.driver._set_nodes([self.nodename]) + + # When start_service runs, it will create a host_b ComputeNode. We want + # to delete that and inject our fake node into the driver which will + # be re-balanced to another host later. First assert this actually + # exists. + self._assert_hypervisor_api('host_b', expected_host='host_b') + + # Now run the update_available_resource periodic to register fake-node + # and have it managed by host_b. This will also detect the "host_b" + # node as orphaned and delete it along with its resource provider. + cn_host_b_node = objects.ComputeNode.get_by_host_and_nodename( + self.ctxt, 'host_b', 'host_b', + ) + + # host_b[1]: Finds no compute record in RT. Tries to create one + # (_init_compute_node). + # FIXME(mgoddard): This shows a traceback with SQL rollback due to + # soft-deleted node. The create seems to succeed but breaks the RT + # update for this node. See + # https://bugs.launchpad.net/nova/+bug/1853159. + host_b.manager.update_available_resource(self.ctxt) + self.assertIn( + 'Deleting orphan compute node %s hypervisor host ' + 'is host_b, nodes are' % cn_host_b_node.id, + self.stdlog.logger.output) + self._assert_hypervisor_api(self.nodename, expected_host='host_b') + # There should only be one resource provider (fake-node). + original_rps = self._get_all_providers() + self.assertEqual(1, len(original_rps), original_rps) + self.assertEqual(self.nodename, original_rps[0]['name']) + + # Simulate a re-balance by restarting host_a and make it manage + # fake-node. At this point both host_b and host_a think they own + # fake-node. + host_a = self._start_compute('host_a') + host_a.manager.driver._set_nodes([self.nodename]) + + # host_a[1]: Finds no compute record in RT, 'moves' existing node from + # host_b + host_a.manager.update_available_resource(self.ctxt) + # Assert that fake-node was re-balanced from host_b to host_a. 
+ self.assertIn( + 'ComputeNode fake-node moving from host_b to host_a', + self.stdlog.logger.output) + self._assert_hypervisor_api(self.nodename, expected_host='host_a') + + # host_a[2]: Begins periodic update, queries compute nodes for this + # host, finds the fake-node. + cn = objects.ComputeNode.get_by_host_and_nodename( + self.ctxt, 'host_a', self.nodename, + ) + + # host_b[2]: Finds no compute record in RT, 'moves' existing node from + # host_a + host_b.manager.update_available_resource(self.ctxt) + # Assert that fake-node was re-balanced from host_a to host_b. + self.assertIn( + 'ComputeNode fake-node moving from host_a to host_b', + self.stdlog.logger.output) + self._assert_hypervisor_api(self.nodename, expected_host='host_b') + + # Complete rebalance, as host_a realises it does not own fake-node. + host_a.manager.driver._set_nodes([]) + + # host_a[2]: Deletes orphan compute node. + # Mock out the compute node query to simulate a race condition where + # the list includes an orphan compute node that is newly owned by + # host_b by the time host_a attempts to delete it. + # FIXME(mgoddard): Ideally host_a would not delete a node that does not + # belong to it. See https://bugs.launchpad.net/nova/+bug/1853009. + with mock.patch( + 'nova.compute.manager.ComputeManager._get_compute_nodes_in_db' + ) as mock_get: + mock_get.return_value = [cn] + host_a.manager.update_available_resource(self.ctxt) + + # Verify that the node was deleted. + self.assertIn( + 'Deleting orphan compute node %s hypervisor host ' + 'is fake-node, nodes are' % cn.id, + self.stdlog.logger.output) + hypervisors = self.api.api_get( + '/os-hypervisors/detail').body['hypervisors'] + self.assertEqual(0, len(hypervisors), hypervisors) + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # host_b[3]: Should recreate compute node and resource provider. + # FIXME(mgoddard): Compute node not recreated here, because it is + # already in RT.compute_nodes. See + # https://bugs.launchpad.net/nova/+bug/1853009. + # FIXME(mgoddard): Resource provider not recreated here, because it + # exists in the provider tree. See + # https://bugs.launchpad.net/nova/+bug/1841481. + host_b.manager.update_available_resource(self.ctxt) + + # Verify that the node was not recreated. + hypervisors = self.api.api_get( + '/os-hypervisors/detail').body['hypervisors'] + self.assertEqual(0, len(hypervisors), hypervisors) + + # But the compute node exists in the RT. + self.assertIn(self.nodename, host_b.manager.rt.compute_nodes) + + # There is no RP. + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # But the RP exists in the provider tree. + self.assertTrue(host_b.manager.rt.reportclient._provider_tree.exists( + self.nodename)) + + # This fails due to the lack of a resource provider. + self.assertIn( + 'Skipping removal of allocations for deleted instances', + self.stdlog.logger.output) From faa21f1a3ad129c57090424b0507e37ed4f2cda0 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Wed, 28 Apr 2021 13:53:39 +0100 Subject: [PATCH 2/8] Clear rebalanced compute nodes from resource tracker There is a race condition in nova-compute with the ironic virt driver as nodes get rebalanced. It can lead to compute nodes being removed in the DB and not repopulated. Ultimately this prevents these nodes from being scheduled to. 
The issue being addressed here is that if a compute node is deleted by a host which thinks it is an orphan, then the compute host that actually owns the node might not recreate it if the node is already in its resource tracker cache. This change fixes the issue by clearing nodes from the resource tracker cache for which a compute node entry does not exist. Then, when the available resource for the node is updated, the compute node object is not found in the cache and gets recreated. Conflicts: nova/tests/unit/compute/test_resource_tracker.py NOTE(melwitt): The conflict is because change I142a1f24ff2219cf308578f0236259d183785cff (Provider Config File: Functions to merge provider configs to provider tree) is not in Ussuri. Change-Id: I39241223b447fcc671161c370dbf16e1773b684a Partial-Bug: #1853009 (cherry picked from commit 32676a9f45807ea8770dc7bdff1e859673af1b61) (cherry picked from commit f950cedf17cc4c3ce9d094dbfde5e4cf013260f7) (cherry picked from commit 32887f82c9bd430f285ff4418811dffdd88e398f) (cherry picked from commit 4b5d0804749685c8afe182b716b7536045432a7e) --- nova/compute/manager.py | 2 + nova/compute/resource_tracker.py | 17 ++++++++ .../regressions/test_bug_1853009.py | 42 ++++++++++++------- nova/tests/unit/compute/test_compute_mgr.py | 6 ++- .../unit/compute/test_resource_tracker.py | 17 ++++++++ 5 files changed, 66 insertions(+), 18 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 5421bc62e55..d18cc5618ad 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -9859,6 +9859,8 @@ def update_available_resource(self, context, startup=False): use_slave=True, startup=startup) + self.rt.clean_compute_node_cache(compute_nodes_in_db) + # Delete orphan compute node not reported by driver but still in db for cn in compute_nodes_in_db: if cn.hypervisor_hostname not in nodenames: diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index c12ada21e7a..5650fdd72de 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1831,3 +1831,20 @@ def finish_evacuation(self, instance, node, migration): if migration: migration.status = 'done' migration.save() + + @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True) + def clean_compute_node_cache(self, compute_nodes_in_db): + """Clean the compute node cache of any nodes that no longer exist. + + :param compute_nodes_in_db: list of ComputeNode objects from the DB. + """ + compute_nodes_in_db_nodenames = {cn.hypervisor_hostname + for cn in compute_nodes_in_db} + stale_cns = set(self.compute_nodes) - compute_nodes_in_db_nodenames + + for stale_cn in stale_cns: + # NOTE(mgoddard): we have found a node in the cache that has no + # compute node in the DB. This could be due to a node rebalance + # where another compute service took ownership of the node. Clean + # up the cache. + self.remove_node(stale_cn) diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index ff6369b710c..16b8209a852 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -82,9 +82,6 @@ def test_node_rebalance_deleted_compute_node_race(self): # Now run the update_available_resource periodic to register fake-node # and have it managed by host_b. This will also detect the "host_b" # node as orphaned and delete it along with its resource provider. 
- cn_host_b_node = objects.ComputeNode.get_by_host_and_nodename( - self.ctxt, 'host_b', 'host_b', - ) # host_b[1]: Finds no compute record in RT. Tries to create one # (_init_compute_node). @@ -93,10 +90,6 @@ def test_node_rebalance_deleted_compute_node_race(self): # update for this node. See # https://bugs.launchpad.net/nova/+bug/1853159. host_b.manager.update_available_resource(self.ctxt) - self.assertIn( - 'Deleting orphan compute node %s hypervisor host ' - 'is host_b, nodes are' % cn_host_b_node.id, - self.stdlog.logger.output) self._assert_hypervisor_api(self.nodename, expected_host='host_b') # There should only be one resource provider (fake-node). original_rps = self._get_all_providers() @@ -160,21 +153,17 @@ def test_node_rebalance_deleted_compute_node_race(self): self.assertEqual(0, len(rps), rps) # host_b[3]: Should recreate compute node and resource provider. - # FIXME(mgoddard): Compute node not recreated here, because it is - # already in RT.compute_nodes. See - # https://bugs.launchpad.net/nova/+bug/1853009. # FIXME(mgoddard): Resource provider not recreated here, because it # exists in the provider tree. See # https://bugs.launchpad.net/nova/+bug/1841481. host_b.manager.update_available_resource(self.ctxt) - # Verify that the node was not recreated. - hypervisors = self.api.api_get( - '/os-hypervisors/detail').body['hypervisors'] - self.assertEqual(0, len(hypervisors), hypervisors) + # Verify that the node was recreated. + self._assert_hypervisor_api(self.nodename, 'host_b') - # But the compute node exists in the RT. - self.assertIn(self.nodename, host_b.manager.rt.compute_nodes) + # But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute + # node is not cached in the RT. + self.assertNotIn(self.nodename, host_b.manager.rt.compute_nodes) # There is no RP. rps = self._get_all_providers() @@ -184,6 +173,27 @@ def test_node_rebalance_deleted_compute_node_race(self): self.assertTrue(host_b.manager.rt.reportclient._provider_tree.exists( self.nodename)) + # host_b[1]: Should add compute node to RT cache and recreate resource + # provider. + # FIXME(mgoddard): Resource provider not recreated here, because it + # exists in the provider tree. See + # https://bugs.launchpad.net/nova/+bug/1841481. + host_b.manager.update_available_resource(self.ctxt) + + # Verify that the node still exists. + self._assert_hypervisor_api(self.nodename, 'host_b') + + # And it is now in the RT cache. + self.assertIn(self.nodename, host_b.manager.rt.compute_nodes) + + # There is still no RP. + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # But the RP it exists in the provider tree. + self.assertTrue(host_b.manager.rt.reportclient._provider_tree.exists( + self.nodename)) + # This fails due to the lack of a resource provider. self.assertIn( 'Skipping removal of allocations for deleted instances', diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 7d777f7725b..623e9562ae9 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -376,18 +376,20 @@ def test_update_available_resource(self, get_db_nodes, get_avail_nodes, ) # First node in set should have been removed from DB + # Last node in set should have been added to DB. 
for db_node in db_nodes: if db_node.hypervisor_hostname == 'node1': db_node.destroy.assert_called_once_with() rc_mock.delete_resource_provider.assert_called_once_with( self.context, db_node, cascade=True) - mock_rt.remove_node.assert_called_once_with( - 'node1') + mock_rt.remove_node.assert_called_once_with('node1') mock_log.error.assert_called_once_with( "Failed to delete compute node resource provider for " "compute node %s: %s", db_node.uuid, mock.ANY) else: self.assertFalse(db_node.destroy.called) + self.assertEqual(1, mock_rt.remove_node.call_count) + mock_rt.clean_compute_node_cache.assert_called_once_with(db_nodes) @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 'delete_resource_provider') diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 9ded03602e7..89d58a4bad0 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -3927,3 +3927,20 @@ def foo(self): self.assertRaises(AssertionError, _test_explict_unfair) self.assertRaises(AssertionError, _test_implicit_unfair) + + +class TestCleanComputeNodeCache(BaseTestCase): + + def setUp(self): + super(TestCleanComputeNodeCache, self).setUp() + self._setup_rt() + self.context = context.RequestContext( + mock.sentinel.user_id, mock.sentinel.project_id) + + @mock.patch.object(resource_tracker.ResourceTracker, "remove_node") + def test_clean_compute_node_cache(self, mock_remove): + invalid_nodename = "invalid-node" + self.rt.compute_nodes[_NODENAME] = self.compute + self.rt.compute_nodes[invalid_nodename] = mock.sentinel.compute + self.rt.clean_compute_node_cache([self.compute]) + mock_remove.assert_called_once_with(invalid_nodename) From 9224e1d293be8266a07eb7c3738200f2959e616d Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 19 Nov 2019 16:51:01 +0000 Subject: [PATCH 3/8] Invalidate provider tree when compute node disappears There is a race condition in nova-compute with the ironic virt driver as nodes get rebalanced. It can lead to compute nodes being removed in the DB and not repopulated. Ultimately this prevents these nodes from being scheduled to. The issue being addressed here is that if a compute node is deleted by a host which thinks it is an orphan, then the resource provider for that node might also be deleted. The compute host that owns the node might not recreate the resource provider if it exists in the provider tree cache. This change fixes the issue by clearing resource providers from the provider tree cache for which a compute node entry does not exist. Then, when the available resource for the node is updated, the resource providers are not found in the cache and get recreated in placement. 
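Taken together with the resource tracker change in the previous patch, the
fix reduces to roughly the following (condensed from the diffs; decorators,
locking and logging omitted -- not the verbatim code):

    # ResourceTracker (nova/compute/resource_tracker.py)
    def clean_compute_node_cache(self, compute_nodes_in_db):
        db_nodenames = {cn.hypervisor_hostname for cn in compute_nodes_in_db}
        for stale_cn in set(self.compute_nodes) - db_nodenames:
            # Another compute service took ownership of this node during a
            # rebalance, so drop it from the local caches.
            self.remove_node(stale_cn)
            self.reportclient.invalidate_resource_provider(stale_cn)

    # SchedulerReportClient (nova/scheduler/client/report.py)
    def invalidate_resource_provider(self, name_or_uuid):
        try:
            self._provider_tree.remove(name_or_uuid)
        except ValueError:
            pass  # not in the cached tree, nothing to forget
        self._association_refresh_time.pop(name_or_uuid, None)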
Change-Id: Ia53ff43e6964963cdf295604ba0fb7171389606e Related-Bug: #1853009 Related-Bug: #1841481 (cherry picked from commit 2bb4527228c8e6fa4a1fa6cfbe80e8790e4e0789) (cherry picked from commit 0fc104eeea065579f7fa9b52794d5151baefc84c) (cherry picked from commit 456bcc784340a5674d25fbe6e707c36c583b504f) (cherry picked from commit 8fa5b881c9b8fb78ceca7ae02d808f1fe64d72e4) --- nova/compute/resource_tracker.py | 1 + nova/scheduler/client/report.py | 17 ++++++++++++----- .../regressions/test_bug_1853009.py | 19 ++++++------------- .../unit/compute/test_resource_tracker.py | 8 ++++++-- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 5650fdd72de..a351a21afa6 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1848,3 +1848,4 @@ def clean_compute_node_cache(self, compute_nodes_in_db): # where another compute service took ownership of the node. Clean # up the cache. self.remove_node(stale_cn) + self.reportclient.invalidate_resource_provider(stale_cn) diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 350825af146..8faae5b65ab 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -676,11 +676,7 @@ def _delete_provider(self, rp_uuid, global_request_id=None): if resp: LOG.info("Deleted resource provider %s", rp_uuid) # clean the caches - try: - self._provider_tree.remove(rp_uuid) - except ValueError: - pass - self._association_refresh_time.pop(rp_uuid, None) + self.invalidate_resource_provider(rp_uuid) return msg = ("[%(placement_req_id)s] Failed to delete resource provider " @@ -2161,6 +2157,17 @@ def delete_resource_provider(self, context, compute_node, cascade=False): # for backward compatibility. pass + def invalidate_resource_provider(self, name_or_uuid): + """Invalidate the cache for a resource provider. + + :param name_or_uuid: Name or UUID of the resource provider to look up. + """ + try: + self._provider_tree.remove(name_or_uuid) + except ValueError: + pass + self._association_refresh_time.pop(name_or_uuid, None) + def get_provider_by_name(self, context, name): """Queries the placement API for resource provider information matching a supplied name. diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index 16b8209a852..dddc79606f0 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -153,9 +153,8 @@ def test_node_rebalance_deleted_compute_node_race(self): self.assertEqual(0, len(rps), rps) # host_b[3]: Should recreate compute node and resource provider. - # FIXME(mgoddard): Resource provider not recreated here, because it - # exists in the provider tree. See - # https://bugs.launchpad.net/nova/+bug/1841481. + # FIXME(mgoddard): Resource provider not recreated here, due to + # https://bugs.launchpad.net/nova/+bug/1853159. host_b.manager.update_available_resource(self.ctxt) # Verify that the node was recreated. @@ -170,14 +169,11 @@ def test_node_rebalance_deleted_compute_node_race(self): self.assertEqual(0, len(rps), rps) # But the RP exists in the provider tree. - self.assertTrue(host_b.manager.rt.reportclient._provider_tree.exists( + self.assertFalse(host_b.manager.rt.reportclient._provider_tree.exists( self.nodename)) # host_b[1]: Should add compute node to RT cache and recreate resource # provider. 
- # FIXME(mgoddard): Resource provider not recreated here, because it - # exists in the provider tree. See - # https://bugs.launchpad.net/nova/+bug/1841481. host_b.manager.update_available_resource(self.ctxt) # Verify that the node still exists. @@ -186,13 +182,10 @@ def test_node_rebalance_deleted_compute_node_race(self): # And it is now in the RT cache. self.assertIn(self.nodename, host_b.manager.rt.compute_nodes) - # There is still no RP. + # The resource provider has now been created. rps = self._get_all_providers() - self.assertEqual(0, len(rps), rps) - - # But the RP it exists in the provider tree. - self.assertTrue(host_b.manager.rt.reportclient._provider_tree.exists( - self.nodename)) + self.assertEqual(1, len(rps), rps) + self.assertEqual(self.nodename, rps[0]['name']) # This fails due to the lack of a resource provider. self.assertIn( diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 89d58a4bad0..cd77f70e4e2 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -3942,5 +3942,9 @@ def test_clean_compute_node_cache(self, mock_remove): invalid_nodename = "invalid-node" self.rt.compute_nodes[_NODENAME] = self.compute self.rt.compute_nodes[invalid_nodename] = mock.sentinel.compute - self.rt.clean_compute_node_cache([self.compute]) - mock_remove.assert_called_once_with(invalid_nodename) + with mock.patch.object( + self.rt.reportclient, "invalidate_resource_provider", + ) as mock_invalidate: + self.rt.clean_compute_node_cache([self.compute]) + mock_remove.assert_called_once_with(invalid_nodename) + mock_invalidate.assert_called_once_with(invalid_nodename) From 0e638445a505a69a3c4a2ac172bdb5960ce2a869 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Mon, 18 Nov 2019 12:06:47 +0000 Subject: [PATCH 4/8] Prevent deletion of a compute node belonging to another host There is a race condition in nova-compute with the ironic virt driver as nodes get rebalanced. It can lead to compute nodes being removed in the DB and not repopulated. Ultimately this prevents these nodes from being scheduled to. The main race condition involved is in update_available_resources in the compute manager. When the list of compute nodes is queried, there is a compute node belonging to the host that it does not expect to be managing, i.e. it is an orphan. Between that time and deleting the orphan, the real owner of the compute node takes ownership of it ( in the resource tracker). However, the node is still deleted as the first host is unaware of the ownership change. This change prevents this from occurring by filtering on the host when deleting a compute node. If another compute host has taken ownership of a node, it will have updated the host field and this will prevent deletion from occurring. The first host sees this has happened via the ComputeHostNotFound exception, and avoids deleting its resource provider. Co-Authored-By: melanie witt Conflicts: nova/compute/manager.py NOTE(melwitt): The conflict is because change I23bb9e539d08f5c6202909054c2dd49b6c7a7a0e (Remove six.text_type (1/2)) is not in Victoria. 
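For illustration, the behaviour the first host now sees when it races with
the new owner looks roughly like this (hypothetical hosts and node name;
ComputeNode and the exceptions are the Nova objects changed below):

    cn = objects.ComputeNode.get_by_host_and_nodename(
        ctxt, 'host_a', 'fake-node')
    # ... hash ring rebalance: host_b takes ownership, so the row's host
    # column in the DB is now 'host_b' ...
    try:
        cn.destroy()  # delete is constrained on host == 'host_a'
    except exception.ObjectActionError:
        # host_a must not delete a node it no longer owns; the compute
        # node and its resource provider are left for host_b.
        pass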
Closes-Bug: #1853009 Related-Bug: #1841481 Change-Id: I260c1fded79a85d4899e94df4d9036a1ee437f02 (cherry picked from commit a8492e88783b40f6dc61888fada232f0d00d6acf) (cherry picked from commit cbbca58504275f194ec55eeb89dad4a496d98060) (cherry picked from commit 737a4ccd1398a5cf28a67547f226b668f246c784) (cherry picked from commit 9aef6370d142a933bb8120b7a4fc71e65e3023bf) --- nova/compute/manager.py | 34 +++++++++++++------ nova/db/sqlalchemy/api.py | 20 ++++++++--- nova/objects/compute_node.py | 15 +++++++- .../regressions/test_bug_1853009.py | 25 ++++++++++---- nova/tests/unit/compute/test_compute.py | 5 +-- nova/tests/unit/compute/test_compute_mgr.py | 27 +++++++++++++++ nova/tests/unit/db/test_db_api.py | 7 ++++ nova/tests/unit/objects/test_compute_node.py | 15 ++++++-- .../notes/bug-1853009-99414e14d1491b5f.yaml | 7 ++++ 9 files changed, 128 insertions(+), 27 deletions(-) create mode 100644 releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml diff --git a/nova/compute/manager.py b/nova/compute/manager.py index d18cc5618ad..1e351e6742c 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -9869,17 +9869,31 @@ def update_available_resource(self, context, startup=False): "nodes are %(nodes)s", {'id': cn.id, 'hh': cn.hypervisor_hostname, 'nodes': nodenames}) - cn.destroy() - self.rt.remove_node(cn.hypervisor_hostname) - # Delete the corresponding resource provider in placement, - # along with any associated allocations. try: - self.reportclient.delete_resource_provider(context, cn, - cascade=True) - except keystone_exception.ClientException as e: - LOG.error( - "Failed to delete compute node resource provider " - "for compute node %s: %s", cn.uuid, six.text_type(e)) + cn.destroy() + except exception.ObjectActionError: + # NOTE(mgoddard): it's possible that another compute + # service took ownership of this compute node since we + # queried it due to a rebalance, and this will cause the + # deletion to fail. Ignore the error in that case. + LOG.info("Ignoring failure to delete orphan compute node " + "%(id)s on hypervisor host %(hh)s due to " + "possible node rebalance", + {'id': cn.id, 'hh': cn.hypervisor_hostname}) + self.rt.remove_node(cn.hypervisor_hostname) + self.reportclient.invalidate_resource_provider(cn.uuid) + else: + self.rt.remove_node(cn.hypervisor_hostname) + # Delete the corresponding resource provider in placement, + # along with any associated allocations. 
+ try: + self.reportclient.delete_resource_provider( + context, cn, cascade=True) + except keystone_exception.ClientException as e: + LOG.error( + "Failed to delete compute node resource provider " + "for compute node %s: %s", cn.uuid, + six.text_type(e)) for nodename in nodenames: self._update_available_resource_for_node(context, nodename, diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 1fa7f5a322b..f3dec418dbc 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -763,14 +763,24 @@ def compute_node_update(context, compute_id, values): @pick_context_manager_writer -def compute_node_delete(context, compute_id): +def compute_node_delete(context, compute_id, constraint=None): """Delete a ComputeNode record.""" - result = model_query(context, models.ComputeNode).\ - filter_by(id=compute_id).\ - soft_delete(synchronize_session=False) + query = model_query(context, models.ComputeNode).filter_by(id=compute_id) + + if constraint is not None: + query = constraint.apply(models.ComputeNode, query) + + result = query.soft_delete(synchronize_session=False) if not result: - raise exception.ComputeHostNotFound(host=compute_id) + # The soft_delete could fail for one of two reasons: + # 1) The compute node no longer exists + # 2) The constraint, if specified, was not met + # Try to read the compute node and let it raise ComputeHostNotFound if + # 1) happened. + compute_node_get(context, compute_id) + # Else, raise ConstraintNotMet if 2) happened. + raise exception.ConstraintNotMet() @pick_context_manager_reader diff --git a/nova/objects/compute_node.py b/nova/objects/compute_node.py index b7f7f966b9d..33782bc4419 100644 --- a/nova/objects/compute_node.py +++ b/nova/objects/compute_node.py @@ -354,7 +354,20 @@ def save(self, prune_stats=False): @base.remotable def destroy(self): - db.compute_node_delete(self._context, self.id) + if self.obj_attr_is_set('host') and self.host: + # NOTE(melwitt): If our host is set, avoid a race between + # nova-computes during ironic driver node rebalances which can + # change node ownership. + constraint = db.constraint(host=db.equal_any(self.host)) + else: + constraint = None + + try: + sa_api.compute_node_delete( + self._context, self.id, constraint=constraint) + except exception.ConstraintNotMet: + raise exception.ObjectActionError(action='destroy', + reason='host changed') def update_from_virt_driver(self, resources): # NOTE(pmurray): the virt driver provides a dict of values that diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index dddc79606f0..a3ccd69b1b6 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -133,24 +133,35 @@ def test_node_rebalance_deleted_compute_node_race(self): # Mock out the compute node query to simulate a race condition where # the list includes an orphan compute node that is newly owned by # host_b by the time host_a attempts to delete it. - # FIXME(mgoddard): Ideally host_a would not delete a node that does not - # belong to it. See https://bugs.launchpad.net/nova/+bug/1853009. with mock.patch( 'nova.compute.manager.ComputeManager._get_compute_nodes_in_db' ) as mock_get: mock_get.return_value = [cn] host_a.manager.update_available_resource(self.ctxt) - # Verify that the node was deleted. 
+ # Verify that the node was almost deleted, but was saved by the host + # check self.assertIn( 'Deleting orphan compute node %s hypervisor host ' 'is fake-node, nodes are' % cn.id, self.stdlog.logger.output) - hypervisors = self.api.api_get( - '/os-hypervisors/detail').body['hypervisors'] - self.assertEqual(0, len(hypervisors), hypervisors) + self.assertIn( + 'Ignoring failure to delete orphan compute node %s on ' + 'hypervisor host fake-node' % cn.id, + self.stdlog.logger.output) + self._assert_hypervisor_api(self.nodename, 'host_b') rps = self._get_all_providers() - self.assertEqual(0, len(rps), rps) + self.assertEqual(1, len(rps), rps) + self.assertEqual(self.nodename, rps[0]['name']) + + # Simulate deletion of an orphan by host_a. It shouldn't happen + # anymore, but let's assume it already did. + cn = objects.ComputeNode.get_by_host_and_nodename( + self.ctxt, 'host_b', self.nodename) + cn.destroy() + host_a.manager.rt.remove_node(cn.hypervisor_hostname) + host_a.manager.reportclient.delete_resource_provider( + self.ctxt, cn, cascade=True) # host_b[3]: Should recreate compute node and resource provider. # FIXME(mgoddard): Resource provider not recreated here, due to diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index c12d83ca550..57101d6d184 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -202,13 +202,14 @@ def fake_get_compute_nodes_in_db(self, context, *args, **kwargs): context, objects.ComputeNode(), cn) for cn in fake_compute_nodes] - def fake_compute_node_delete(context, compute_node_id): + def fake_compute_node_delete(context, compute_node_id, + constraint=None): self.assertEqual(2, compute_node_id) self.stub_out( 'nova.compute.manager.ComputeManager._get_compute_nodes_in_db', fake_get_compute_nodes_in_db) - self.stub_out('nova.db.api.compute_node_delete', + self.stub_out('nova.db.sqlalchemy.api.compute_node_delete', fake_compute_node_delete) self.compute.update_available_resource( diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 623e9562ae9..f1aae98c6c6 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -411,6 +411,33 @@ def test_update_available_resource_not_ready(self, get_db_nodes, update_mock.assert_not_called() del_rp_mock.assert_not_called() + @mock.patch.object(manager.ComputeManager, + '_update_available_resource_for_node') + @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes') + @mock.patch.object(manager.ComputeManager, '_get_compute_nodes_in_db') + def test_update_available_resource_destroy_rebalance( + self, get_db_nodes, get_avail_nodes, update_mock): + mock_rt = self._mock_rt() + rc_mock = self.useFixture(fixtures.fixtures.MockPatchObject( + self.compute, 'reportclient')).mock + db_nodes = [self._make_compute_node('node1', 1)] + get_db_nodes.return_value = db_nodes + # Destroy can fail if nodes were rebalanced between getting the node + # list and calling destroy. 
+ db_nodes[0].destroy.side_effect = exception.ObjectActionError( + action='destroy', reason='host changed') + get_avail_nodes.return_value = set() + self.compute.update_available_resource(self.context) + get_db_nodes.assert_called_once_with(self.context, set(), + use_slave=True, startup=False) + self.assertEqual(0, update_mock.call_count) + + db_nodes[0].destroy.assert_called_once_with() + self.assertEqual(0, rc_mock.delete_resource_provider.call_count) + mock_rt.remove_node.assert_called_once_with('node1') + rc_mock.invalidate_resource_provider.assert_called_once_with( + db_nodes[0].uuid) + @mock.patch('nova.context.get_admin_context') def test_pre_start_hook(self, get_admin_context): """Very simple test just to make sure update_available_resource is diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index 6b80f216612..21b94f9c728 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -5571,6 +5571,13 @@ def test_compute_node_delete(self): nodes = db.compute_node_get_all(self.ctxt) self.assertEqual(len(nodes), 0) + def test_compute_node_delete_different_host(self): + compute_node_id = self.item['id'] + constraint = db.constraint(host=db.equal_any('different-host')) + self.assertRaises(exception.ConstraintNotMet, + sqlalchemy_api.compute_node_delete, + self.ctxt, compute_node_id, constraint=constraint) + def test_compute_node_search_by_hypervisor(self): nodes_created = [] new_service = copy.copy(self.service_dict) diff --git a/nova/tests/unit/objects/test_compute_node.py b/nova/tests/unit/objects/test_compute_node.py index aeaf4b957fe..747cc1a36eb 100644 --- a/nova/tests/unit/objects/test_compute_node.py +++ b/nova/tests/unit/objects/test_compute_node.py @@ -24,6 +24,7 @@ from nova import conf from nova.db import api as db +from nova.db.sqlalchemy import api as sa_api from nova import exception from nova import objects from nova.objects import base @@ -410,12 +411,22 @@ def test_set_id_failure(self, mock_get, db_mock): self.assertRaises(ovo_exc.ReadOnlyFieldError, setattr, compute, 'id', 124) - @mock.patch.object(db, 'compute_node_delete') + @mock.patch.object(sa_api, 'compute_node_delete') def test_destroy(self, mock_delete): compute = compute_node.ComputeNode(context=self.context) compute.id = 123 compute.destroy() - mock_delete.assert_called_once_with(self.context, 123) + mock_delete.assert_called_once_with(self.context, 123, constraint=None) + + def test_destroy_host_constraint(self): + # Create compute node with host='fake' + compute = fake_compute_with_resources.obj_clone() + compute._context = self.context + compute.host = 'fake' + compute.create() + # Simulate a compute node ownership change due to a node rebalance + compute.host = 'different' + self.assertRaises(exception.ObjectActionError, compute.destroy) @mock.patch.object(db, 'compute_node_get_all') def test_get_all(self, mock_get_all): diff --git a/releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml b/releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml new file mode 100644 index 00000000000..d6e71c42224 --- /dev/null +++ b/releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixes an issue with multiple ``nova-compute`` services used with Ironic, + where a rebalance operation could result in a compute node being deleted + from the database and not recreated. See `bug 1853009 + `__ for details. 
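Condensed, the new delete path added by this patch is (simplified from the
diff above; logging and the Keystone error handling omitted):

    # objects.ComputeNode.destroy()
    constraint = None
    if self.obj_attr_is_set('host') and self.host:
        # Only delete the row while it is still owned by this host.
        constraint = db.constraint(host=db.equal_any(self.host))
    try:
        sa_api.compute_node_delete(self._context, self.id,
                                   constraint=constraint)
    except exception.ConstraintNotMet:
        raise exception.ObjectActionError(action='destroy',
                                          reason='host changed')

    # ComputeManager.update_available_resource(), per orphan node:
    try:
        cn.destroy()
    except exception.ObjectActionError:
        # Rebalanced to another host: only invalidate local caches.
        self.rt.remove_node(cn.hypervisor_hostname)
        self.reportclient.invalidate_resource_provider(cn.uuid)
    else:
        self.rt.remove_node(cn.hypervisor_hostname)
        self.reportclient.delete_resource_provider(context, cn, cascade=True)
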
From 22656aa979475d6445a6ac8dae6ba8850f43ccec Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 20 Nov 2019 12:01:33 +0000 Subject: [PATCH 5/8] Fix inactive session error in compute node creation In the fix for bug 1839560 [1][2], soft-deleted compute nodes may be restored, to ensure we can reuse ironic node UUIDs as compute node UUIDs. While this seems to largely work, it results in some nasty errors being generated [3]: InvalidRequestError This session is in 'inactive' state, due to the SQL transaction being rolled back; no further SQL can be emitted within this transaction. This happens because compute_node_create is decorated with pick_context_manager_writer, which begins a transaction. While _compute_node_get_and_update_deleted claims that calling a second pick_context_manager_writer decorated function will begin a new subtransaction, this does not appear to be the case. This change removes pick_context_manager_writer from the compute_node_create function, and adds a new _compute_node_create function which ensures the transaction is finished if _compute_node_get_and_update_deleted is called. The new unit test added here fails without this change. This change marks the removal of the final FIXME from the functional test added in [4]. [1] https://bugs.launchpad.net/nova/+bug/1839560 [2] https://git.openstack.org/cgit/openstack/nova/commit/?id=89dd74ac7f1028daadf86cb18948e27fe9d1d411 [3] http://paste.openstack.org/show/786350/ [4] https://review.opendev.org/#/c/695012/ Change-Id: Iae119ea8776bc7f2e5dbe2e502a743217beded73 Closes-Bug: #1853159 Related-Bug: #1853009 (cherry picked from commit 2383cbb4a518821d245fce316b3778c8ba8e5246) (cherry picked from commit 665c053315439e1345aa131f4839945d662fb3f3) (cherry picked from commit b1331d0bed7a414591a389f5eca8700fe2fec136) (cherry picked from commit 43e333f39cb3fb60db136205fddba31ddb97ecf2) --- nova/db/sqlalchemy/api.py | 17 ++++++++-- .../regressions/test_bug_1853009.py | 33 ------------------- nova/tests/unit/db/test_db_api.py | 16 +++++++++ 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index f3dec418dbc..81b9b92a186 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -690,7 +690,7 @@ def compute_node_search_by_hypervisor(context, hypervisor_match): @pick_context_manager_writer -def compute_node_create(context, values): +def _compute_node_create(context, values): """Creates a new ComputeNode and populates the capacity fields with the most recent data. """ @@ -698,8 +698,21 @@ def compute_node_create(context, values): compute_node_ref = models.ComputeNode() compute_node_ref.update(values) + compute_node_ref.save(context.session) + return compute_node_ref + + +# NOTE(mgoddard): We avoid decorating this with @pick_context_manager_writer, +# so that we get a separate transaction in the exception handler. This avoids +# an error message about inactive DB sessions during a transaction rollback. +# See https://bugs.launchpad.net/nova/+bug/1853159. +def compute_node_create(context, values): + """Creates a new ComputeNode and populates the capacity fields + with the most recent data. Will restore a soft deleted compute node if a + UUID has been explicitly requested. 
+ """ try: - compute_node_ref.save(context.session) + compute_node_ref = _compute_node_create(context, values) except db_exc.DBDuplicateEntry: with excutils.save_and_reraise_exception(logger=LOG) as err_ctx: # Check to see if we have a (soft) deleted ComputeNode with the diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index a3ccd69b1b6..2ec69482a25 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -85,10 +85,6 @@ def test_node_rebalance_deleted_compute_node_race(self): # host_b[1]: Finds no compute record in RT. Tries to create one # (_init_compute_node). - # FIXME(mgoddard): This shows a traceback with SQL rollback due to - # soft-deleted node. The create seems to succeed but breaks the RT - # update for this node. See - # https://bugs.launchpad.net/nova/+bug/1853159. host_b.manager.update_available_resource(self.ctxt) self._assert_hypervisor_api(self.nodename, expected_host='host_b') # There should only be one resource provider (fake-node). @@ -164,41 +160,12 @@ def test_node_rebalance_deleted_compute_node_race(self): self.ctxt, cn, cascade=True) # host_b[3]: Should recreate compute node and resource provider. - # FIXME(mgoddard): Resource provider not recreated here, due to - # https://bugs.launchpad.net/nova/+bug/1853159. host_b.manager.update_available_resource(self.ctxt) # Verify that the node was recreated. self._assert_hypervisor_api(self.nodename, 'host_b') - # But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute - # node is not cached in the RT. - self.assertNotIn(self.nodename, host_b.manager.rt.compute_nodes) - - # There is no RP. - rps = self._get_all_providers() - self.assertEqual(0, len(rps), rps) - - # But the RP exists in the provider tree. - self.assertFalse(host_b.manager.rt.reportclient._provider_tree.exists( - self.nodename)) - - # host_b[1]: Should add compute node to RT cache and recreate resource - # provider. - host_b.manager.update_available_resource(self.ctxt) - - # Verify that the node still exists. - self._assert_hypervisor_api(self.nodename, 'host_b') - - # And it is now in the RT cache. - self.assertIn(self.nodename, host_b.manager.rt.compute_nodes) - # The resource provider has now been created. rps = self._get_all_providers() self.assertEqual(1, len(rps), rps) self.assertEqual(self.nodename, rps[0]['name']) - - # This fails due to the lack of a resource provider. - self.assertIn( - 'Skipping removal of allocations for deleted instances', - self.stdlog.logger.output) diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index 21b94f9c728..3b26735e381 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -5283,6 +5283,22 @@ def test_compute_node_create_duplicate_host_hypervisor_hostname(self): self.assertRaises(db_exc.DBDuplicateEntry, db.compute_node_create, self.ctxt, other_node) + def test_compute_node_create_duplicate_uuid(self): + """Tests to make sure that no exception is raised when trying to create + a compute node with the same host, hypervisor_hostname and uuid values + as another compute node that was previously soft-deleted. + """ + # Prior to fixing https://bugs.launchpad.net/nova/+bug/1853159, this + # raised the following error: + # sqlalchemy.exc.InvalidRequestError: This session is in 'inactive' + # state, due to the SQL transaction being rolled back; no further SQL + # can be emitted within this transaction. 
+ constraint = db.constraint(host=db.equal_any(self.item['host'])) + sqlalchemy_api.compute_node_delete( + self.ctxt, self.item['id'], constraint=constraint) + new_node = db.compute_node_create(self.ctxt, self.compute_node_dict) + self.assertEqual(self.item['uuid'], new_node['uuid']) + def test_compute_node_get_all(self): nodes = db.compute_node_get_all(self.ctxt) self.assertEqual(1, len(nodes)) From c0adddea6d0beddf46482586bb83133b8ffffc1f Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Fri, 8 Oct 2021 14:35:00 -0700 Subject: [PATCH 6/8] Ignore plug_vifs on the ironic driver When the nova-compute service starts, by default it attempts to startup instance configuration states for aspects such as networking. This is fine in most cases, and makes a lot of sense if the nova-compute service is just managing virtual machines on a hypervisor. This is done, one instance at a time. However, when the compute driver is ironic, the networking is managed as part of the physical machine lifecycle potentially all the way into committed switch configurations. As such, there is no need to attempt to call ``plug_vifs`` on every single instance managed by the nova-compute process which is backed by Ironic. Additionally, using ironic tends to manage far more physical machines per nova-compute service instance then when when operating co-installed with a hypervisor. Often this means a cluster of a thousand machines, with three controllers, will see thousands of un-needed API calls upon service start, which elongates the entire process and negatively impacts operations. In essence, nova.virt.ironic's plug_vifs call now does nothing, and merely issues a debug LOG entry when called. Closes-Bug: #1777608 Change-Id: Iba87cef50238c5b02ab313f2311b826081d5b4ab (cherry picked from commit 7f81cf28bf21ad2afa98accfde3087c83b8e269b) (cherry picked from commit eb6d70f02daa14920a2522e5c734a3775ea2ea7c) (cherry picked from commit b8d8e912e0123b0f6af3a21183149c0f492330e6) (cherry picked from commit 54a0f08e4db1a55f3be233ca63e6100769a722ad) (cherry picked from commit b6cc159eb7a199a83a2489a06f92837e77e5e3b1) --- nova/tests/unit/virt/ironic/test_driver.py | 33 +++++++++++-------- nova/virt/ironic/driver.py | 21 +++++++++--- ...art-port-attachments-3282e9ea051561d4.yaml | 11 +++++++ 3 files changed, 47 insertions(+), 18 deletions(-) create mode 100644 releasenotes/notes/fix-ironic-compute-restart-port-attachments-3282e9ea051561d4.yaml diff --git a/nova/tests/unit/virt/ironic/test_driver.py b/nova/tests/unit/virt/ironic/test_driver.py index ef8b696af0e..2782cf498b9 100644 --- a/nova/tests/unit/virt/ironic/test_driver.py +++ b/nova/tests/unit/virt/ironic/test_driver.py @@ -2018,17 +2018,12 @@ def test_plug_vifs_with_port(self, mock_vatt): @mock.patch.object(ironic_driver.IronicDriver, '_plug_vifs') def test_plug_vifs(self, mock__plug_vifs): node_id = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' - node = _get_cached_node(id=node_id) - - self.mock_conn.get_node.return_value = node instance = fake_instance.fake_instance_obj(self.ctx, node=node_id) network_info = utils.get_test_network_info() self.driver.plug_vifs(instance, network_info) - self.mock_conn.get_node.assert_called_once_with( - node_id, fields=ironic_driver._NODE_FIELDS) - mock__plug_vifs.assert_called_once_with(node, instance, network_info) + mock__plug_vifs.assert_not_called() @mock.patch.object(FAKE_CLIENT.node, 'vif_attach') def test_plug_vifs_multiple_ports(self, mock_vatt): @@ -2162,11 +2157,17 @@ def test_unplug_vifs_no_network_info(self, mock_vdet): 
self.driver.unplug_vifs(instance, network_info) self.assertFalse(mock_vdet.called) - @mock.patch.object(ironic_driver.IronicDriver, 'plug_vifs') + @mock.patch.object(ironic_driver.IronicDriver, '_plug_vifs') def test_attach_interface(self, mock_pv): - self.driver.attach_interface('fake_context', 'fake_instance', + node_uuid = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' + node = _get_cached_node(uuid=node_uuid) + instance = fake_instance.fake_instance_obj(self.ctx, + node=node_uuid) + self.mock_conn.get_node.return_value = node + + self.driver.attach_interface('fake_context', instance, 'fake_image_meta', 'fake_vif') - mock_pv.assert_called_once_with('fake_instance', ['fake_vif']) + mock_pv.assert_called_once_with(node, instance, ['fake_vif']) @mock.patch.object(ironic_driver.IronicDriver, 'unplug_vifs') def test_detach_interface(self, mock_uv): @@ -2459,17 +2460,23 @@ def test_get_volume_connector_no_ip_without_mac(self): def test_get_volume_connector_no_ip_no_fixed_ip(self): self._test_get_volume_connector_no_ip(False, no_fixed_ip=True) - @mock.patch.object(ironic_driver.IronicDriver, 'plug_vifs') + @mock.patch.object(ironic_driver.IronicDriver, '_plug_vifs') def test_prepare_networks_before_block_device_mapping(self, mock_pvifs): + node_uuid = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' + node = _get_cached_node(uuid=node_uuid) + self.mock_conn.get_node.return_value = node instance = fake_instance.fake_instance_obj(self.ctx) network_info = utils.get_test_network_info() self.driver.prepare_networks_before_block_device_mapping(instance, network_info) - mock_pvifs.assert_called_once_with(instance, network_info) + mock_pvifs.assert_called_once_with(node, instance, network_info) - @mock.patch.object(ironic_driver.IronicDriver, 'plug_vifs') + @mock.patch.object(ironic_driver.IronicDriver, '_plug_vifs') def test_prepare_networks_before_block_device_mapping_error(self, mock_pvifs): + node_uuid = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee' + node = _get_cached_node(uuid=node_uuid) + self.mock_conn.get_node.return_value = node instance = fake_instance.fake_instance_obj(self.ctx) network_info = utils.get_test_network_info() mock_pvifs.side_effect = ironic_exception.BadRequest('fake error') @@ -2477,7 +2484,7 @@ def test_prepare_networks_before_block_device_mapping_error(self, ironic_exception.BadRequest, self.driver.prepare_networks_before_block_device_mapping, instance, network_info) - mock_pvifs.assert_called_once_with(instance, network_info) + mock_pvifs.assert_called_once_with(node, instance, network_info) @mock.patch.object(ironic_driver.IronicDriver, 'unplug_vifs') def test_clean_networks_preparation(self, mock_upvifs): diff --git a/nova/virt/ironic/driver.py b/nova/virt/ironic/driver.py index c1ad4996c3f..2542b672276 100644 --- a/nova/virt/ironic/driver.py +++ b/nova/virt/ironic/driver.py @@ -1608,13 +1608,21 @@ def _unplug_vifs(self, node, instance, network_info): def plug_vifs(self, instance, network_info): """Plug VIFs into networks. + This method is present for compatability. Any call will result + in a DEBUG log entry being generated, and will otherwise be + ignored, as Ironic manages VIF attachments through a node + lifecycle. Please see ``attach_interface``, which is the + proper and current method to utilize. + :param instance: The instance object. :param network_info: Instance network information. """ - # instance.node is the ironic node's UUID. 
- node = self._get_node(instance.node) - self._plug_vifs(node, instance, network_info) + LOG.debug('VIF plug called for instance %(instance)s on node ' + '%(node)s, however Ironic manages VIF attachments ' + 'for nodes.', + {'instance': instance.uuid, + 'node': instance.node}) def unplug_vifs(self, instance, network_info): """Unplug VIFs from networks. @@ -1645,7 +1653,8 @@ def attach_interface(self, context, instance, image_meta, vif): # event from neutron or by _heal_instance_info_cache periodic task. In # both cases, this is done asynchronously, so the cache may not be up # to date immediately after attachment. - self.plug_vifs(instance, [vif]) + node = self._get_node(instance.node) + self._plug_vifs(node, instance, [vif]) def detach_interface(self, context, instance, vif): """Use hotunplug to remove a network interface from a running instance. @@ -1962,7 +1971,9 @@ def prepare_networks_before_block_device_mapping(self, instance, """ try: - self.plug_vifs(instance, network_info) + node = self._get_node(instance.node) + self._plug_vifs(node, instance, network_info) + except Exception: with excutils.save_and_reraise_exception(): LOG.error("Error preparing deploy for instance " diff --git a/releasenotes/notes/fix-ironic-compute-restart-port-attachments-3282e9ea051561d4.yaml b/releasenotes/notes/fix-ironic-compute-restart-port-attachments-3282e9ea051561d4.yaml new file mode 100644 index 00000000000..2c2c9e78382 --- /dev/null +++ b/releasenotes/notes/fix-ironic-compute-restart-port-attachments-3282e9ea051561d4.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + Fixes slow compute restart when using the ``nova.virt.ironic`` compute + driver where the driver was previously attempting to attach VIFS on + start-up via the ``plug_vifs`` driver method. This method has grown + otherwise unused since the introduction of the ``attach_interface`` + method of attaching VIFs. As Ironic manages the attachment of VIFs to + baremetal nodes in order to align with the security requirements of a + physical baremetal node's lifecycle. The ironic driver now ignores calls + to the ``plug_vifs`` method. From 1c2b2aa362507d633bdb07247bf6f4358fe02c0b Mon Sep 17 00:00:00 2001 From: Kashyap Chamarthy Date: Thu, 28 Jan 2021 16:35:10 +0100 Subject: [PATCH 7/8] libvirt: Add a workaround to skip compareCPU() on destination Nova's use of libvirt's compareCPU() API served its purpose over the years, but its design limitations break live migration in subtle ways. For example, the compareCPU() API compares against the host physical CPUID. Some of the features from this CPUID aren not exposed by KVM, and then there are some features that KVM emulates that are not in the host CPUID. The latter can cause bogus live migration failures. With QEMU >=2.9 and libvirt >= 4.4.0, libvirt will do the right thing in terms of CPU compatibility checks on the destination host during live migration. Nova satisfies these minimum version requirements by a good margin. So, provide a workaround to skip the CPU comparison check on the destination host before migrating a guest, and let libvirt handle it correctly. This workaround will be removed once Nova replaces the older libvirt APIs with their newer and improved counterparts[1][2]. - - - Note that Nova's libvirt driver calls compareCPU() in another method, _check_cpu_compatibility(); I did not remove its usage yet. 
As it needs more careful combing of the code, and then: - where possible, remove the usage of compareCPU() altogether, and rely on libvirt doing the right thing under the hood; or - where Nova _must_ do the CPU comparison checks, switch to the better libvirt CPU APIs -- baselineHypervisorCPU() and compareHypervisorCPU() -- that are described here[1]. This is work in progress[2]. [1] https://opendev.org/openstack/nova-specs/commit/70811da221035044e27 [2] https://review.opendev.org/q/topic:bp%252Fcpu-selection-with-hypervisor-consideration Change-Id: I444991584118a969e9ea04d352821b07ec0ba88d Closes-Bug: #1913716 Signed-off-by: Kashyap Chamarthy Signed-off-by: Balazs Gibizer (cherry picked from commit 267a40663cd8d0b94bbc5ebda4ece55a45753b64) --- nova/conf/workarounds.py | 8 +++++++ nova/tests/unit/virt/libvirt/test_driver.py | 19 +++++++++++++++ nova/virt/libvirt/driver.py | 19 ++++++++------- ...-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml | 24 +++++++++++++++++++ 4 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 8eadc0b6ec3..ee2abad4d09 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -345,6 +345,14 @@ * :oslo.config:option:`DEFAULT.instances_path` * :oslo.config:option:`image_cache.subdirectory_name` * :oslo.config:option:`update_resources_interval` +"""), + cfg.BoolOpt('skip_cpu_compare_on_dest', + default=False, + help=""" +With the libvirt driver, during live migration, skip comparing guest CPU +with the destination host. When using QEMU >= 2.9 and libvirt >= +4.4.0, libvirt will do the correct thing with respect to checking CPU +compatibility on the destination host during live migration. 
"""), ] diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index cea25218936..f5b82065869 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -10514,6 +10514,25 @@ def test_check_can_live_migrate_guest_cpu_none_model( '_create_shared_storage_test_file', return_value='fake') @mock.patch.object(libvirt_driver.LibvirtDriver, '_compare_cpu') + def test_check_can_live_migrate_guest_cpu_none_model_skip_compare( + self, mock_cpu, mock_test_file): + self.flags(group='workarounds', skip_cpu_compare_on_dest=True) + instance_ref = objects.Instance(**self.test_instance) + instance_ref.vcpu_model = test_vcpu_model.fake_vcpumodel + instance_ref.vcpu_model.model = None + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + compute_info = {'cpu_info': 'asdf', 'disk_available_least': 1} + drvr.check_can_live_migrate_destination( + self.context, instance_ref, compute_info, compute_info) + mock_cpu.assert_not_called() + + @mock.patch( + 'nova.network.neutron.API.has_port_binding_extension', + new=mock.Mock(return_value=False)) + @mock.patch.object(libvirt_driver.LibvirtDriver, + '_create_shared_storage_test_file', + return_value='fake') + @mock.patch.object(libvirt_driver.LibvirtDriver, '_compare_cpu') def test_check_can_live_migrate_dest_numa_lm( self, mock_cpu, mock_test_file): instance_ref = objects.Instance(**self.test_instance) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 08251516e30..0a505f3bcc9 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -8445,15 +8445,16 @@ def check_can_live_migrate_destination(self, context, instance, disk_available_mb = ( (disk_available_gb * units.Ki) - CONF.reserved_host_disk_mb) - # Compare CPU - try: - if not instance.vcpu_model or not instance.vcpu_model.model: - source_cpu_info = src_compute_info['cpu_info'] - self._compare_cpu(None, source_cpu_info, instance) - else: - self._compare_cpu(instance.vcpu_model, None, instance) - except exception.InvalidCPUInfo as e: - raise exception.MigrationPreCheckError(reason=e) + if not CONF.workarounds.skip_cpu_compare_on_dest: + # Compare CPU + try: + if not instance.vcpu_model or not instance.vcpu_model.model: + source_cpu_info = src_compute_info['cpu_info'] + self._compare_cpu(None, source_cpu_info, instance) + else: + self._compare_cpu(instance.vcpu_model, None, instance) + except exception.InvalidCPUInfo as e: + raise exception.MigrationPreCheckError(reason=e) # Create file on storage, to be checked on source host filename = self._create_shared_storage_test_file(instance) diff --git a/releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml b/releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml new file mode 100644 index 00000000000..e7cd4041b16 --- /dev/null +++ b/releasenotes/notes/skip-compare-cpu-on-dest-6ae419ddd61fd0f8.yaml @@ -0,0 +1,24 @@ +--- +issues: + - | + Nova's use of libvirt's compareCPU() API served its purpose over the + years, but its design limitations break live migration in subtle + ways. For example, the compareCPU() API compares against the host + physical CPUID. Some of the features from this CPUID aren not + exposed by KVM, and then there are some features that KVM emulates + that are not in the host CPUID. The latter can cause bogus live + migration failures. 
+
+    With QEMU >= 2.9 and libvirt >= 4.4.0, libvirt will do the right
+    thing in terms of CPU compatibility checks on the destination host
+    during live migration. Nova satisfies these minimum version
+    requirements by a good margin. So, this workaround provides a way to
+    skip the CPU comparison check on the destination host before
+    migrating a guest, and let libvirt handle it correctly.
+
+    This workaround will be deprecated and removed once Nova replaces
+    the older libvirt APIs with their newer counterparts. The work is
+    being tracked via this `blueprint
+    cpu-selection-with-hypervisor-consideration`_.
+
+    .. _blueprint cpu-selection-with-hypervisor-consideration: https://blueprints.launchpad.net/nova/+spec/cpu-selection-with-hypervisor-consideration

From f3b459ec28e8823d94afa6c9b93b987368adadf8 Mon Sep 17 00:00:00 2001
From: Dan Smith
Date: Thu, 10 Nov 2022 09:55:48 -0800
Subject: [PATCH 8/8] [stable-only][cve] Check VMDK create-type against an
 allowed list

NOTE(sbauza): Stable policy allows us to proactively merge a backport
without waiting for the parent patch to be merged (exception to rule #4
in [1]). Marking [stable-only] in order to silence
nova-tox-validate-backport.

[1] https://docs.openstack.org/project-team-guide/stable-branches.html#appropriate-fixes

Conflicts vs victoria in:
  nova/conf/compute.py

Related-Bug: #1996188
Change-Id: I5a399f1d3d702bfb76c067893e9c924904c8c360
(cherry picked from commit bf8b6f698c23c631c0a71d77f2716ca292c45e74)
---
 nova/conf/compute.py                |  9 ++++++
 nova/tests/unit/virt/test_images.py | 46 +++++++++++++++++++++++++++++
 nova/virt/images.py                 | 31 +++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/nova/conf/compute.py b/nova/conf/compute.py
index 8faddc721c8..c012e2d0815 100644
--- a/nova/conf/compute.py
+++ b/nova/conf/compute.py
@@ -959,6 +959,15 @@
 * Any integer >= 1 represents the maximum allowed. A value of 0 will cause the
   ``nova-compute`` service to fail to start, as 0 disk devices is an invalid
   configuration that would prevent instances from being able to boot.
+"""),
+    cfg.ListOpt('vmdk_allowed_types',
+                default=['streamOptimized', 'monolithicSparse'],
+                help="""
+A list of strings describing the VMDK "create-type" subformats that will
+be allowed. This is recommended to only include single-file-with-sparse-header
+variants to avoid potential host file exposure due to processing named
+extents. If this list is empty, then no form of VMDK image will be
+allowed.
 """),
 ]
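The "host file exposure" the help text mentions comes from the descriptor
of multi-file variants such as ``monolithicFlat``, whose extent lines name
other files on disk. A simplified, hypothetical descriptor (paths invented
for illustration) looks like::

    # Disk DescriptorFile
    version=1
    CID=fffffffe
    parentCID=ffffffff
    createType="monolithicFlat"

    # Extent description
    RW 4192256 FLAT "disk1-flat.vmdk" 0

If such an image is processed, the ``FLAT`` extent path could be pointed at
an arbitrary host file, which is why the default list contains only the
single-file sparse variants ``streamOptimized`` and ``monolithicSparse``.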
"""), ] diff --git a/nova/tests/unit/virt/test_images.py b/nova/tests/unit/virt/test_images.py index 199d4cf8e1c..e3279be820b 100644 --- a/nova/tests/unit/virt/test_images.py +++ b/nova/tests/unit/virt/test_images.py @@ -16,6 +16,8 @@ import mock from oslo_concurrency import processutils +from oslo_serialization import jsonutils +from oslo_utils import imageutils import six from nova.compute import utils as compute_utils @@ -136,3 +138,47 @@ def test_convert_image_without_direct_io_support(self, mock_execute, '-O', 'out_format', '-f', 'in_format', 'source', 'dest') mock_disk_op_sema.__enter__.assert_called_once() self.assertTupleEqual(expected, mock_execute.call_args[0]) + + def test_convert_image_vmdk_allowed_list_checking(self): + info = {'format': 'vmdk', + 'format-specific': { + 'type': 'vmdk', + 'data': { + 'create-type': 'monolithicFlat', + }}} + + # If the format is not in the allowed list, we should get an error + self.assertRaises(exception.ImageUnacceptable, + images.check_vmdk_image, 'foo', + imageutils.QemuImgInfo(jsonutils.dumps(info), + format='json')) + + # With the format in the allowed list, no error + self.flags(vmdk_allowed_types=['streamOptimized', 'monolithicFlat', + 'monolithicSparse'], + group='compute') + images.check_vmdk_image('foo', + imageutils.QemuImgInfo(jsonutils.dumps(info), + format='json')) + + # With an empty list, allow nothing + self.flags(vmdk_allowed_types=[], group='compute') + self.assertRaises(exception.ImageUnacceptable, + images.check_vmdk_image, 'foo', + imageutils.QemuImgInfo(jsonutils.dumps(info), + format='json')) + + @mock.patch.object(images, 'fetch') + @mock.patch('nova.privsep.qemu.unprivileged_qemu_img_info') + def test_fetch_checks_vmdk_rules(self, mock_info, mock_fetch): + info = {'format': 'vmdk', + 'format-specific': { + 'type': 'vmdk', + 'data': { + 'create-type': 'monolithicFlat', + }}} + mock_info.return_value = jsonutils.dumps(info) + with mock.patch('os.path.exists', return_value=True): + e = self.assertRaises(exception.ImageUnacceptable, + images.fetch_to_raw, None, 'foo', 'anypath') + self.assertIn('Invalid VMDK create-type specified', str(e)) diff --git a/nova/virt/images.py b/nova/virt/images.py index 5358f3766ac..f13c8722909 100644 --- a/nova/virt/images.py +++ b/nova/virt/images.py @@ -110,6 +110,34 @@ def get_info(context, image_href): return IMAGE_API.get(context, image_href) +def check_vmdk_image(image_id, data): + # Check some rules about VMDK files. Specifically we want to make + # sure that the "create-type" of the image is one that we allow. + # Some types of VMDK files can reference files outside the disk + # image and we do not want to allow those for obvious reasons. 
+
+    types = CONF.compute.vmdk_allowed_types
+
+    if not len(types):
+        LOG.warning('Refusing to allow VMDK image as vmdk_allowed_'
+                    'types is empty')
+        msg = _('Invalid VMDK create-type specified')
+        raise exception.ImageUnacceptable(image_id=image_id, reason=msg)
+
+    try:
+        create_type = data.format_specific['data']['create-type']
+    except KeyError:
+        msg = _('Unable to determine VMDK create-type')
+        raise exception.ImageUnacceptable(image_id=image_id, reason=msg)
+
+    if create_type not in CONF.compute.vmdk_allowed_types:
+        LOG.warning('Refusing to process VMDK file with create-type of %r '
+                    'which is not in allowed set of: %s', create_type,
+                    ','.join(CONF.compute.vmdk_allowed_types))
+        msg = _('Invalid VMDK create-type specified')
+        raise exception.ImageUnacceptable(image_id=image_id, reason=msg)
+
+
 def fetch_to_raw(context, image_href, path, trusted_certs=None):
     path_tmp = "%s.part" % path
     fetch(context, image_href, path_tmp, trusted_certs)
@@ -129,6 +157,9 @@ def fetch_to_raw(context, image_href, path, trusted_certs=None):
                 reason=(_("fmt=%(fmt)s backed by: %(backing_file)s") %
                         {'fmt': fmt, 'backing_file': backing_file}))
 
+    if fmt == 'vmdk':
+        check_vmdk_image(image_href, data)
+
     if fmt != "raw" and CONF.force_raw_images:
         staged = "%s.converted" % path
         LOG.debug("%s was %s, converting to raw", image_href, fmt)
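The same policy can be approximated outside of Nova against a local image
file using qemu-img's JSON output. This is a standalone sketch that assumes
``qemu-img`` is on ``PATH``; it mirrors, but is not, the ``check_vmdk_image``
code path above::

    import json
    import subprocess

    ALLOWED_VMDK_TYPES = {'streamOptimized', 'monolithicSparse'}


    def vmdk_create_type_allowed(path):
        # qemu-img reports the VMDK subformat under
        # format-specific/data/create-type, the same field Nova inspects.
        out = subprocess.check_output(
            ['qemu-img', 'info', '--output=json', path])
        info = json.loads(out)
        if info.get('format') != 'vmdk':
            return True  # the check only applies to VMDK images
        data = info.get('format-specific', {}).get('data', {})
        return data.get('create-type') in ALLOWED_VMDK_TYPES


    print(vmdk_create_type_allowed('disk1.vmdk'))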