From 5cfa7001f289569f36059857aa982f1862c062d8 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 19 Nov 2019 14:45:02 +0000 Subject: [PATCH 1/5] Add functional regression test for bug 1853009 Bug 1853009 describes a race condition involving multiple nova-compute services with ironic. As the compute services start up, the hash ring rebalances, and the compute services have an inconsistent view of which is responsible for a compute node. The sequence of actions here is adapted from a real world log [1], where multiple nova-compute services were started simultaneously. In some cases mocks are used to simulate race conditions. There are three main issues with the behaviour: * host2 deletes the orphan node compute node after host1 has taken ownership of it. * host1 assumes that another compute service will not delete its nodes. Once a node is in rt.compute_nodes, it is not removed again unless the node is orphaned. This prevents host1 from recreating the compute node. * host1 assumes that another compute service will not delete its resource providers. Once an RP is in the provider tree, it is not removed. This functional test documents the current behaviour, with the idea that it can be updated as this behaviour is fixed. [1] http://paste.openstack.org/show/786272/ Co-Authored-By: Matt Riedemann Change-Id: Ice4071722de54e8d20bb8c3795be22f1995940cd Related-Bug: #1853009 Related-Bug: #1853159 --- .../regressions/test_bug_1853009.py | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 nova/tests/functional/regressions/test_bug_1853009.py diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py new file mode 100644 index 00000000000..a72931a10b1 --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -0,0 +1,170 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import mock + +from nova import context +from nova import objects +from nova.tests.functional import integrated_helpers + + +class NodeRebalanceDeletedComputeNodeRaceTestCase( + integrated_helpers.ProviderUsageBaseTestCase): + """Regression test for bug 1853009 observed in Rocky & later. + + When an ironic node re-balances from one host to another, there can be a + race where the old host deletes the orphan compute node after the new host + has taken ownership of it which results in the new host failing to create + the compute node and resource provider because the ResourceTracker does not + detect a change. + """ + # Make sure we're using the fake driver that has predictable uuids + # for each node. + compute_driver = 'fake.PredictableNodeUUIDDriver' + + def _assert_hypervisor_api(self, nodename, expected_host): + # We should have one compute node shown by the API. 
+ hypervisors = self.api.api_get( + '/os-hypervisors/detail').body['hypervisors'] + self.assertEqual(1, len(hypervisors), hypervisors) + hypervisor = hypervisors[0] + self.assertEqual(nodename, hypervisor['hypervisor_hostname']) + self.assertEqual(expected_host, hypervisor['service']['host']) + + def _start_compute(self, host): + host = self.start_service('compute', host) + # Ironic compute driver has rebalances_nodes = True. + host.manager.driver.rebalances_nodes = True + return host + + def setUp(self): + super(NodeRebalanceDeletedComputeNodeRaceTestCase, self).setUp() + nodename = 'fake-node' + ctxt = context.get_admin_context() + + # Simulate a service running and then stopping. + # host2 runs, creates fake-node, then is stopped. The fake-node compute + # node is destroyed. This leaves a soft-deleted node in the DB. + host2 = self._start_compute('host2') + host2.manager.driver._set_nodes([nodename]) + host2.manager.update_available_resource(ctxt) + host2.stop() + cn = objects.ComputeNode.get_by_host_and_nodename( + ctxt, 'host2', nodename) + cn.destroy() + + def test_node_rebalance_deleted_compute_node_race(self): + nodename = 'fake-node' + ctxt = context.get_admin_context() + + # First we create a compute service to manage our node. + # When start_service runs, it will create a host1 ComputeNode. We want + # to delete that and inject our fake node into the driver which will + # be re-balanced to another host later. + host1 = self._start_compute('host1') + host1.manager.driver._set_nodes([nodename]) + + # Run the update_available_resource periodic to register fake-node and + # have it managed by host1. This will also detect the "host1" node as + # orphaned and delete it along with its resource provider. + + # host1[1]: Finds no compute record in RT. Tries to create one + # (_init_compute_node). + # FIXME(mgoddard): This shows a traceback with SQL rollback due to + # soft-deleted node. The create seems to succeed but breaks the RT + # update for this node. See + # https://bugs.launchpad.net/nova/+bug/1853159. + host1.manager.update_available_resource(ctxt) + self._assert_hypervisor_api(nodename, 'host1') + # There should only be one resource provider (fake-node). + original_rps = self._get_all_providers() + self.assertEqual(1, len(original_rps), original_rps) + self.assertEqual(nodename, original_rps[0]['name']) + + # Simulate a re-balance by starting host2 and make it manage fake-node. + # At this point both host1 and host2 think they own fake-node. + host2 = self._start_compute('host2') + host2.manager.driver._set_nodes([nodename]) + + # host2[1]: Finds no compute record in RT, 'moves' existing node from + # host1 + host2.manager.update_available_resource(ctxt) + # Assert that fake-node was re-balanced from host1 to host2. + self.assertIn('ComputeNode fake-node moving from host1 to host2', + self.stdlog.logger.output) + self._assert_hypervisor_api(nodename, 'host2') + + # host2[2]: Begins periodic update, queries compute nodes for this + # host, finds the fake-node. + cn = objects.ComputeNode.get_by_host_and_nodename( + ctxt, 'host2', nodename) + + # host1[2]: Finds no compute record in RT, 'moves' existing node from + # host2 + host1.manager.update_available_resource(ctxt) + # Assert that fake-node was re-balanced from host2 to host1. + self.assertIn('ComputeNode fake-node moving from host2 to host1', + self.stdlog.logger.output) + self._assert_hypervisor_api(nodename, 'host1') + + # Complete rebalance, as host2 realises it does not own fake-node. 
+ host2.manager.driver._set_nodes([]) + + # host2[2]: Deletes orphan compute node. + # Mock out the compute node query to simulate a race condition where + # the list includes an orphan compute node that is taken ownership of + # by host1 by the time host2 deletes it. + # FIXME(mgoddard): Ideally host2 would not delete a node that does not + # belong to it. See https://bugs.launchpad.net/nova/+bug/1853009. + with mock.patch('nova.compute.manager.ComputeManager.' + '_get_compute_nodes_in_db') as mock_get: + mock_get.return_value = [cn] + host2.manager.update_available_resource(ctxt) + + # Verify that the node was deleted. + self.assertIn("Deleting orphan compute node %s hypervisor host " + "is fake-node, nodes are" % cn.id, + self.stdlog.logger.output) + hypervisors = self.api.api_get( + '/os-hypervisors/detail').body['hypervisors'] + self.assertEqual(0, len(hypervisors), hypervisors) + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # host1[3]: Should recreate compute node and resource provider. + # FIXME(mgoddard): Compute node not recreated here, because it is + # already in RT.compute_nodes. See + # https://bugs.launchpad.net/nova/+bug/1853009. + # FIXME(mgoddard): Resource provider not recreated here, because it + # exists in the provider tree. See + # https://bugs.launchpad.net/nova/+bug/1841481. + host1.manager.update_available_resource(ctxt) + + # Verify that the node was not recreated. + hypervisors = self.api.api_get( + '/os-hypervisors/detail').body['hypervisors'] + self.assertEqual(0, len(hypervisors), hypervisors) + + # But the compute node exists in the RT. + self.assertIn(nodename, host1.manager.rt.compute_nodes) + + # There is no RP. + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + + # But the RP it exists in the provider tree. + self.assertTrue(host1.manager.rt.reportclient._provider_tree.exists( + nodename)) + + # This fails due to the lack of a resource provider. + self.assertIn('Skipping removal of allocations for deleted instances', + self.stdlog.logger.output) From e2c3c945ff66f31b2271bbf6e52c3525200d3459 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 19 Nov 2019 17:12:10 +0000 Subject: [PATCH 2/5] Clear rebalanced compute nodes from resource tracker There is a race condition in nova-compute with the ironic virt driver as nodes get rebalanced. It can lead to compute nodes being removed in the DB and not repopulated. Ultimately this prevents these nodes from being scheduled to. The issue being addressed here is that if a compute node is deleted by a host which thinks it is an orphan, then the compute host that actually owns the node might not recreate it if the node is already in its resource tracker cache. This change fixes the issue by clearing nodes from the resource tracker cache for which a compute node entry does not exist. Then, when the available resource for the node is updated, the compute node object is not found in the cache and gets recreated. 
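Roughly, the cleaning step amounts to evicting any cached nodename that no
longer has a compute node row in the DB, so that the next periodic update
recreates it. A minimal standalone sketch of that idea (illustrative names
only, not the actual ResourceTracker code):

    def clean_compute_node_cache(cached_nodes, db_nodenames):
        """Drop cached nodes that no longer have a compute node row.

        :param cached_nodes: dict mapping nodename -> cached node object
        :param db_nodenames: set of nodenames currently present in the DB
        :returns: sorted list of nodenames evicted from the cache
        """
        stale = set(cached_nodes) - set(db_nodenames)
        for nodename in stale:
            # Another compute service may have taken ownership of this node
            # and deleted our row; forget it so the next periodic update
            # recreates the compute node.
            del cached_nodes[nodename]
        return sorted(stale)

    # Example: host1's cache still holds fake-node, but the DB row is gone.
    cache = {'fake-node': object()}
    assert clean_compute_node_cache(cache, db_nodenames=set()) == ['fake-node']
    assert 'fake-node' not in cache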
Change-Id: I39241223b447fcc671161c370dbf16e1773b684a Partial-Bug: #1853009 --- nova/compute/manager.py | 2 ++ nova/compute/resource_tracker.py | 17 +++++++++ .../regressions/test_bug_1853009.py | 35 ++++++++++++++----- nova/tests/unit/compute/test_compute_mgr.py | 3 ++ .../unit/compute/test_resource_tracker.py | 17 +++++++++ 5 files changed, 65 insertions(+), 9 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 6f47428adfc..77c9cc4b07d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8823,6 +8823,8 @@ def update_available_resource(self, context, startup=False): use_slave=True, startup=startup) + self.rt.clean_compute_node_cache(compute_nodes_in_db) + # Delete orphan compute node not reported by driver but still in db for cn in compute_nodes_in_db: if cn.hypervisor_hostname not in nodenames: diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index b2ba0145cf4..886b19e59a0 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1801,3 +1801,20 @@ def free_pci_device_claims_for_instance(self, context, instance): """ self.pci_tracker.free_instance_claims(context, instance) self.pci_tracker.save(context) + + @utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True) + def clean_compute_node_cache(self, compute_nodes_in_db): + """Clean the compute node cache of any nodes that no longer exist. + + :param compute_nodes_in_db: list of ComputeNode objects from the DB. + """ + compute_nodes_in_db_nodenames = {cn.hypervisor_hostname + for cn in compute_nodes_in_db} + stale_cns = set(self.compute_nodes) - compute_nodes_in_db_nodenames + + for stale_cn in stale_cns: + # NOTE(mgoddard): we have found a node in the cache that has no + # compute node in the DB. This could be due to a node rebalance + # where another compute service took ownership of the node. Clean + # up the cache. + self.remove_node(stale_cn) diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index a72931a10b1..54922a1a1dd 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -141,26 +141,43 @@ def test_node_rebalance_deleted_compute_node_race(self): self.assertEqual(0, len(rps), rps) # host1[3]: Should recreate compute node and resource provider. - # FIXME(mgoddard): Compute node not recreated here, because it is - # already in RT.compute_nodes. See - # https://bugs.launchpad.net/nova/+bug/1853009. # FIXME(mgoddard): Resource provider not recreated here, because it # exists in the provider tree. See # https://bugs.launchpad.net/nova/+bug/1841481. host1.manager.update_available_resource(ctxt) - # Verify that the node was not recreated. - hypervisors = self.api.api_get( - '/os-hypervisors/detail').body['hypervisors'] - self.assertEqual(0, len(hypervisors), hypervisors) + # Verify that the node was recreated. + self._assert_hypervisor_api(nodename, 'host1') - # But the compute node exists in the RT. - self.assertIn(nodename, host1.manager.rt.compute_nodes) + # But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute + # node is not cached in the RT. + self.assertNotIn(nodename, host1.manager.rt.compute_nodes) # There is no RP. rps = self._get_all_providers() self.assertEqual(0, len(rps), rps) + # But the RP it exists in the provider tree. 
+ self.assertTrue(host1.manager.rt.reportclient._provider_tree.exists( + nodename)) + + # host1[1]: Should add compute node to RT cache and recreate resource + # provider. + # FIXME(mgoddard): Resource provider not recreated here, because it + # exists in the provider tree. See + # https://bugs.launchpad.net/nova/+bug/1841481. + host1.manager.update_available_resource(ctxt) + + # Verify that the node still exists. + self._assert_hypervisor_api(nodename, 'host1') + + # And it is now in the RT cache. + self.assertIn(nodename, host1.manager.rt.compute_nodes) + + # There is still no RP. + rps = self._get_all_providers() + self.assertEqual(0, len(rps), rps) + # But the RP it exists in the provider tree. self.assertTrue(host1.manager.rt.reportclient._provider_tree.exists( nodename)) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 5f01113525d..541b1dbe3e9 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -341,6 +341,7 @@ def test_update_available_resource(self, get_db_nodes, get_avail_nodes, ) # First node in set should have been removed from DB + # Last node in set should have been added to DB. for db_node in db_nodes: if db_node.hypervisor_hostname == 'node1': db_node.destroy.assert_called_once_with() @@ -350,6 +351,8 @@ def test_update_available_resource(self, get_db_nodes, get_avail_nodes, 'node1') else: self.assertFalse(db_node.destroy.called) + self.assertEqual(1, mock_rt.remove_node.call_count) + mock_rt.clean_compute_node_cache.assert_called_once_with(db_nodes) @mock.patch('nova.scheduler.client.report.SchedulerReportClient.' 'delete_resource_provider') diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 9bae648efe9..62f7ace8ace 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -4006,3 +4006,20 @@ def test_init_ensure_provided_reportclient_is_used(self): rt = resource_tracker.ResourceTracker( _HOSTNAME, mock.sentinel.driver, mock.sentinel.reportclient) self.assertIs(rt.reportclient, mock.sentinel.reportclient) + + +class TestCleanComputeNodeCache(BaseTestCase): + + def setUp(self): + super(TestCleanComputeNodeCache, self).setUp() + self._setup_rt() + self.context = context.RequestContext(mock.sentinel.user_id, + mock.sentinel.project_id) + + @mock.patch.object(resource_tracker.ResourceTracker, "remove_node") + def test_clean_compute_node_cache(self, mock_remove): + invalid_nodename = "invalid-node" + self.rt.compute_nodes[_NODENAME] = self.compute + self.rt.compute_nodes[invalid_nodename] = mock.sentinel.compute + self.rt.clean_compute_node_cache([self.compute]) + mock_remove.assert_called_once_with(invalid_nodename) From 905ee1518b05e8714404a20d5af74e7aad4f5e18 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Tue, 19 Nov 2019 16:51:01 +0000 Subject: [PATCH 3/5] Invalidate provider tree when compute node disappears There is a race condition in nova-compute with the ironic virt driver as nodes get rebalanced. It can lead to compute nodes being removed in the DB and not repopulated. Ultimately this prevents these nodes from being scheduled to. The issue being addressed here is that if a compute node is deleted by a host which thinks it is an orphan, then the resource provider for that node might also be deleted. The compute host that owns the node might not recreate the resource provider if it exists in the provider tree cache. 
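The effect of that stale cache can be reproduced with a tiny standalone model
(purely illustrative; not the real SchedulerReportClient or ProviderTree):

    class FakeProviderTree:
        def __init__(self):
            self._names = set()

        def exists(self, name):
            return name in self._names

        def add(self, name):
            self._names.add(name)

        def remove(self, name):
            self._names.discard(name)

    def ensure_provider(tree, placement, name):
        # "Create it in placement unless the local cache says it exists."
        if not tree.exists(name):
            placement.add(name)
            tree.add(name)

    tree, placement = FakeProviderTree(), set()
    ensure_provider(tree, placement, 'fake-node')  # first periodic creates it
    placement.discard('fake-node')                 # another host deletes the RP
    ensure_provider(tree, placement, 'fake-node')  # stale cache: nothing happens
    assert 'fake-node' not in placement            # RP is never recreated

    tree.remove('fake-node')                       # what this change does
    ensure_provider(tree, placement, 'fake-node')  # RP is recreated
    assert 'fake-node' in placement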
This change fixes the issue by clearing resource providers from the provider tree cache for which a compute node entry does not exist. Then, when the available resource for the node is updated, the resource providers are not found in the cache and get recreated in placement. Change-Id: Ia53ff43e6964963cdf295604ba0fb7171389606e Related-Bug: #1853009 Related-Bug: #1841481 --- nova/compute/resource_tracker.py | 1 + nova/scheduler/client/report.py | 17 ++++++++++++----- .../regressions/test_bug_1853009.py | 19 ++++++------------- .../unit/compute/test_resource_tracker.py | 8 ++++++-- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 886b19e59a0..4cc370a5329 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -1818,3 +1818,4 @@ def clean_compute_node_cache(self, compute_nodes_in_db): # where another compute service took ownership of the node. Clean # up the cache. self.remove_node(stale_cn) + self.reportclient.invalidate_resource_provider(stale_cn) diff --git a/nova/scheduler/client/report.py b/nova/scheduler/client/report.py index 98eb8164dba..37dd069ef33 100644 --- a/nova/scheduler/client/report.py +++ b/nova/scheduler/client/report.py @@ -677,11 +677,7 @@ def _delete_provider(self, rp_uuid, global_request_id=None): if resp: LOG.info("Deleted resource provider %s", rp_uuid) # clean the caches - try: - self._provider_tree.remove(rp_uuid) - except ValueError: - pass - self._association_refresh_time.pop(rp_uuid, None) + self.invalidate_resource_provider(rp_uuid) return msg = ("[%(placement_req_id)s] Failed to delete resource provider " @@ -2155,6 +2151,17 @@ def delete_resource_provider(self, context, compute_node, cascade=False): # for backward compatibility. pass + def invalidate_resource_provider(self, name_or_uuid): + """Invalidate the cache for a resource provider. + + :param name_or_uuid: Name or UUID of the resource provider to look up. + """ + try: + self._provider_tree.remove(name_or_uuid) + except ValueError: + pass + self._association_refresh_time.pop(name_or_uuid, None) + def get_provider_by_name(self, context, name): """Queries the placement API for resource provider information matching a supplied name. diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index 54922a1a1dd..490b7f3c71a 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -141,9 +141,8 @@ def test_node_rebalance_deleted_compute_node_race(self): self.assertEqual(0, len(rps), rps) # host1[3]: Should recreate compute node and resource provider. - # FIXME(mgoddard): Resource provider not recreated here, because it - # exists in the provider tree. See - # https://bugs.launchpad.net/nova/+bug/1841481. + # FIXME(mgoddard): Resource provider not recreated here, due to + # https://bugs.launchpad.net/nova/+bug/1853159. host1.manager.update_available_resource(ctxt) # Verify that the node was recreated. @@ -158,14 +157,11 @@ def test_node_rebalance_deleted_compute_node_race(self): self.assertEqual(0, len(rps), rps) # But the RP it exists in the provider tree. - self.assertTrue(host1.manager.rt.reportclient._provider_tree.exists( + self.assertFalse(host1.manager.rt.reportclient._provider_tree.exists( nodename)) # host1[1]: Should add compute node to RT cache and recreate resource # provider. 
- # FIXME(mgoddard): Resource provider not recreated here, because it - # exists in the provider tree. See - # https://bugs.launchpad.net/nova/+bug/1841481. host1.manager.update_available_resource(ctxt) # Verify that the node still exists. @@ -174,13 +170,10 @@ def test_node_rebalance_deleted_compute_node_race(self): # And it is now in the RT cache. self.assertIn(nodename, host1.manager.rt.compute_nodes) - # There is still no RP. + # The resource provider has now been created. rps = self._get_all_providers() - self.assertEqual(0, len(rps), rps) - - # But the RP it exists in the provider tree. - self.assertTrue(host1.manager.rt.reportclient._provider_tree.exists( - nodename)) + self.assertEqual(1, len(rps), rps) + self.assertEqual(nodename, rps[0]['name']) # This fails due to the lack of a resource provider. self.assertIn('Skipping removal of allocations for deleted instances', diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py index 62f7ace8ace..c39fd1b03b7 100644 --- a/nova/tests/unit/compute/test_resource_tracker.py +++ b/nova/tests/unit/compute/test_resource_tracker.py @@ -4021,5 +4021,9 @@ def test_clean_compute_node_cache(self, mock_remove): invalid_nodename = "invalid-node" self.rt.compute_nodes[_NODENAME] = self.compute self.rt.compute_nodes[invalid_nodename] = mock.sentinel.compute - self.rt.clean_compute_node_cache([self.compute]) - mock_remove.assert_called_once_with(invalid_nodename) + with mock.patch.object( + self.rt.reportclient, + "invalidate_resource_provider") as mock_invalidate: + self.rt.clean_compute_node_cache([self.compute]) + mock_remove.assert_called_once_with(invalid_nodename) + mock_invalidate.assert_called_once_with(invalid_nodename) From 56a44f324c757da28efdde64eebe5618f74a98b5 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Mon, 18 Nov 2019 12:06:47 +0000 Subject: [PATCH 4/5] Prevent deletion of a compute node belonging to another host There is a race condition in nova-compute with the ironic virt driver as nodes get rebalanced. It can lead to compute nodes being removed in the DB and not repopulated. Ultimately this prevents these nodes from being scheduled to. The main race condition involved is in update_available_resources in the compute manager. When the list of compute nodes is queried, there is a compute node belonging to the host that it does not expect to be managing, i.e. it is an orphan. Between that time and deleting the orphan, the real owner of the compute node takes ownership of it ( in the resource tracker). However, the node is still deleted as the first host is unaware of the ownership change. This change prevents this from occurring by filtering on the host when deleting a compute node. If another compute host has taken ownership of a node, it will have updated the host field and this will prevent deletion from occurring. The first host sees this has happened via the ComputeHostNotFound exception, and avoids deleting its resource provider. 
Closes-Bug: #1853009 Related-Bug: #1841481 Change-Id: I260c1fded79a85d4899e94df4d9036a1ee437f02 --- nova/compute/manager.py | 31 +++++++++++++++---- nova/db/api.py | 5 +-- nova/db/sqlalchemy/api.py | 4 +-- nova/objects/compute_node.py | 2 +- .../regressions/test_bug_1853009.py | 23 +++++++++----- nova/tests/unit/compute/test_compute.py | 4 ++- nova/tests/unit/compute/test_compute_mgr.py | 27 ++++++++++++++++ nova/tests/unit/db/test_db_api.py | 12 +++++-- nova/tests/unit/objects/test_compute_node.py | 3 +- .../notes/bug-1853009-99414e14d1491b5f.yaml | 7 +++++ 10 files changed, 96 insertions(+), 22 deletions(-) create mode 100644 releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 77c9cc4b07d..0dfd1592768 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8833,12 +8833,31 @@ def update_available_resource(self, context, startup=False): "nodes are %(nodes)s", {'id': cn.id, 'hh': cn.hypervisor_hostname, 'nodes': nodenames}) - cn.destroy() - self.rt.remove_node(cn.hypervisor_hostname) - # Delete the corresponding resource provider in placement, - # along with any associated allocations and inventory. - self.reportclient.delete_resource_provider(context, cn, - cascade=True) + try: + cn.destroy() + except exception.ComputeHostNotFound: + # NOTE(mgoddard): it's possible that another compute + # service took ownership of this compute node since we + # queried it due to a rebalance, and this will cause the + # deletion to fail. Ignore the error in that case. + LOG.info("Ignoring failure to delete orphan compute node " + "%(id)s on hypervisor host %(hh)s due to " + "possible node rebalance", + {'id': cn.id, 'hh': cn.hypervisor_hostname}) + self.rt.remove_node(cn.hypervisor_hostname) + self.reportclient.invalidate_resource_provider(cn.uuid) + else: + self.rt.remove_node(cn.hypervisor_hostname) + # Delete the corresponding resource provider in placement, + # along with any associated allocations. + try: + self.reportclient.delete_resource_provider( + context, cn, cascade=True) + except keystone_exception.ClientException as e: + LOG.error( + "Failed to delete compute node resource provider " + "for compute node %s: %s", + cn.uuid, six.text_type(e)) for nodename in nodenames: self._update_available_resource_for_node(context, nodename, diff --git a/nova/db/api.py b/nova/db/api.py index c65803bed43..a5d63c327b7 100644 --- a/nova/db/api.py +++ b/nova/db/api.py @@ -344,15 +344,16 @@ def compute_node_update(context, compute_id, values): return IMPL.compute_node_update(context, compute_id, values) -def compute_node_delete(context, compute_id): +def compute_node_delete(context, compute_id, compute_host): """Delete a compute node from the database. :param context: The security context :param compute_id: ID of the compute node + :param compute_host: Hostname of the compute service Raises ComputeHostNotFound if compute node with the given ID doesn't exist. 
""" - return IMPL.compute_node_delete(context, compute_id) + return IMPL.compute_node_delete(context, compute_id, compute_host) def compute_node_statistics(context): diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index 02311f240d5..c972591cfbd 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -763,10 +763,10 @@ def compute_node_update(context, compute_id, values): @pick_context_manager_writer -def compute_node_delete(context, compute_id): +def compute_node_delete(context, compute_id, compute_host): """Delete a ComputeNode record.""" result = model_query(context, models.ComputeNode).\ - filter_by(id=compute_id).\ + filter_by(id=compute_id, host=compute_host).\ soft_delete(synchronize_session=False) if not result: diff --git a/nova/objects/compute_node.py b/nova/objects/compute_node.py index b7f7f966b9d..d5e5853f6d6 100644 --- a/nova/objects/compute_node.py +++ b/nova/objects/compute_node.py @@ -354,7 +354,7 @@ def save(self, prune_stats=False): @base.remotable def destroy(self): - db.compute_node_delete(self._context, self.id) + db.compute_node_delete(self._context, self.id, self.host) def update_from_virt_driver(self, resources): # NOTE(pmurray): the virt driver provides a dict of values that diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index 490b7f3c71a..4da9061b9c7 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -123,22 +123,31 @@ def test_node_rebalance_deleted_compute_node_race(self): # Mock out the compute node query to simulate a race condition where # the list includes an orphan compute node that is taken ownership of # by host1 by the time host2 deletes it. - # FIXME(mgoddard): Ideally host2 would not delete a node that does not - # belong to it. See https://bugs.launchpad.net/nova/+bug/1853009. with mock.patch('nova.compute.manager.ComputeManager.' '_get_compute_nodes_in_db') as mock_get: mock_get.return_value = [cn] host2.manager.update_available_resource(ctxt) - # Verify that the node was deleted. + # Verify that the node was almost deleted, but saved by the host check. self.assertIn("Deleting orphan compute node %s hypervisor host " "is fake-node, nodes are" % cn.id, self.stdlog.logger.output) - hypervisors = self.api.api_get( - '/os-hypervisors/detail').body['hypervisors'] - self.assertEqual(0, len(hypervisors), hypervisors) + self.assertIn("Ignoring failure to delete orphan compute node %s on " + "hypervisor host fake-node" % cn.id, + self.stdlog.logger.output) + self._assert_hypervisor_api(nodename, 'host1') rps = self._get_all_providers() - self.assertEqual(0, len(rps), rps) + self.assertEqual(1, len(rps), rps) + self.assertEqual(nodename, rps[0]['name']) + + # Simulate deletion of an orphan by host2. It shouldn't happen anymore, + # but let's assume it already did. + cn = objects.ComputeNode.get_by_host_and_nodename( + ctxt, 'host1', nodename) + cn.destroy() + host2.manager.rt.remove_node(cn.hypervisor_hostname) + host2.manager.reportclient.delete_resource_provider( + ctxt, cn, cascade=True) # host1[3]: Should recreate compute node and resource provider. 
# FIXME(mgoddard): Resource provider not recreated here, due to diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index d84b5be3e5c..6a833d4073f 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -202,8 +202,10 @@ def fake_get_compute_nodes_in_db(self, context, *args, **kwargs): context, objects.ComputeNode(), cn) for cn in fake_compute_nodes] - def fake_compute_node_delete(context, compute_node_id): + def fake_compute_node_delete(context, compute_node_id, + compute_node_host): self.assertEqual(2, compute_node_id) + self.assertEqual('fake_phyp1', compute_node_host) self.stub_out( 'nova.compute.manager.ComputeManager._get_compute_nodes_in_db', diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 541b1dbe3e9..d608fcd00bc 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -374,6 +374,33 @@ def test_update_available_resource_not_ready(self, get_db_nodes, update_mock.assert_not_called() del_rp_mock.assert_not_called() + @mock.patch.object(manager.ComputeManager, + '_update_available_resource_for_node') + @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes') + @mock.patch.object(manager.ComputeManager, '_get_compute_nodes_in_db') + def test_update_available_resource_destroy_rebalance( + self, get_db_nodes, get_avail_nodes, update_mock): + mock_rt = self._mock_rt() + rc_mock = self.useFixture(fixtures.fixtures.MockPatchObject( + self.compute, 'reportclient')).mock + db_nodes = [self._make_compute_node('node1', 1)] + get_db_nodes.return_value = db_nodes + # Destroy can fail if nodes were rebalanced between getting the node + # list and calling destroy. 
+ db_nodes[0].destroy.side_effect = exception.ComputeHostNotFound( + 'node1') + get_avail_nodes.return_value = set() + self.compute.update_available_resource(self.context) + get_db_nodes.assert_called_once_with(self.context, set(), + use_slave=True, startup=False) + self.assertEqual(0, update_mock.call_count) + + db_nodes[0].destroy.assert_called_once_with() + self.assertEqual(0, rc_mock.delete_resource_provider.call_count) + mock_rt.remove_node.assert_called_once_with('node1') + rc_mock.invalidate_resource_provider.assert_called_once_with( + db_nodes[0].uuid) + @mock.patch('nova.context.get_admin_context') def test_pre_start_hook(self, get_admin_context): """Very simple test just to make sure update_available_resource is diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index fb42d1af524..92f9a8da80d 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -7427,7 +7427,7 @@ def test_compute_node_get_all_deleted_compute_node(self): # Now delete the newly-created compute node to ensure the related # compute node stats are wiped in a cascaded fashion - db.compute_node_delete(self.ctxt, node['id']) + db.compute_node_delete(self.ctxt, node['id'], node['host']) # Clean up the service db.service_destroy(self.ctxt, service['id']) @@ -7601,10 +7601,18 @@ def test_compute_node_update(self): def test_compute_node_delete(self): compute_node_id = self.item['id'] - db.compute_node_delete(self.ctxt, compute_node_id) + compute_node_host = self.item['host'] + db.compute_node_delete(self.ctxt, compute_node_id, compute_node_host) nodes = db.compute_node_get_all(self.ctxt) self.assertEqual(len(nodes), 0) + def test_compute_node_delete_different_host(self): + compute_node_id = self.item['id'] + compute_node_host = 'invalid-host' + self.assertRaises(exception.ComputeHostNotFound, + db.compute_node_delete, + self.ctxt, compute_node_id, compute_node_host) + def test_compute_node_search_by_hypervisor(self): nodes_created = [] new_service = copy.copy(self.service_dict) diff --git a/nova/tests/unit/objects/test_compute_node.py b/nova/tests/unit/objects/test_compute_node.py index aeaf4b957fe..237c0c7cb27 100644 --- a/nova/tests/unit/objects/test_compute_node.py +++ b/nova/tests/unit/objects/test_compute_node.py @@ -414,8 +414,9 @@ def test_set_id_failure(self, mock_get, db_mock): def test_destroy(self, mock_delete): compute = compute_node.ComputeNode(context=self.context) compute.id = 123 + compute.host = 'fake' compute.destroy() - mock_delete.assert_called_once_with(self.context, 123) + mock_delete.assert_called_once_with(self.context, 123, 'fake') @mock.patch.object(db, 'compute_node_get_all') def test_get_all(self, mock_get_all): diff --git a/releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml b/releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml new file mode 100644 index 00000000000..d6e71c42224 --- /dev/null +++ b/releasenotes/notes/bug-1853009-99414e14d1491b5f.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixes an issue with multiple ``nova-compute`` services used with Ironic, + where a rebalance operation could result in a compute node being deleted + from the database and not recreated. See `bug 1853009 + `__ for details. 
From 4fe087cbbf7503421fa8b71bdf8bf971ff27158d Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Wed, 20 Nov 2019 12:01:33 +0000 Subject: [PATCH 5/5] Fix inactive session error in compute node creation In the fix for bug 1839560 [1][2], soft-deleted compute nodes may be restored, to ensure we can reuse ironic node UUIDs as compute node UUIDs. While this seems to largely work, it results in some nasty errors being generated [3]: InvalidRequestError This session is in 'inactive' state, due to the SQL transaction being rolled back; no further SQL can be emitted within this transaction. This happens because compute_node_create is decorated with pick_context_manager_writer, which begins a transaction. While _compute_node_get_and_update_deleted claims that calling a second pick_context_manager_writer decorated function will begin a new subtransaction, this does not appear to be the case. This change removes pick_context_manager_writer from the compute_node_create function, and adds a new _compute_node_create function which ensures the transaction is finished if _compute_node_get_and_update_deleted is called. The new unit test added here fails without this change. This change marks the removal of the final FIXME from the functional test added in [4]. [1] https://bugs.launchpad.net/nova/+bug/1839560 [2] https://git.openstack.org/cgit/openstack/nova/commit/?id=89dd74ac7f1028daadf86cb18948e27fe9d1d411 [3] http://paste.openstack.org/show/786350/ [4] https://review.opendev.org/#/c/695012/ Change-Id: Iae119ea8776bc7f2e5dbe2e502a743217beded73 Closes-Bug: #1853159 Related-Bug: #1853009 --- nova/db/sqlalchemy/api.py | 17 ++++++++-- .../regressions/test_bug_1853009.py | 32 ------------------- nova/tests/unit/db/test_db_api.py | 14 ++++++++ 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index c972591cfbd..cd47f18771a 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -690,7 +690,7 @@ def compute_node_search_by_hypervisor(context, hypervisor_match): @pick_context_manager_writer -def compute_node_create(context, values): +def _compute_node_create(context, values): """Creates a new ComputeNode and populates the capacity fields with the most recent data. """ @@ -698,8 +698,21 @@ def compute_node_create(context, values): compute_node_ref = models.ComputeNode() compute_node_ref.update(values) + compute_node_ref.save(context.session) + return compute_node_ref + + +# NOTE(mgoddard): We avoid decorating this with @pick_context_manager_writer, +# so that we get a separate transaction in the exception handler. This avoids +# an error message about inactive DB sessions during a transaction rollback. +# See https://bugs.launchpad.net/nova/+bug/1853159. +def compute_node_create(context, values): + """Creates a new ComputeNode and populates the capacity fields + with the most recent data. Will restore a soft deleted compute node if a + UUID has been explicitly requested. 
+ """ try: - compute_node_ref.save(context.session) + compute_node_ref = _compute_node_create(context, values) except db_exc.DBDuplicateEntry: with excutils.save_and_reraise_exception(logger=LOG) as err_ctx: # Check to see if we have a (soft) deleted ComputeNode with the diff --git a/nova/tests/functional/regressions/test_bug_1853009.py b/nova/tests/functional/regressions/test_bug_1853009.py index 4da9061b9c7..303867718a5 100644 --- a/nova/tests/functional/regressions/test_bug_1853009.py +++ b/nova/tests/functional/regressions/test_bug_1853009.py @@ -79,10 +79,6 @@ def test_node_rebalance_deleted_compute_node_race(self): # host1[1]: Finds no compute record in RT. Tries to create one # (_init_compute_node). - # FIXME(mgoddard): This shows a traceback with SQL rollback due to - # soft-deleted node. The create seems to succeed but breaks the RT - # update for this node. See - # https://bugs.launchpad.net/nova/+bug/1853159. host1.manager.update_available_resource(ctxt) self._assert_hypervisor_api(nodename, 'host1') # There should only be one resource provider (fake-node). @@ -150,40 +146,12 @@ def test_node_rebalance_deleted_compute_node_race(self): ctxt, cn, cascade=True) # host1[3]: Should recreate compute node and resource provider. - # FIXME(mgoddard): Resource provider not recreated here, due to - # https://bugs.launchpad.net/nova/+bug/1853159. host1.manager.update_available_resource(ctxt) # Verify that the node was recreated. self._assert_hypervisor_api(nodename, 'host1') - # But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute - # node is not cached in the RT. - self.assertNotIn(nodename, host1.manager.rt.compute_nodes) - - # There is no RP. - rps = self._get_all_providers() - self.assertEqual(0, len(rps), rps) - - # But the RP it exists in the provider tree. - self.assertFalse(host1.manager.rt.reportclient._provider_tree.exists( - nodename)) - - # host1[1]: Should add compute node to RT cache and recreate resource - # provider. - host1.manager.update_available_resource(ctxt) - - # Verify that the node still exists. - self._assert_hypervisor_api(nodename, 'host1') - - # And it is now in the RT cache. - self.assertIn(nodename, host1.manager.rt.compute_nodes) - # The resource provider has now been created. rps = self._get_all_providers() self.assertEqual(1, len(rps), rps) self.assertEqual(nodename, rps[0]['name']) - - # This fails due to the lack of a resource provider. - self.assertIn('Skipping removal of allocations for deleted instances', - self.stdlog.logger.output) diff --git a/nova/tests/unit/db/test_db_api.py b/nova/tests/unit/db/test_db_api.py index 92f9a8da80d..7f22102cc48 100644 --- a/nova/tests/unit/db/test_db_api.py +++ b/nova/tests/unit/db/test_db_api.py @@ -7317,6 +7317,20 @@ def test_compute_node_create_duplicate_host_hypervisor_hostname(self): self.assertRaises(db_exc.DBDuplicateEntry, db.compute_node_create, self.ctxt, other_node) + def test_compute_node_create_duplicate_uuid(self): + """Tests to make sure that no exception is raised when trying to create + a compute node with the same host, hypervisor_hostname and uuid values + as another compute node that was previously soft-deleted. + """ + # Prior to fixing https://bugs.launchpad.net/nova/+bug/1853159, this + # raised the following error: + # sqlalchemy.exc.InvalidRequestError: This session is in 'inactive' + # state, due to the SQL transaction being rolled back; no further SQL + # can be emitted within this transaction. 
+ db.compute_node_delete(self.ctxt, self.item['id'], self.item['host']) + new_node = db.compute_node_create(self.ctxt, self.compute_node_dict) + self.assertEqual(self.item['uuid'], new_node['uuid']) + def test_compute_node_get_all(self): nodes = db.compute_node_get_all(self.ctxt) self.assertEqual(1, len(nodes))
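The transaction-scoping point in this last patch is easier to see in
isolation. A minimal, self-contained sqlite3 sketch of the pattern
(illustrative only; the real code goes through oslo.db's
pick_context_manager_writer and _compute_node_get_and_update_deleted):

    import sqlite3

    def _compute_node_create(conn, uuid):
        # The inner helper owns its transaction, mirroring the new private
        # function: on failure the rollback happens here, before recovery.
        with conn:  # commits on success, rolls back on exception
            conn.execute(
                'INSERT INTO compute_nodes (uuid, deleted) VALUES (?, 0)',
                (uuid,))

    def compute_node_create(conn, uuid):
        try:
            _compute_node_create(conn, uuid)
        except sqlite3.IntegrityError:
            # The failed transaction is already finished, so restoring the
            # soft-deleted row runs in a brand-new transaction rather than
            # an inactive, rolled-back session.
            with conn:
                conn.execute(
                    'UPDATE compute_nodes SET deleted = 0 WHERE uuid = ?',
                    (uuid,))

    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE compute_nodes (uuid TEXT UNIQUE, deleted INTEGER)')
    compute_node_create(conn, 'fake-node-uuid')               # initial create
    with conn:
        conn.execute('UPDATE compute_nodes SET deleted = 1')  # soft-delete it
    compute_node_create(conn, 'fake-node-uuid')   # restored, no rollback error
    assert conn.execute('SELECT deleted FROM compute_nodes').fetchone()[0] == 0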