Commit 23c5f3d

Make resource tracker use UUIDs instead of names
This makes the resource tracker look up and create ComputeNode objects by
uuid instead of nodename. For drivers like ironic that already provide
'uuid' in the resources dict, we can use that. For those that do not, we
force the uuid to be the locally-persisted node uuid, and use that to
find/create the ComputeNode object.

A (happy) side-effect of this is that if we find a deleted compute node
object that matches that of our hypervisor, we undelete it instead of
re-creating one with a new uuid, which may clash with our old one. This
means we remove some of the special-casing of ironic rebalance, although
the tests for that still largely stay the same.

Change-Id: I6a582a38c302fd1554a49abc38cfeda7c324d911
1 parent 0efdbb3 commit 23c5f3d
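Context for the change below: drivers that do not report a per-node uuid now
get a node uuid that is generated once and persisted locally on the host
(nova.virt.node.get_local_node_uuid in the diffs). The snippet here is a
minimal, self-contained sketch of that read-or-generate-and-persist idea; the
file path and function body are illustrative assumptions, not the module's
actual implementation.

# Minimal sketch of a "locally-persisted node uuid" helper.
# COMPUTE_ID_FILE is a hypothetical path chosen for illustration only.
import os
import uuid

COMPUTE_ID_FILE = '/var/lib/nova/compute_id'  # hypothetical location


def get_local_node_uuid(path=COMPUTE_ID_FILE):
    """Return the UUID persisted on this host, generating it on first use."""
    if os.path.exists(path):
        with open(path) as f:
            return f.read().strip()
    node_uuid = str(uuid.uuid4())
    with open(path, 'w') as f:
        f.write(node_uuid)
    return node_uuid

Because the uuid is persisted, restarting the service (or undeleting an old
ComputeNode record) keeps the node identity stable instead of minting a new
uuid each time.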

9 files changed: +195 −169 lines changed

nova/compute/manager.py

Lines changed: 16 additions & 15 deletions
@@ -97,6 +97,7 @@
 from nova.virt import driver
 from nova.virt import event as virtevent
 from nova.virt import hardware
+import nova.virt.node
 from nova.virt import storage_users
 from nova.virt import virtapi
 from nova.volume import cinder
@@ -1471,27 +1472,27 @@ def _get_nodes(self, context):
         :return: a dict of ComputeNode objects keyed by the UUID of the given
             node.
         """
-        nodes_by_uuid = {}
         try:
-            node_names = self.driver.get_available_nodes()
+            node_uuids = self.driver.get_available_node_uuids()
         except exception.VirtDriverNotReady:
             LOG.warning(
                 "Virt driver is not ready. If this is the first time this "
-                "service is starting on this host, then you can ignore this "
-                "warning.")
+                "service is starting on this host, then you can ignore "
+                "this warning.")
             return {}

-        for node_name in node_names:
-            try:
-                node = objects.ComputeNode.get_by_host_and_nodename(
-                    context, self.host, node_name)
-                nodes_by_uuid[node.uuid] = node
-            except exception.ComputeHostNotFound:
-                LOG.warning(
-                    "Compute node %s not found in the database. If this is "
-                    "the first time this service is starting on this host, "
-                    "then you can ignore this warning.", node_name)
-        return nodes_by_uuid
+        nodes = objects.ComputeNodeList.get_all_by_uuids(context, node_uuids)
+        if not nodes:
+            # NOTE(danms): This should only happen if the compute_id is
+            # pre-provisioned on a host that has never started.
+            LOG.warning('Compute nodes %s for host %s were not found in the '
+                        'database. If this is the first time this service is '
+                        'starting on this host, then you can ignore this '
+                        'warning.',
+                        node_uuids, self.host)
+            return {}
+
+        return {n.uuid: n for n in nodes}

     def _ensure_existing_node_identity(self, service_ref):
         """If we are upgrading from an older service version, we need

nova/compute/resource_tracker.py

Lines changed: 31 additions & 52 deletions
@@ -49,6 +49,7 @@
 from nova.scheduler.client import report
 from nova import utils
 from nova.virt import hardware
+from nova.virt import node


 CONF = nova.conf.CONF
@@ -668,50 +669,6 @@ def disabled(self, nodename):
         return (nodename not in self.compute_nodes or
                 not self.driver.node_is_available(nodename))

-    def _check_for_nodes_rebalance(self, context, resources, nodename):
-        """Check if nodes rebalance has happened.
-
-        The ironic driver maintains a hash ring mapping bare metal nodes
-        to compute nodes. If a compute dies, the hash ring is rebuilt, and
-        some of its bare metal nodes (more precisely, those not in ACTIVE
-        state) are assigned to other computes.
-
-        This method checks for this condition and adjusts the database
-        accordingly.
-
-        :param context: security context
-        :param resources: initial values
-        :param nodename: node name
-        :returns: True if a suitable compute node record was found, else False
-        """
-        if not self.driver.rebalances_nodes:
-            return False
-
-        # Its possible ironic just did a node re-balance, so let's
-        # check if there is a compute node that already has the correct
-        # hypervisor_hostname. We can re-use that rather than create a
-        # new one and have to move existing placement allocations
-        cn_candidates = objects.ComputeNodeList.get_by_hypervisor(
-            context, nodename)
-
-        if len(cn_candidates) == 1:
-            cn = cn_candidates[0]
-            LOG.info("ComputeNode %(name)s moving from %(old)s to %(new)s",
-                     {"name": nodename, "old": cn.host, "new": self.host})
-            cn.host = self.host
-            self.compute_nodes[nodename] = cn
-            self._copy_resources(cn, resources)
-            self._setup_pci_tracker(context, cn, resources)
-            self._update(context, cn)
-            return True
-        elif len(cn_candidates) > 1:
-            LOG.error(
-                "Found more than one ComputeNode for nodename %s. "
-                "Please clean up the orphaned ComputeNode records in your DB.",
-                nodename)
-
-        return False
-
     def _init_compute_node(self, context, resources):
         """Initialize the compute node if it does not already exist.
@@ -729,6 +686,7 @@ def _init_compute_node(self, context, resources):
             False otherwise
         """
         nodename = resources['hypervisor_hostname']
+        node_uuid = resources['uuid']

         # if there is already a compute node just use resources
         # to initialize
@@ -740,16 +698,30 @@ def _init_compute_node(self, context, resources):

         # now try to get the compute node record from the
         # database. If we get one we use resources to initialize
-        cn = self._get_compute_node(context, nodename)
+
+        # We use read_deleted=True so that we will find and recover a deleted
+        # node object, if necessary.
+        with utils.temporary_mutation(context, read_deleted='yes'):
+            cn = self._get_compute_node(context, node_uuid)
+            if cn and cn.deleted:
+                # Undelete and save this right now so that everything below
+                # can continue without read_deleted=yes
+                LOG.info('Undeleting compute node %s', cn.uuid)
+                cn.deleted = False
+                cn.deleted_at = None
+                cn.save()
         if cn:
+            if cn.host != self.host:
+                LOG.info("ComputeNode %(name)s moving from %(old)s to %(new)s",
+                         {"name": nodename, "old": cn.host, "new": self.host})
+                cn.host = self.host
+                self._update(context, cn)
+
             self.compute_nodes[nodename] = cn
             self._copy_resources(cn, resources)
             self._setup_pci_tracker(context, cn, resources)
             return False

-        if self._check_for_nodes_rebalance(context, resources, nodename):
-            return False
-
         # there was no local copy and none in the database
         # so we need to create a new compute node. This needs
         # to be initialized with resource values.
@@ -889,6 +861,14 @@ def update_available_resource(self, context, nodename, startup=False):
         # contains a non-None value, even for non-Ironic nova-compute hosts. It
         # is this value that will be populated in the compute_nodes table.
         resources['host_ip'] = CONF.my_ip
+        if 'uuid' not in resources:
+            # NOTE(danms): Any driver that does not provide a uuid per
+            # node gets the locally-persistent compute_id. Only ironic
+            # should be setting the per-node uuid (and returning
+            # multiple nodes in general). If this is the first time we
+            # are creating a compute node on this host, we will
+            # generate and persist this uuid for the future.
+            resources['uuid'] = node.get_local_node_uuid()

         # We want the 'cpu_info' to be None from the POV of the
         # virt driver, but the DB requires it to be non-null so
@@ -1014,14 +994,13 @@ def _update_available_resource(self, context, resources, startup=False):
         if startup:
             self._check_resources(context)

-    def _get_compute_node(self, context, nodename):
+    def _get_compute_node(self, context, node_uuid):
         """Returns compute node for the host and nodename."""
         try:
-            return objects.ComputeNode.get_by_host_and_nodename(
-                context, self.host, nodename)
+            return objects.ComputeNode.get_by_uuid(context, node_uuid)
         except exception.NotFound:
             LOG.warning("No compute node record for %(host)s:%(node)s",
-                        {'host': self.host, 'node': nodename})
+                        {'host': self.host, 'node': node_uuid})

     def _report_hypervisor_resource_view(self, resources):
         """Log the hypervisor's view of free resources.

nova/objects/compute_node.py

Lines changed: 5 additions & 2 deletions
@@ -388,8 +388,11 @@ def update_from_virt_driver(self, resources):
             # The uuid field is read-only so it should only be set when
             # creating the compute node record for the first time. Ignore
             # it otherwise.
-            if key == 'uuid' and 'uuid' in self:
-                continue
+            if (key == 'uuid' and 'uuid' in self and
+                    resources[key] != self.uuid):
+                raise exception.InvalidNodeConfiguration(
+                    reason='Attempt to overwrite node %s with %s!' % (
+                        self.uuid, resources[key]))
             setattr(self, key, resources[key])

         # supported_instances has a different name in compute_node

nova/tests/functional/libvirt/base.py

Lines changed: 3 additions & 0 deletions
@@ -181,6 +181,9 @@ def _start_compute(hostname, host_info):
         with mock.patch('nova.virt.node.get_local_node_uuid') as m:
             m.return_value = str(getattr(uuids, 'node_%s' % hostname))
             self.computes[hostname] = _start_compute(hostname, host_info)
+            # We need to trigger libvirt.Host() to capture the node-local
+            # uuid while we have it mocked out.
+            self.computes[hostname].driver._host.get_node_uuid()

         self.compute_rp_uuids[hostname] = self.placement.get(
             '/resource_providers?name=%s' % hostname).body[

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 17 additions & 22 deletions
@@ -1160,26 +1160,25 @@ def test_init_host_with_in_progress_evacuations(self, mock_destroy_evac,
             self.context, {active_instance.uuid, evacuating_instance.uuid},
             mock_get_nodes.return_value.keys())

-    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename')
+    @mock.patch.object(objects.ComputeNodeList, 'get_all_by_uuids')
     @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
-    def test_get_nodes(self, mock_driver_get_nodes, mock_get_by_host_and_node):
+    def test_get_nodes(self, mock_driver_get_nodes, mock_get_by_uuid):
         mock_driver_get_nodes.return_value = ['fake_node1', 'fake_node2']
         # NOTE(danms): The fake driver, by default, uses
         # uuidsentinel.$node_name, so we can predict the uuids it will
         # return here.
         cn1 = objects.ComputeNode(uuid=uuids.fake_node1)
         cn2 = objects.ComputeNode(uuid=uuids.fake_node2)
-        mock_get_by_host_and_node.side_effect = [cn1, cn2]
+        mock_get_by_uuid.return_value = [cn1, cn2]

         nodes = self.compute._get_nodes(self.context)

         self.assertEqual({uuids.fake_node1: cn1, uuids.fake_node2: cn2}, nodes)

         mock_driver_get_nodes.assert_called_once_with()
-        mock_get_by_host_and_node.assert_has_calls([
-            mock.call(self.context, self.compute.host, 'fake_node1'),
-            mock.call(self.context, self.compute.host, 'fake_node2')
-        ])
+        mock_get_by_uuid.assert_called_once_with(self.context,
+                                                 [uuids.fake_node1,
+                                                  uuids.fake_node2])

     @mock.patch.object(manager.LOG, 'warning')
     @mock.patch.object(
@@ -1201,29 +1200,25 @@ def test_get_nodes_driver_not_ready(
             "is starting on this host, then you can ignore this warning.")

     @mock.patch.object(manager.LOG, 'warning')
-    @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename')
-    @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes')
+    @mock.patch.object(objects.ComputeNodeList, 'get_all_by_uuids')
+    @mock.patch.object(fake_driver.FakeDriver, 'get_available_node_uuids')
     def test_get_nodes_node_not_found(
-            self, mock_driver_get_nodes, mock_get_by_host_and_node,
+            self, mock_driver_get_nodes, mock_get_all_by_uuids,
             mock_log_warning):
-        mock_driver_get_nodes.return_value = ['fake-node1', 'fake-node2']
-        cn2 = objects.ComputeNode(uuid=uuids.cn2)
-        mock_get_by_host_and_node.side_effect = [
-            exception.ComputeHostNotFound(host='fake-node1'), cn2]
+        mock_driver_get_nodes.return_value = ['fake-node1']
+        mock_get_all_by_uuids.return_value = []

         nodes = self.compute._get_nodes(self.context)

-        self.assertEqual({uuids.cn2: cn2}, nodes)
+        self.assertEqual({}, nodes)

         mock_driver_get_nodes.assert_called_once_with()
-        mock_get_by_host_and_node.assert_has_calls([
-            mock.call(self.context, self.compute.host, 'fake-node1'),
-            mock.call(self.context, self.compute.host, 'fake-node2')
-        ])
+        mock_get_all_by_uuids.assert_called_once_with(self.context,
+                                                      ['fake-node1'])
         mock_log_warning.assert_called_once_with(
-            "Compute node %s not found in the database. If this is the first "
-            "time this service is starting on this host, then you can ignore "
-            "this warning.", 'fake-node1')
+            "Compute nodes %s for host %s were not found in the database. "
+            "If this is the first time this service is starting on this host, "
+            "then you can ignore this warning.", ['fake-node1'], 'fake-mini')

     def test_init_host_disk_devices_configuration_failure(self):
         self.flags(max_disk_devices_to_attach=0, group='compute')
