
Commit 9c27033

hardware: Reject requests for no hyperthreads on hosts with HT
Attempting to boot an instance with 'hw:cpu_policy=dedicated' will result in a request from nova-scheduler to placement for allocation candidates with $flavor.vcpu 'PCPU' inventory. Similarly, booting an instance with 'hw:cpu_thread_policy=isolate' will result in a request for allocation candidates with 'HW_CPU_HYPERTHREADING=forbidden', i.e. hosts without hyperthreading. This has been the case since the cpu-resources feature was implemented in Train.

However, as part of that work, and to enable upgrades from hosts that predated Train, we also make a second request for candidates with $flavor.vcpu 'VCPU' inventory. The idea behind this is that old compute nodes would only report 'VCPU' and should remain usable, and any new compute nodes that got caught up in this second request could never actually be scheduled to, since there would not be enough cores from 'ComputeNode.numa_topology.cells.[*].pcpuset' available to schedule to, resulting in rejection by the 'NUMATopologyFilter'. However, if a host was rejected in the first query because it reported the 'HW_CPU_HYPERTHREADING' trait, it could get picked up by the second query and would happily be scheduled to, resulting in an instance consuming 'VCPU' inventory from a host that properly supported 'PCPU' inventory.

The solution is simple, though also a huge hack: if we detect that the host is using new-style configuration and should be able to report 'PCPU', check whether the instance asked for no hyperthreading and whether the host has it. If all of these are true, reject the request.

Change-Id: Id39aaaac09585ca1a754b669351c86e234b89dd9
Signed-off-by: Stephen Finucane <[email protected]>
Closes-Bug: #1889633
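For illustration, a minimal runnable sketch of the check this commit adds (hypothetical helper name; the real logic lives in nova/virt/hardware.py, shown in the diff below):

    # Hypothetical distillation of the new guard; not nova's actual API.
    def should_reject_isolate_request(cpuset, pcpuset, threads_per_core):
        # New-style hosts ('cpu_dedicated_set'/'cpu_shared_set') report
        # disjoint cpuset/pcpuset, so equal sets imply a legacy
        # 'vcpu_pin_set' host, where the old "pin a core and reserve its
        # siblings" behaviour is still acceptable.
        host_has_hyperthreads = threads_per_core != 1
        host_is_new_style = pcpuset != cpuset
        return host_has_hyperthreads and host_is_new_style

    # Host topology from the regression test below: request is rejected.
    assert should_reject_isolate_request({0, 1, 4, 5}, {2, 3, 6, 7}, 2)
    # Legacy 'vcpu_pin_set' host with SMT: request is allowed through.
    assert not should_reject_isolate_request({0, 1, 2, 3}, {0, 1, 2, 3}, 2)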
1 parent 737e0c0 commit 9c27033

4 files changed: +99 -16 lines changed


nova/tests/functional/libvirt/test_numa_servers.py

Lines changed: 1 addition & 6 deletions

@@ -383,12 +383,7 @@ def test_create_server_with_isolate_thread_policy_fails(self):
         }
         flavor_id = self._create_flavor(vcpu=2, extra_spec=extra_spec)
 
-        # FIXME(stephenfin): This should go to error status since there should
-        # not be a host available
-        expected_usage = {
-            'DISK_GB': 20, 'MEMORY_MB': 2048, 'PCPU': 0, 'VCPU': 2,
-        }
-        self._run_build_test(flavor_id, expected_usage=expected_usage)
+        self._run_build_test(flavor_id, end_status='ERROR')
 
     def test_create_server_with_pcpu(self):
         """Create a server using an explicit 'resources:PCPU' request.

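The extra_spec dict is truncated in the hunk above; based on the test name and the commit message, it presumably looks like the following (a hedged reconstruction, not the verbatim test code):

    # Assumed flavor extra specs for the failing build test; the actual
    # dict is elided in the diff above.
    extra_spec = {
        'hw:cpu_policy': 'dedicated',
        'hw:cpu_thread_policy': 'isolate',
    }
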
nova/tests/unit/virt/test_hardware.py

Lines changed: 65 additions & 10 deletions

@@ -3749,10 +3749,13 @@ def test_get_pinning_host_siblings_large_instance_odd_fit(self):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=5, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_too_few_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3774,10 +3777,13 @@ def test_get_pinning_isolate_policy_too_few_fully_free_cores(self):
 
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_no_fully_free_cores(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3799,10 +3805,13 @@ def test_get_pinning_isolate_policy_no_fully_free_cores(self):
 
         self.assertIsNone(inst_pin)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3825,10 +3834,13 @@ def test_get_pinning_isolate_policy_fits(self):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits_ht_host(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3]),
             pcpuset=set([0, 1, 2, 3]),
             memory=4096,
             memory_usage=0,
@@ -3852,10 +3864,13 @@ def test_get_pinning_isolate_policy_fits_ht_host(self):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_get_pinning_isolate_policy_fits_w_usage(self):
         host_pin = objects.NUMACell(
             id=0,
-            cpuset=set(),
+            # Simulate a legacy host with vcpu_pin_set configuration,
+            # meaning cpuset == pcpuset
+            cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             pcpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
             memory=4096,
             memory_usage=0,
@@ -3879,6 +3894,38 @@ def test_get_pinning_isolate_policy_fits_w_usage(self):
         got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=1)
         self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
+    @mock.patch.object(hw, 'LOG')
+    def test_get_pinning_isolate_policy_bug_1889633(self, mock_log):
+        host_pin = objects.NUMACell(
+            id=0,
+            cpuset={0, 1, 4, 5},
+            pcpuset={2, 3, 6, 7},
+            memory=4096,
+            memory_usage=0,
+            pinned_cpus=set(),
+            siblings=[{0, 4}, {1, 5}, {2, 6}, {3, 7}],
+            mempages=[],
+        )
+        inst_pin = objects.InstanceNUMACell(
+            cpuset=set(),
+            pcpuset={0, 1},
+            memory=2048,
+            cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+            cpu_thread_policy=fields.CPUThreadAllocationPolicy.ISOLATE,
+        )
+        limits = objects.NUMATopologyLimits(
+            cpu_allocation_ratio=2, ram_allocation_ratio=2,
+        )
+
+        # host has hyperthreads, which means no NUMA topology should be built
+        inst_topo = hw._numa_fit_instance_cell(host_pin, inst_pin, limits)
+        self.assertIsNone(inst_topo)
+        self.assertIn(
+            'Host supports hyperthreads, but instance requested no',
+            mock_log.warning.call_args[0][0],
+        )
+
 
 class CPUPinningTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
     def test_host_numa_fit_instance_to_host_single_cell(self):
@@ -4898,11 +4945,14 @@ def test_isolate_full_usage(self):
 
         self.assertEqual(set([0, 1]), host_topo.cells[0].pinned_cpus)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -4925,11 +4975,14 @@ def test_isolate_w_isolate_thread_alloc(self):
         self.assertEqual({0: 0, 1: 2}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([4]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_isolate_w_isolate_thread_alloc_usage(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([0, 1, 2, 3, 4, 5]),
                 pcpuset=set([0, 1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -5038,11 +5091,14 @@ def test_asymmetric_host(self):
         self.assertEqual({0: 2, 1: 3}, inst_topo.cells[0].cpu_pinning)
         self.assertEqual(set([1]), inst_topo.cells[0].cpuset_reserved)
 
+    # TODO(stephenfin): Remove when we drop support for vcpu_pin_set
     def test_asymmetric_host_w_isolate_thread_alloc(self):
         host_topo = objects.NUMATopology(cells=[
             objects.NUMACell(
                 id=0,
-                cpuset=set(),
+                # Simulate a legacy host with vcpu_pin_set configuration,
+                # meaning cpuset == pcpuset
+                cpuset=set([1, 2, 3, 4, 5]),
                 pcpuset=set([1, 2, 3, 4, 5]),
                 memory=2048,
                 cpu_usage=0,
@@ -5052,8 +5108,7 @@ def test_asymmetric_host_w_isolate_thread_alloc(self):
                 mempages=[objects.NUMAPagesTopology(
                     size_kb=4, total=524288, used=0)])])
         inst_topo = objects.InstanceNUMATopology(
-            emulator_threads_policy=(
-                fields.CPUEmulatorThreadsPolicy.ISOLATE),
+            emulator_threads_policy=fields.CPUEmulatorThreadsPolicy.ISOLATE,
             cells=[objects.InstanceNUMACell(
                 id=0,
                 cpuset=set(), pcpuset=set([0, 1]), memory=2048,
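
As a quick aside on the bug-1889633 regression test above: each entry in 'siblings' is the set of hardware threads sharing one physical core, which is what makes the host count as hyperthreaded (an illustrative snippet, not nova's code):

    # Sibling sets from test_get_pinning_isolate_policy_bug_1889633.
    siblings = [{0, 4}, {1, 5}, {2, 6}, {3, 7}]
    threads_per_core = max(len(sib) for sib in siblings)  # == 2
    # threads_per_core != 1 combined with disjoint cpuset/pcpuset is
    # exactly the condition the new hardware.py check rejects.
    assert threads_per_core != 1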

nova/virt/hardware.py

Lines changed: 24 additions & 0 deletions

@@ -918,6 +918,30 @@ def _get_reserved(sibling_set, vcpus_pinning, num_cpu_reserved=0,
                 'for the isolate policy without this.')
             return
 
+        # TODO(stephenfin): Drop this when we drop support for 'vcpu_pin_set'
+        # NOTE(stephenfin): This is total hack. We're relying on the fact that
+        # the libvirt driver, which is the only one that currently supports
+        # pinned CPUs, will set cpuset and pcpuset to the same value if using
+        # legacy configuration, i.e. 'vcpu_pin_set', as part of
+        # '_get_host_numa_topology'. They can't be equal otherwise since
+        # 'cpu_dedicated_set' and 'cpu_shared_set' must be disjoint. Therefore,
+        # if these are equal, the host that this NUMA cell corresponds to is
+        # using legacy configuration and it's okay to use the old, "pin a core
+        # and reserve its siblings" implementation of the 'isolate' policy. If
+        # they're not, the host is using new-style configuration and we've just
+        # hit bug #1889633
+        if threads_per_core != 1 and host_cell.pcpuset != host_cell.cpuset:
+            LOG.warning(
+                "Host supports hyperthreads, but instance requested no "
+                "hyperthreads. This should have been rejected by the "
+                "scheduler but we likely got here due to the fallback VCPU "
+                "query. Consider setting '[workarounds] "
+                "disable_fallback_pcpu_query' to 'True' once hosts are no "
+                "longer using 'vcpu_pin_set'. Refer to bug #1889633 for more "
+                "information."
+            )
+            return
+
         pinning = _get_pinning(
             1,  # we only want to "use" one thread per core
             sibling_sets[threads_per_core],
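
The warning above points at a real nova.conf option; once no compute host in the deployment still uses 'vcpu_pin_set', the fallback VCPU query can be disabled outright (a minimal nova.conf excerpt):

    [workarounds]
    disable_fallback_pcpu_query = True
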
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    An issue that could result in instances with the ``isolate`` thread policy
+    (``hw:cpu_thread_policy=isolate``) being scheduled to hosts with SMT
+    (HyperThreading) and consuming ``VCPU`` instead of ``PCPU`` has been
+    resolved. See `bug #1889633`__ for more information.
+
+    .. __: https://bugs.launchpad.net/nova/+bug/1889633
