Skip to content

Commit 3eb23d3

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "FUP for the scheduler part of PCI in placement"
2 parents 171d11e + d53a492 commit 3eb23d3

File tree

7 files changed

+114
-104
lines changed

7 files changed

+114
-104
lines changed

nova/compute/manager.py

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2471,10 +2471,12 @@ def _build_and_run_instance(self, context, instance, image, injected_files,
24712471

24722472
if provider_mapping:
24732473
try:
2474-
compute_utils\
2475-
.update_pci_request_with_placement_allocations(
2476-
context, self.reportclient,
2477-
instance.pci_requests.requests, provider_mapping)
2474+
compute_utils.update_pci_request_with_placement_allocations(
2475+
context,
2476+
self.reportclient,
2477+
instance.pci_requests.requests,
2478+
provider_mapping,
2479+
)
24782480
except (exception.AmbiguousResourceProviderForPCIRequest,
24792481
exception.UnexpectedResourceProviderNameForPCIRequest
24802482
) as e:
@@ -3789,10 +3791,12 @@ def _do_rebuild_instance_with_claim(
37893791
provider_mapping = self._get_request_group_mapping(request_spec)
37903792

37913793
if provider_mapping:
3792-
compute_utils.\
3793-
update_pci_request_with_placement_allocations(
3794-
context, self.reportclient,
3795-
instance.pci_requests.requests, provider_mapping)
3794+
compute_utils.update_pci_request_with_placement_allocations(
3795+
context,
3796+
self.reportclient,
3797+
instance.pci_requests.requests,
3798+
provider_mapping,
3799+
)
37963800

37973801
claim_context = rebuild_claim(
37983802
context, instance, scheduled_node, allocations,
@@ -5415,10 +5419,12 @@ def _prep_resize(
54155419

54165420
if provider_mapping:
54175421
try:
5418-
compute_utils.\
5419-
update_pci_request_with_placement_allocations(
5420-
context, self.reportclient,
5421-
instance.pci_requests.requests, provider_mapping)
5422+
compute_utils.update_pci_request_with_placement_allocations(
5423+
context,
5424+
self.reportclient,
5425+
instance.pci_requests.requests,
5426+
provider_mapping,
5427+
)
54225428
except (exception.AmbiguousResourceProviderForPCIRequest,
54235429
exception.UnexpectedResourceProviderNameForPCIRequest
54245430
) as e:
@@ -6899,12 +6905,12 @@ def _unshelve_instance(self, context, instance, image, filter_properties,
68996905

69006906
try:
69016907
if provider_mappings:
6902-
update = (
6903-
compute_utils.
6904-
update_pci_request_with_placement_allocations)
6905-
update(
6906-
context, self.reportclient, instance.pci_requests.requests,
6907-
provider_mappings)
6908+
compute_utils.update_pci_request_with_placement_allocations(
6909+
context,
6910+
self.reportclient,
6911+
instance.pci_requests.requests,
6912+
provider_mappings,
6913+
)
69086914

69096915
accel_info = []
69106916
if accel_uuids:
@@ -7984,12 +7990,12 @@ def _allocate_port_resource_for_instance(
79847990
instance_uuid=instance.uuid) from e
79857991

79867992
try:
7987-
update = (
7988-
compute_utils.
7989-
update_pci_request_with_placement_allocations)
7990-
update(
7991-
context, self.reportclient, pci_reqs.requests,
7992-
provider_mappings)
7993+
compute_utils.update_pci_request_with_placement_allocations(
7994+
context,
7995+
self.reportclient,
7996+
pci_reqs.requests,
7997+
provider_mappings,
7998+
)
79937999
except (
79948000
exception.AmbiguousResourceProviderForPCIRequest,
79958001
exception.UnexpectedResourceProviderNameForPCIRequest

nova/conf/pci.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,9 @@
7070
``resource_class``
7171
The optional Placement resource class name that is used
7272
to track the requested PCI devices in Placement. It can be a standard
73-
resource class from the ``os-resource-classes`` lib. Or can be any string.
74-
In that case Nova will normalize it to a proper Placement resource class by
73+
resource class from the ``os-resource-classes`` lib. Or it can be an
74+
arbitrary string. If it is an non-standard resource class then Nova will
75+
normalize it to a proper Placement resource class by
7576
making it upper case, replacing any consecutive character outside of
7677
``[A-Z0-9_]`` with a single '_', and prefixing the name with ``CUSTOM_`` if
7778
not yet prefixed. The maximum allowed length is 255 character including the
@@ -85,15 +86,16 @@
8586
``traits``
8687
An optional comma separated list of Placement trait names requested to be
8788
present on the resource provider that fulfills this alias. Each trait can
88-
be a standard trait from ``os-traits`` lib or can be any string. If it is
89-
not a standard trait then Nova will normalize the trait name by making it
90-
upper case, replacing any consecutive character outside of ``[A-Z0-9_]``
91-
with a single '_', and prefixing the name with ``CUSTOM_`` if not yet
92-
prefixed. The maximum allowed length of a trait name is 255 character
93-
including the prefix. Every trait in ``traits`` requested in the alias
94-
ensured to be in the list of traits provided in the ``traits`` field of
95-
the ``[pci]device_spec`` when scheduling the request. This field can only
96-
be used only if ``[filter_scheduler]pci_in_placement`` is enabled.
89+
be a standard trait from ``os-traits`` lib or it can be an arbitrary
90+
string. If it is a non-standard trait then Nova will normalize the
91+
trait name by making it upper case, replacing any consecutive character
92+
outside of ``[A-Z0-9_]`` with a single '_', and prefixing the name
93+
with ``CUSTOM_`` if not yet prefixed. The maximum allowed length of a
94+
trait name is 255 character including the prefix. Every trait in
95+
``traits`` requested in the alias ensured to be in the list of traits
96+
provided in the ``traits`` field of the ``[pci]device_spec`` when
97+
scheduling the request. This field can only be used only if
98+
``[filter_scheduler]pci_in_placement`` is enabled.
9799
98100
* Supports multiple aliases by repeating the option (not by specifying
99101
a list value)::

nova/pci/stats.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,7 @@ def _create_pool_keys_from_dev(
153153
# to split the pools by PCI or PF address. We can still keep
154154
# the VFs from the same parent PF in a single pool though as they
155155
# are equivalent from placement perspective.
156-
address = dev.parent_addr or dev.address
157-
pool['address'] = address
156+
pool['address'] = dev.parent_addr or dev.address
158157

159158
# NOTE(gibi): parent_ifname acts like a tag during pci claim but
160159
# not provided as part of the whitelist spec as it is auto detected
@@ -579,18 +578,13 @@ def _filter_pools_based_on_placement_allocation(
579578
for pool in pools:
580579
rp_uuid = pool.get('rp_uuid')
581580
if rp_uuid is None:
582-
# NOTE(gibi): There can be pools without rp_uuid field if the
583-
# [pci]report_in_placement is not enabled for a compute with
584-
# viable PCI devices. We have a non-empty rp_uuids, so we know
585-
# that the [filter_scheduler]pci_in_placement is enabled. This
586-
# is a configuration error.
587-
LOG.warning(
588-
"The PCI pool %s isn't mapped to an RP UUID but the "
589-
"scheduler is configured to create PCI allocations in "
590-
"placement. This should not happen. Please enable "
591-
"[pci]report_in_placement on all compute hosts before "
592-
"enabling [filter_scheduler]pci_in_placement in the "
593-
"scheduler. This pool is ignored now.", pool)
581+
# NOTE(gibi): As rp_uuids is not empty the scheduler allocated
582+
# PCI resources on this host, so we know that
583+
# [pci]report_in_placement is enabled on this host. But this
584+
# pool has no RP mapping which can only happen if the pool
585+
# contains PCI devices with physical_network tag, as those
586+
# devices not yet reported in placement. But if they are not
587+
# reported then we can ignore them here too.
594588
continue
595589

596590
if (

nova/tests/functional/libvirt/test_pci_in_placement.py

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# under the License.
1414
from unittest import mock
1515

16+
import ddt
1617
import fixtures
1718
import os_resource_classes
1819
import os_traits
@@ -1613,12 +1614,12 @@ def test_heal_allocation_during_same_host_resize(self):
16131614
"compute1", **compute1_expected_placement_view)
16141615

16151616

1616-
class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
1617+
@ddt.ddt
1618+
class SimpleRCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
16171619
def setUp(self):
16181620
super().setUp()
16191621
self.flags(group='filter_scheduler', pci_in_placement=True)
16201622

1621-
def test_boot_with_custom_rc_and_traits(self):
16221623
# The fake libvirt will emulate on the host:
16231624
# * one type-PCI in slot 0
16241625
pci_info = fakelibvirt.HostPCIDevicesInfo(
@@ -1642,7 +1643,7 @@ def test_boot_with_custom_rc_and_traits(self):
16421643
self.start_compute(hostname="compute1", pci_info=pci_info)
16431644

16441645
self.assertPCIDeviceCounts("compute1", total=1, free=1)
1645-
compute1_expected_placement_view = {
1646+
self.compute1_expected_placement_view = {
16461647
"inventories": {
16471648
"0000:81:00.0": {"CUSTOM_GPU": 1},
16481649
},
@@ -1659,73 +1660,74 @@ def test_boot_with_custom_rc_and_traits(self):
16591660
"allocations": {},
16601661
}
16611662
self.assert_placement_pci_view(
1662-
"compute1", **compute1_expected_placement_view)
1663+
"compute1", **self.compute1_expected_placement_view)
16631664

1664-
pci_alias_wrong_rc = {
1665+
@ddt.data(
1666+
{
16651667
"vendor_id": fakelibvirt.PCI_VEND_ID,
16661668
"product_id": fakelibvirt.PCI_PROD_ID,
16671669
"name": "a-gpu-wrong-rc",
1668-
}
1669-
pci_alias_wrong_rc_2 = {
1670+
},
1671+
{
16701672
"resource_class": os_resource_classes.PGPU,
16711673
"name": "a-gpu-wrong-rc-2",
1672-
}
1673-
pci_alias_asking_for_missing_trait = {
1674+
},
1675+
{
16741676
"resource_class": "GPU",
16751677
# NOTE(gibi): "big" is missing from device spec
16761678
"traits": "purple,big",
16771679
"name": "a-gpu-missing-trait",
1678-
}
1680+
},
1681+
)
1682+
def test_boot_with_custom_rc_and_traits_no_matching_device(
1683+
self, pci_alias
1684+
):
1685+
self.flags(group="pci", alias=self._to_list_of_json_str([pci_alias]))
1686+
extra_spec = {"pci_passthrough:alias": f"{pci_alias['name']}:1"}
1687+
flavor_id = self._create_flavor(extra_spec=extra_spec)
1688+
server = self._create_server(
1689+
flavor_id=flavor_id, networks=[], expected_state="ERROR"
1690+
)
1691+
self.assertIn("fault", server)
1692+
self.assertIn("No valid host", server["fault"]["message"])
1693+
1694+
self.assertPCIDeviceCounts("compute1", total=1, free=1)
1695+
self.assert_placement_pci_view(
1696+
"compute1", **self.compute1_expected_placement_view
1697+
)
1698+
1699+
def test_boot_with_custom_rc_and_traits_succeeds(self):
16791700
pci_alias_gpu = {
16801701
"resource_class": "GPU",
16811702
"traits": "HW_GPU_API_VULKAN,PURPLE",
16821703
"name": "a-gpu",
16831704
}
16841705
self.flags(
1685-
group="pci",
1686-
alias=self._to_list_of_json_str(
1687-
[
1688-
pci_alias_wrong_rc,
1689-
pci_alias_wrong_rc_2,
1690-
pci_alias_asking_for_missing_trait,
1691-
pci_alias_gpu,
1692-
]
1693-
),
1694-
)
1695-
1696-
# try to boot with each alias that does not match
1697-
for alias in [
1698-
"a-gpu-wrong-rc",
1699-
"a-gpu-wrong-rc-2",
1700-
"a-gpu-missing-trait",
1701-
]:
1702-
extra_spec = {"pci_passthrough:alias": f"{alias}:1"}
1703-
flavor_id = self._create_flavor(extra_spec=extra_spec)
1704-
server = self._create_server(
1705-
flavor_id=flavor_id, networks=[], expected_state='ERROR')
1706-
self.assertIn('fault', server)
1707-
self.assertIn('No valid host', server['fault']['message'])
1708-
1709-
self.assertPCIDeviceCounts("compute1", total=1, free=1)
1710-
self.assert_placement_pci_view(
1711-
"compute1", **compute1_expected_placement_view)
1712-
1713-
# then boot with the matching alias
1706+
group="pci", alias=self._to_list_of_json_str([pci_alias_gpu])
1707+
)
1708+
17141709
extra_spec = {"pci_passthrough:alias": "a-gpu:1"}
17151710
flavor_id = self._create_flavor(extra_spec=extra_spec)
1716-
server = self._create_server(
1717-
flavor_id=flavor_id, networks=[])
1711+
server = self._create_server(flavor_id=flavor_id, networks=[])
17181712

17191713
self.assertPCIDeviceCounts("compute1", total=1, free=0)
1720-
compute1_expected_placement_view[
1721-
"usages"]["0000:81:00.0"]["CUSTOM_GPU"] = 1
1722-
compute1_expected_placement_view["allocations"][server["id"]] = {
1714+
self.compute1_expected_placement_view["usages"]["0000:81:00.0"][
1715+
"CUSTOM_GPU"
1716+
] = 1
1717+
self.compute1_expected_placement_view["allocations"][server["id"]] = {
17231718
"0000:81:00.0": {"CUSTOM_GPU": 1}
17241719
}
17251720
self.assert_placement_pci_view(
1726-
"compute1", **compute1_expected_placement_view)
1721+
"compute1", **self.compute1_expected_placement_view
1722+
)
17271723
self.assert_no_pci_healing("compute1")
17281724

1725+
1726+
class RCAndTraitBasedPCIAliasTests(PlacementPCIReportingTests):
1727+
def setUp(self):
1728+
super().setUp()
1729+
self.flags(group='filter_scheduler', pci_in_placement=True)
1730+
17291731
def test_device_claim_consistent_with_placement_allocation(self):
17301732
"""As soon as [filter_scheduler]pci_in_placement is enabled the
17311733
nova-scheduler will allocate PCI devices in placement. Then on the

nova/tests/functional/libvirt/test_pci_sriov_servers.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1956,7 +1956,8 @@ def setUp(self):
19561956

19571957
def test_create_server_with_pci_dev_and_numa(self):
19581958
"""Verifies that an instance can be booted with cpu pinning and with an
1959-
assigned pci device.
1959+
assigned pci device with legacy policy and numa info for the pci
1960+
device.
19601961
"""
19611962

19621963
self.flags(cpu_dedicated_set='0-7', group='compute')
@@ -1991,9 +1992,9 @@ def test_create_server_with_pci_dev_and_numa(self):
19911992

19921993
def test_create_server_with_pci_dev_and_numa_fails(self):
19931994
"""This test ensures that it is not possible to allocated CPU and
1994-
memory resources from one NUMA node and a PCI device from another.
1995+
memory resources from one NUMA node and a PCI device from another
1996+
if we use the legacy policy and the pci device reports numa info.
19951997
"""
1996-
19971998
self.flags(cpu_dedicated_set='0-7', group='compute')
19981999

19992000
pci_info = fakelibvirt.HostPCIDevicesInfo(num_pci=1, numa_node=0)

nova/tests/unit/scheduler/test_host_manager.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,12 +1562,14 @@ def test_stat_consumption_from_instance(self,
15621562

15631563
self.assertIsNone(host.updated)
15641564
host.consume_from_request(spec_obj)
1565-
numa_fit_mock.assert_called_once_with(fake_host_numa_topology,
1566-
fake_numa_topology,
1567-
limits=None, pci_requests=None,
1568-
pci_stats=None,
1569-
provider_mapping=None,
1570-
)
1565+
numa_fit_mock.assert_called_once_with(
1566+
fake_host_numa_topology,
1567+
fake_numa_topology,
1568+
limits=None,
1569+
pci_requests=None,
1570+
pci_stats=None,
1571+
provider_mapping=None,
1572+
)
15711573
numa_usage_mock.assert_called_once_with(fake_host_numa_topology,
15721574
fake_numa_topology)
15731575
sync_mock.assert_called_once_with(("fakehost", "fakenode"))

nova/tests/unit/scheduler/test_manager.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2047,6 +2047,8 @@ def test_get_alternate_hosts_returns_main_selection_with_claimed_a_c(
20472047
alloc_reqs_by_rp_uuid[uuids.host1] = [
20482048
{
20492049
"mappings": {
2050+
# This is odd but the un-name request group uses "" as the
2051+
# name of the group.
20502052
"": [uuids.host1],
20512053
uuids.group_req1: [getattr(uuids, f"host1_child{i}")],
20522054
}
@@ -2071,7 +2073,8 @@ def test_get_alternate_hosts_returns_main_selection_with_claimed_a_c(
20712073
self.assertEqual(1, len(selections))
20722074
selection = selections[0]
20732075
self.assertEqual(uuids.host1, selection.compute_node_uuid)
2074-
# we expect that host1_child2 candidate is selected
2076+
# we expect that host1_child2 candidate is selected as the
2077+
# DropFirstFilter will drop host1_child1
20752078
expected_a_c = {
20762079
"mappings": {
20772080
"": [uuids.host1],

0 commit comments

Comments
 (0)