Skip to content

Commit 2f4feea

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "Map PCI pools to RP UUIDs"
2 parents d7de0c1 + e96601c commit 2f4feea

File tree

6 files changed

+236
-6
lines changed

6 files changed

+236
-6
lines changed

nova/compute/pci_placement_translator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,12 @@ def update_provider_tree(
261261
)
262262
provider_tree.update_traits(self.name, self.traits)
263263

264+
# Here we are sure the RP exists in the provider_tree. So, we can
265+
# record the RP UUID in each PciDevice this RP represents
266+
rp_uuid = provider_tree.data(self.name).uuid
267+
for dev in self.devs:
268+
dev.extra_info['rp_uuid'] = rp_uuid
269+
264270
def update_allocations(
265271
self,
266272
allocations: dict,
@@ -598,6 +604,11 @@ def update_provider_tree_for_pci(
598604

599605
pv.update_provider_tree(provider_tree)
600606
old_alloc = copy.deepcopy(allocations)
607+
# update_provider_tree correlated the PciDevice objects with RPs in
608+
# placement and recorded the RP UUID in the PciDevice object. We need to
609+
# trigger an update on the device pools in the tracker to get the device
610+
# RP UUID mapped to the device pools
611+
pci_tracker.stats.populate_pools_metadata_from_assigned_devices()
601612
updated = pv.update_allocations(allocations, provider_tree)
602613

603614
if updated:

nova/compute/resource_tracker.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -991,8 +991,6 @@ def _update_available_resource(self, context, resources, startup=False):
991991
# notified when instances are deleted, we need remove all usages
992992
# from deleted instances.
993993
self.pci_tracker.clean_usage(instances, migrations)
994-
dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
995-
cn.pci_device_pools = dev_pools_obj
996994

997995
self._report_final_resource_view(nodename)
998996

@@ -1314,13 +1312,23 @@ def _update_to_placement(self, context, compute_node, startup):
13141312

13151313
def _update(self, context, compute_node, startup=False):
13161314
"""Update partial stats locally and populate them to Scheduler."""
1315+
1316+
self._update_to_placement(context, compute_node, startup)
1317+
1318+
if self.pci_tracker:
1319+
# sync PCI device pool state stored in the compute node with
1320+
# the actual state from the PCI tracker as we commit changes in
1321+
# the DB and in the PCI tracker below
1322+
dev_pools_obj = self.pci_tracker.stats.to_device_pools_obj()
1323+
compute_node.pci_device_pools = dev_pools_obj
1324+
13171325
# _resource_change will update self.old_resources if it detects changes
13181326
# but we want to restore those if compute_node.save() fails.
13191327
nodename = compute_node.hypervisor_hostname
13201328
old_compute = self.old_resources[nodename]
13211329
if self._resource_change(compute_node):
13221330
# If the compute_node's resource changed, update to DB. Note that
1323-
# _update_to_placement below does not supersede the need to do this
1331+
# _update_to_placement above does not supersede the need to do this
13241332
# because there are stats-related fields in the ComputeNode object
13251333
# which could have changed and still need to be reported to the
13261334
# scheduler filters/weighers (which could be out of tree as well).
@@ -1333,8 +1341,6 @@ def _update(self, context, compute_node, startup=False):
13331341
with excutils.save_and_reraise_exception(logger=LOG):
13341342
self.old_resources[nodename] = old_compute
13351343

1336-
self._update_to_placement(context, compute_node, startup)
1337-
13381344
if self.pci_tracker:
13391345
self.pci_tracker.save(context)
13401346

nova/pci/stats.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ def _find_pool(self, dev_pool: Pool) -> ty.Optional[Pool]:
9696
pool_keys = pool.copy()
9797
del pool_keys['count']
9898
del pool_keys['devices']
99+
# FIXME(gibi): do we need this?
100+
pool_keys.pop('rp_uuid', None)
99101
if (len(pool_keys.keys()) == len(dev_pool.keys()) and
100102
self._equal_properties(dev_pool, pool_keys, list(dev_pool))):
101103
return pool
@@ -779,3 +781,40 @@ def has_remote_managed_device_pools(self) -> bool:
779781
)
780782
pools = self._filter_pools_for_spec(self.pools, dummy_req)
781783
return bool(pools)
784+
785+
def populate_pools_metadata_from_assigned_devices(self):
786+
"""Populate the rp_uuid of each pool based on the rp_uuid of the
787+
devices assigned to the pool. This can only be called from the compute
788+
where devices are assigned to each pool. This should not be called from
789+
the scheduler as there device - pool assignment is not known.
790+
"""
791+
# PciDevices are tracked in placement and flavor based PCI requests
792+
# are scheduled and allocated in placement. To be able to correlate
793+
# what is allocated in placement and what is consumed in nova we
794+
# need to map device pools to RPs. We can do that as the PciDevice
795+
# contains the RP UUID that represents it in placement.
796+
# NOTE(gibi): We cannot do this when the device is originally added to
797+
# the pool as the device -> placement translation, that creates the
798+
# RPs, runs after all the device is created and assigned to pools.
799+
for pool in self.pools:
800+
pool_rps = {
801+
dev.extra_info.get("rp_uuid")
802+
for dev in pool["devices"]
803+
if "rp_uuid" in dev.extra_info
804+
}
805+
if len(pool_rps) >= 2:
806+
# FIXME(gibi): Do we have a 1:1 pool - RP mapping even
807+
# if two PFs providing very similar VFs?
808+
raise ValueError(
809+
"We have a pool %s connected to more than one RPs %s in "
810+
"placement via devs %s" % (pool, pool_rps, pool["devices"])
811+
)
812+
813+
if not pool_rps:
814+
# this can happen if the nova-compute is upgraded to have the
815+
# PCI in placement inventory handling code but
816+
# [pci]report_in_placement is not turned on yet.
817+
continue
818+
819+
if pool_rps: # now we know that it is a single RP
820+
pool['rp_uuid'] = next(iter(pool_rps))

nova/tests/unit/compute/test_pci_placement_translator.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212
# License for the specific language governing permissions and limitations
1313
# under the License.
1414
import ddt
15+
from oslo_utils.fixture import uuidsentinel as uuids
1516
from unittest import mock
1617

1718
from nova.compute import pci_placement_translator as ppt
19+
from nova.compute import provider_tree
1820
from nova import exception
1921
from nova.objects import fields
2022
from nova.objects import pci_device
23+
from nova.pci import devspec
2124
from nova import test
2225

2326

@@ -235,3 +238,54 @@ def test_mixed_rc_for_sibling_vfs(self):
235238
"CUSTOM_BAR,CUSTOM_BAZ,CUSTOM_FOO for 0000:81:00.0,0000:81:00.1.",
236239
str(ex),
237240
)
241+
242+
def test_translator_maps_pci_device_to_rp(self):
243+
pv = ppt.PlacementView(
244+
"fake-node", instances_under_same_host_resize=[])
245+
vf = pci_device.PciDevice(
246+
address="0000:81:00.1",
247+
parent_addr="0000:71:00.0",
248+
dev_type=fields.PciDeviceType.SRIOV_VF,
249+
vendor_id="dead",
250+
product_id="beef",
251+
)
252+
pf = pci_device.PciDevice(
253+
address="0000:72:00.0",
254+
parent_addr=None,
255+
dev_type=fields.PciDeviceType.SRIOV_PF,
256+
vendor_id="dead",
257+
product_id="beef",
258+
)
259+
pt = provider_tree.ProviderTree()
260+
pt.new_root("fake-node", uuids.compute_rp)
261+
262+
pv._add_dev(vf, {})
263+
pv._add_dev(pf, {})
264+
pv.update_provider_tree(pt)
265+
266+
self.assertEqual(
267+
pt.data("fake-node_0000:71:00.0").uuid, vf.extra_info["rp_uuid"]
268+
)
269+
self.assertEqual(
270+
pt.data("fake-node_0000:72:00.0").uuid, pf.extra_info["rp_uuid"]
271+
)
272+
273+
def test_update_provider_tree_for_pci_update_pools(self):
274+
pt = provider_tree.ProviderTree()
275+
pt.new_root("fake-node", uuids.compute_rp)
276+
pf = pci_device.PciDevice(
277+
address="0000:72:00.0",
278+
parent_addr=None,
279+
dev_type=fields.PciDeviceType.SRIOV_PF,
280+
vendor_id="dead",
281+
product_id="beef",
282+
status=fields.PciDeviceStatus.AVAILABLE,
283+
)
284+
pci_tracker = mock.Mock()
285+
pci_tracker.pci_devs = [pf]
286+
pci_tracker.dev_filter.specs = [devspec.PciDeviceSpec({})]
287+
288+
ppt.update_provider_tree_for_pci(pt, 'fake-node', pci_tracker, {}, [])
289+
290+
pci_tracker.stats.populate_pools_metadata_from_assigned_devices.\
291+
assert_called_once_with()

nova/tests/unit/compute/test_resource_tracker.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1580,6 +1580,7 @@ def test_existing_compute_node_updated_new_resources(self, save_mock):
15801580
self.rt._update(mock.sentinel.ctx, new_compute)
15811581
save_mock.assert_called_once_with()
15821582

1583+
@mock.patch('nova.objects.ComputeNode.save', new=mock.Mock())
15831584
@mock.patch(
15841585
'nova.pci.stats.PciDeviceStats.has_remote_managed_device_pools',
15851586
return_value=True)
@@ -1773,7 +1774,7 @@ def test_update_retry_raises(self, mock_resource_change,
17731774
self.assertEqual(4, ufpt_mock.call_count)
17741775
self.assertEqual(4, mock_sync_disabled.call_count)
17751776
# The retry is restricted to _update_to_placement
1776-
self.assertEqual(1, mock_resource_change.call_count)
1777+
self.assertEqual(0, mock_resource_change.call_count)
17771778

17781779
@mock.patch(
17791780
'nova.compute.resource_tracker.ResourceTracker.'
@@ -2041,6 +2042,10 @@ def test_sync_compute_service_disabled_trait_service_not_found(
20412042
self.assertIn('Unable to find services table record for nova-compute',
20422043
mock_log_error.call_args[0][0])
20432044

2045+
@mock.patch(
2046+
'nova.compute.resource_tracker.ResourceTracker.'
2047+
'_update_to_placement',
2048+
new=mock.Mock())
20442049
def test_update_compute_node_save_fails_restores_old_resources(self):
20452050
"""Tests the scenario that compute_node.save() fails and the
20462051
old_resources value for the node is restored to its previous value

nova/tests/unit/pci/test_stats.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from oslo_config import cfg
1919
from oslo_serialization import jsonutils
20+
from oslo_utils.fixture import uuidsentinel as uuids
2021

2122
from nova import exception
2223
from nova import objects
@@ -896,6 +897,120 @@ def test_filter_pools_for_spec_ignores_rc_and_traits_in_spec(self):
896897

897898
self.assertEqual(pools, matching_pools)
898899

900+
def test_populate_pools_metadata_from_assigned_devices(self):
901+
device_spec = [
902+
jsonutils.dumps(
903+
{
904+
"address": "0000:81:00.*",
905+
}
906+
),
907+
]
908+
self.flags(device_spec=device_spec, group="pci")
909+
dev_filter = whitelist.Whitelist(device_spec)
910+
pci_stats = stats.PciDeviceStats(
911+
objects.NUMATopology(),
912+
dev_filter=dev_filter)
913+
pci_dev1 = objects.PciDevice(
914+
vendor_id="dead",
915+
product_id="beef",
916+
address="0000:81:00.1",
917+
parent_addr="0000:81:00.0",
918+
numa_node=0,
919+
dev_type="type-VF",
920+
)
921+
pci_dev2 = objects.PciDevice(
922+
vendor_id="dead",
923+
product_id="beef",
924+
address="0000:81:00.2",
925+
parent_addr="0000:81:00.0",
926+
numa_node=0,
927+
dev_type="type-VF",
928+
)
929+
pci_stats.add_device(pci_dev1)
930+
pci_dev1.extra_info = {'rp_uuid': uuids.rp1}
931+
pci_stats.add_device(pci_dev2)
932+
pci_dev2.extra_info = {'rp_uuid': uuids.rp1}
933+
934+
self.assertEqual(1, len(pci_stats.pools))
935+
936+
pci_stats.populate_pools_metadata_from_assigned_devices()
937+
938+
self.assertEqual(uuids.rp1, pci_stats.pools[0]['rp_uuid'])
939+
940+
def test_populate_pools_metadata_from_assigned_devices_device_without_rp(
941+
self
942+
):
943+
device_spec = [
944+
jsonutils.dumps(
945+
{
946+
"address": "0000:81:00.*",
947+
}
948+
),
949+
]
950+
self.flags(device_spec=device_spec, group="pci")
951+
dev_filter = whitelist.Whitelist(device_spec)
952+
pci_stats = stats.PciDeviceStats(
953+
objects.NUMATopology(),
954+
dev_filter=dev_filter)
955+
pci_dev1 = objects.PciDevice(
956+
vendor_id="dead",
957+
product_id="beef",
958+
address="0000:81:00.1",
959+
parent_addr="0000:81:00.0",
960+
numa_node=0,
961+
dev_type="type-VF",
962+
)
963+
pci_stats.add_device(pci_dev1)
964+
965+
self.assertEqual(1, len(pci_stats.pools))
966+
967+
pci_stats.populate_pools_metadata_from_assigned_devices()
968+
969+
self.assertNotIn('rp_uuid', pci_stats.pools[0])
970+
971+
def test_populate_pools_metadata_from_assigned_devices_multiple_rp(self):
972+
device_spec = [
973+
jsonutils.dumps(
974+
{
975+
"address": "0000:81:00.*",
976+
}
977+
),
978+
]
979+
self.flags(device_spec=device_spec, group="pci")
980+
dev_filter = whitelist.Whitelist(device_spec)
981+
pci_stats = stats.PciDeviceStats(
982+
objects.NUMATopology(),
983+
dev_filter=dev_filter)
984+
pci_dev1 = objects.PciDevice(
985+
compute_node_id=1,
986+
vendor_id="dead",
987+
product_id="beef",
988+
address="0000:81:00.1",
989+
parent_addr="0000:81:00.0",
990+
numa_node=0,
991+
dev_type="type-VF",
992+
)
993+
pci_dev2 = objects.PciDevice(
994+
compute_node_id=1,
995+
vendor_id="dead",
996+
product_id="beef",
997+
address="0000:81:00.2",
998+
parent_addr="0000:81:00.0",
999+
numa_node=0,
1000+
dev_type="type-VF",
1001+
)
1002+
pci_stats.add_device(pci_dev1)
1003+
pci_dev1.extra_info = {'rp_uuid': uuids.rp1}
1004+
pci_stats.add_device(pci_dev2)
1005+
pci_dev2.extra_info = {'rp_uuid': uuids.rp2}
1006+
1007+
self.assertEqual(1, len(pci_stats.pools))
1008+
1009+
self.assertRaises(
1010+
ValueError,
1011+
pci_stats.populate_pools_metadata_from_assigned_devices,
1012+
)
1013+
8991014

9001015
class PciDeviceVFPFStatsTestCase(test.NoDBTestCase):
9011016

0 commit comments

Comments
 (0)