Skip to content

Commit ab439da

Browse files
committed
Heal allocation for same host resize
Same host resize needs special handling in the allocation healing logic because both the source and the dest host PCI devices are visible to the healing code: the PciDevice.instance_uuid points to the healed instance in both cases. blueprint: pci-device-tracking-in-placement Change-Id: Id02e445c55fc956965b7d725f0260876d42422f2
1 parent 98e9989 commit ab439da

File tree

6 files changed

+227
-9
lines changed

6 files changed

+227
-9
lines changed

nova/compute/pci_placement_translator.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,8 @@ def update_provider_tree(
254254
def update_allocations(
255255
self,
256256
allocations: dict,
257-
provider_tree: provider_tree.ProviderTree
257+
provider_tree: provider_tree.ProviderTree,
258+
same_host_instances: ty.List[str],
258259
) -> bool:
259260
updated = False
260261

@@ -293,6 +294,21 @@ def update_allocations(
293294
# heal_allocation CLI instead.
294295
continue
295296

297+
if consumer in same_host_instances:
298+
# This is a nasty special case. This instance is undergoing
299+
# a same host resize. So in Placement the source host
300+
# allocation is held by the migration UUID *but* the
301+
# PciDevice.instance_uuid is set for the instance UUID both
302+
# on the source and on the destination host. As the source and
303+
# dest are the same for migration we will see PciDevice
304+
# objects assigned to this instance that should not be
305+
# allocated to the instance UUID in placement.
306+
# As noted above we don't want to take care of in-progress
307+
# migrations during healing. So we simply ignore this instance.
308+
# If the instance needs healing then it will be healed
309+
# after the migration is confirmed or reverted.
310+
continue
311+
296312
current_allocs = allocations[consumer]['allocations']
297313
current_rp_allocs = current_allocs.get(rp_uuid)
298314

@@ -326,9 +342,14 @@ def __str__(self) -> str:
326342
class PlacementView:
327343
"""The PCI Placement view"""
328344

329-
def __init__(self, hypervisor_hostname: str) -> None:
345+
def __init__(
346+
self,
347+
hypervisor_hostname: str,
348+
instances_under_same_host_resize: ty.List[str],
349+
) -> None:
330350
self.rps: ty.Dict[str, PciResourceProvider] = {}
331351
self.root_rp_name = hypervisor_hostname
352+
self.same_host_instances = instances_under_same_host_resize
332353

333354
def _get_rp_name_for_address(self, addr: str) -> str:
334355
return f"{self.root_rp_name}_{addr.upper()}"
@@ -459,7 +480,11 @@ def update_allocations(
459480
"""
460481
updated = False
461482
for rp in self.rps.values():
462-
updated |= rp.update_allocations(allocations, provider_tree)
483+
updated |= rp.update_allocations(
484+
allocations,
485+
provider_tree,
486+
self.same_host_instances,
487+
)
463488
return updated
464489

465490

@@ -500,6 +525,7 @@ def update_provider_tree_for_pci(
500525
nodename: str,
501526
pci_tracker: pci_manager.PciDevTracker,
502527
allocations: dict,
528+
instances_under_same_host_resize: ty.List[str],
503529
) -> bool:
504530
"""Based on the PciDevice objects in the pci_tracker it calculates what
505531
inventories and allocations needs to exist in placement and create the
@@ -529,6 +555,8 @@ def update_provider_tree_for_pci(
529555
},
530556
...
531557
}
558+
:param instances_under_same_host_resize: A list of instance UUIDs that
559+
are undergoing same host resize on this host.
532560
"""
533561
if not _is_placement_tracking_enabled():
534562
ensure_tracking_was_not_enabled_before(provider_tree)
@@ -541,7 +569,7 @@ def update_provider_tree_for_pci(
541569
'Collecting PCI inventories and allocations to track them in Placement'
542570
)
543571

544-
pv = PlacementView(nodename)
572+
pv = PlacementView(nodename, instances_under_same_host_resize)
545573
for dev in pci_tracker.pci_devs:
546574
# match the PCI device with the [pci]dev_spec config to access
547575
# the configuration metadata tags

nova/compute/resource_tracker.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1260,6 +1260,11 @@ def _update_to_placement(self, context, compute_node, startup):
12601260
context, nodename, provider_tree=prov_tree)
12611261
prov_tree.update_traits(nodename, traits)
12621262

1263+
instances_under_same_host_resize = [
1264+
migration.instance_uuid
1265+
for migration in self.tracked_migrations.values()
1266+
if migration.is_same_host_resize
1267+
]
12631268
# NOTE(gibi): Tracking PCI in placement is different from other
12641269
# resources.
12651270
#
@@ -1278,7 +1283,12 @@ def _update_to_placement(self, context, compute_node, startup):
12781283
# is enabled. So we need to be ready to heal PCI allocations at
12791284
# every call not just at startup.
12801285
pci_reshaped = pci_placement_translator.update_provider_tree_for_pci(
1281-
prov_tree, nodename, self.pci_tracker, allocs)
1286+
prov_tree,
1287+
nodename,
1288+
self.pci_tracker,
1289+
allocs,
1290+
instances_under_same_host_resize,
1291+
)
12821292

12831293
self.provider_tree = prov_tree
12841294

nova/objects/migration.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ def is_live_migration(self):
215215
def is_resize(self):
216216
return self.migration_type == fields.MigrationType.RESIZE
217217

218+
@property
219+
def is_same_host_resize(self):
220+
return self.is_resize and self.source_node == self.dest_node
221+
218222

219223
@base.NovaObjectRegistry.register
220224
class MigrationList(base.ObjectListBase, base.NovaObject):

nova/tests/functional/libvirt/test_pci_in_placement.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,3 +1346,113 @@ def test_heal_partial_allocations_during_resize_change_dev_type(self):
13461346
del compute1_expected_placement_view["allocations"][server["id"]]
13471347
self.assert_placement_pci_view(
13481348
"compute1", **compute1_expected_placement_view)
1349+
1350+
def test_heal_allocation_during_same_host_resize(self):
1351+
self.flags(allow_resize_to_same_host=True)
1352+
# The fake libvirt will emulate on the host:
1353+
# * one type-PFs (slot 0) with 3 type-VFs
1354+
compute1_pci_info = fakelibvirt.HostPCIDevicesInfo(
1355+
num_pci=0, num_pfs=1, num_vfs=3)
1356+
# the config matches just the VFs
1357+
compute1_device_spec = self._to_device_spec_conf(
1358+
[
1359+
{
1360+
"vendor_id": fakelibvirt.PCI_VEND_ID,
1361+
"product_id": fakelibvirt.VF_PROD_ID,
1362+
"address": "0000:81:00.*",
1363+
},
1364+
]
1365+
)
1366+
self.flags(group='pci', device_spec=compute1_device_spec)
1367+
# Start a compute with PCI tracking in placement
1368+
self.mock_pci_report_in_placement.return_value = True
1369+
self.start_compute(hostname="compute1", pci_info=compute1_pci_info)
1370+
self.assertPCIDeviceCounts("compute1", total=3, free=3)
1371+
compute1_expected_placement_view = {
1372+
"inventories": {
1373+
"0000:81:00.0": {self.VF_RC: 3},
1374+
},
1375+
"traits": {
1376+
"0000:81:00.0": [],
1377+
},
1378+
"usages": {
1379+
"0000:81:00.0": {self.VF_RC: 0},
1380+
},
1381+
"allocations": {},
1382+
}
1383+
self.assert_placement_pci_view(
1384+
"compute1", **compute1_expected_placement_view)
1385+
# Create an instance consuming one VFs
1386+
extra_spec = {"pci_passthrough:alias": "a-vf:1"}
1387+
flavor_id = self._create_flavor(extra_spec=extra_spec)
1388+
server = self._create_server(flavor_id=flavor_id, networks=[])
1389+
self.assertPCIDeviceCounts("compute1", total=3, free=2)
1390+
# As scheduling does not support PCI in placement yet no allocation
1391+
# is created for the PCI consumption by the scheduler. BUT the resource
1392+
# tracker in the compute will heal the missing PCI allocation
1393+
compute1_expected_placement_view[
1394+
"usages"]["0000:81:00.0"][self.VF_RC] = 1
1395+
compute1_expected_placement_view["allocations"][server["id"]] = {
1396+
"0000:81:00.0": {self.VF_RC: 1}
1397+
}
1398+
self.assert_placement_pci_view(
1399+
"compute1", **compute1_expected_placement_view)
1400+
self._run_periodics()
1401+
self.assert_placement_pci_view(
1402+
"compute1", **compute1_expected_placement_view)
1403+
1404+
# resize the server to consume 2 VFs on the same host
1405+
extra_spec = {"pci_passthrough:alias": "a-vf:2"}
1406+
flavor_id = self._create_flavor(extra_spec=extra_spec)
1407+
server = self._resize_server(server, flavor_id)
1408+
# during resize both the source and the dest allocations are kept
1409+
# and in same host resize that means both consumed from the same host
1410+
self.assertPCIDeviceCounts("compute1", total=3, free=0)
1411+
# the source side of the allocation held by the migration
1412+
self._move_server_allocation(
1413+
compute1_expected_placement_view["allocations"], server['id'])
1414+
# NOTE(gibi): we intentionally don't heal allocation for the instance
1415+
# while it is being resized. See the comment in the
1416+
# pci_placement_translator about the reasoning.
1417+
self.assert_placement_pci_view(
1418+
"compute1", **compute1_expected_placement_view)
1419+
self._run_periodics()
1420+
self.assert_placement_pci_view(
1421+
"compute1", **compute1_expected_placement_view)
1422+
1423+
# revert the resize
1424+
self._revert_resize(server)
1425+
self.assertPCIDeviceCounts("compute1", total=3, free=2)
1426+
# the original allocations are restored
1427+
self._move_server_allocation(
1428+
compute1_expected_placement_view["allocations"],
1429+
server["id"],
1430+
revert=True,
1431+
)
1432+
compute1_expected_placement_view[
1433+
"usages"]["0000:81:00.0"][self.VF_RC] = 1
1434+
compute1_expected_placement_view["allocations"][server["id"]] = {
1435+
"0000:81:00.0": {self.VF_RC: 1}
1436+
}
1437+
self.assert_placement_pci_view(
1438+
"compute1", **compute1_expected_placement_view)
1439+
self._run_periodics()
1440+
self.assert_placement_pci_view(
1441+
"compute1", **compute1_expected_placement_view)
1442+
1443+
# now resize and then confirm it
1444+
self._resize_server(server, flavor_id)
1445+
self._confirm_resize(server)
1446+
1447+
# we expect that the consumption is according to the new flavor
1448+
self.assertPCIDeviceCounts("compute1", total=3, free=1)
1449+
compute1_expected_placement_view[
1450+
"usages"]["0000:81:00.0"][self.VF_RC] = 2
1451+
compute1_expected_placement_view["allocations"][server["id"]] = {
1452+
"0000:81:00.0": {self.VF_RC: 2}
1453+
}
1454+
self.assert_placement_pci_view(
1455+
"compute1", **compute1_expected_placement_view)
1456+
self._run_periodics()
1457+
self.assert_placement_pci_view(
1458+
"compute1", **compute1_expected_placement_view)

nova/tests/unit/compute/test_pci_placement_translator.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_translator_skips_devices_without_matching_spec(self):
6161
provider_tree = mock.NonCallableMock()
6262

6363
ppt.update_provider_tree_for_pci(
64-
provider_tree, "fake-node", pci_tracker, {})
64+
provider_tree, "fake-node", pci_tracker, {}, [])
6565

6666
self.assertIn(
6767
"Device spec is not found for device 0000:81:00.0 in "
@@ -113,7 +113,8 @@ def test_resource_class_normalization(self, pci_dev, rc_name, expected_rc):
113113
)
114114

115115
def test_dependent_device_pf_then_vf(self):
116-
pv = ppt.PlacementView("fake-node")
116+
pv = ppt.PlacementView(
117+
"fake-node", instances_under_same_host_resize=[])
117118
pf = pci_device.PciDevice(
118119
address="0000:81:00.0",
119120
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -140,7 +141,8 @@ def test_dependent_device_pf_then_vf(self):
140141
)
141142

142143
def test_dependent_device_vf_then_pf(self):
143-
pv = ppt.PlacementView("fake-node")
144+
pv = ppt.PlacementView(
145+
"fake-node", instances_under_same_host_resize=[])
144146
pf = pci_device.PciDevice(
145147
address="0000:81:00.0",
146148
dev_type=fields.PciDeviceType.SRIOV_PF
@@ -173,7 +175,8 @@ def test_dependent_device_vf_then_pf(self):
173175
)
174176

175177
def test_mixed_rc_for_sibling_vfs(self):
176-
pv = ppt.PlacementView("fake-node")
178+
pv = ppt.PlacementView(
179+
"fake-node", instances_under_same_host_resize=[])
177180
vf1, vf2, vf3, vf4 = [
178181
pci_device.PciDevice(
179182
address="0000:81:00.%d" % f,

nova/tests/unit/compute/test_resource_tracker.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1808,6 +1808,7 @@ def test_update_pci_reporting(self, mock_update_provider_tree_for_pci):
18081808
compute_obj.hypervisor_hostname,
18091809
self.rt.pci_tracker,
18101810
mock_get_allocs.return_value,
1811+
[],
18111812
)
18121813
upt = self.rt.reportclient.update_from_provider_tree
18131814
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
@@ -1847,6 +1848,7 @@ def test_update_pci_reporting_reshape(
18471848
compute_obj.hypervisor_hostname,
18481849
self.rt.pci_tracker,
18491850
mock_get_allocs.return_value,
1851+
[],
18501852
)
18511853
upt = self.rt.reportclient.update_from_provider_tree
18521854
upt.assert_called_once_with(
@@ -1892,11 +1894,72 @@ def test_update_pci_reporting_driver_reshape(
18921894
compute_obj.hypervisor_hostname,
18931895
self.rt.pci_tracker,
18941896
mock_get_allocs.return_value,
1897+
[],
18951898
)
18961899
upt = self.rt.reportclient.update_from_provider_tree
18971900
upt.assert_called_once_with(
18981901
mock.sentinel.ctx, ptree, allocations=mock_get_allocs.return_value)
18991902

1903+
@mock.patch(
1904+
'nova.compute.resource_tracker.ResourceTracker.'
1905+
'_sync_compute_service_disabled_trait',
1906+
new=mock.Mock()
1907+
)
1908+
@mock.patch(
1909+
'nova.compute.resource_tracker.ResourceTracker._resource_change',
1910+
new=mock.Mock(return_value=False)
1911+
)
1912+
@mock.patch(
1913+
'nova.compute.pci_placement_translator.update_provider_tree_for_pci')
1914+
def test_update_pci_reporting_same_host_resize(
1915+
self, mock_update_provider_tree_for_pci
1916+
):
1917+
"""Assert that resource tracker calls update_provider_tree_for_pci
1918+
with the list of instances that are being resized to the same
1919+
host.
1920+
"""
1921+
compute_obj = _COMPUTE_NODE_FIXTURES[0].obj_clone()
1922+
self._setup_rt()
1923+
ptree = self._setup_ptree(compute_obj)
1924+
# simulate that pci reporting did not touch allocations
1925+
mock_update_provider_tree_for_pci.return_value = False
1926+
self.rt.tracked_migrations = {
1927+
uuids.inst1: objects.Migration(
1928+
migration_type="resize",
1929+
source_node="fake-node",
1930+
dest_node="fake-node",
1931+
instance_uuid=uuids.inst1,
1932+
),
1933+
uuids.inst2: objects.Migration(
1934+
migration_type="evacuation",
1935+
source_node="fake-node",
1936+
dest_node="fake-node",
1937+
instance_uuid=uuids.inst2,
1938+
),
1939+
uuids.inst3: objects.Migration(
1940+
migration_type="resize",
1941+
source_node="fake-node1",
1942+
dest_node="fake-node2",
1943+
instance_uuid=uuids.inst3,
1944+
),
1945+
}
1946+
1947+
self.rt._update(mock.sentinel.ctx, compute_obj)
1948+
1949+
mock_get_allocs = (
1950+
self.report_client_mock.get_allocations_for_provider_tree)
1951+
mock_get_allocs.assert_called_once_with(
1952+
mock.sentinel.ctx, compute_obj.hypervisor_hostname)
1953+
mock_update_provider_tree_for_pci.assert_called_once_with(
1954+
ptree,
1955+
compute_obj.hypervisor_hostname,
1956+
self.rt.pci_tracker,
1957+
mock_get_allocs.return_value,
1958+
[uuids.inst1],
1959+
)
1960+
upt = self.rt.reportclient.update_from_provider_tree
1961+
upt.assert_called_once_with(mock.sentinel.ctx, ptree, allocations=None)
1962+
19001963
@mock.patch('nova.objects.Service.get_by_compute_host',
19011964
return_value=objects.Service(disabled=True))
19021965
def test_sync_compute_service_disabled_trait_add(self, mock_get_by_host):

0 commit comments

Comments
 (0)