Commit 4b7c31d

Zuul authored and openstack-gerrit committed
Merge "Heal missing simple PCI allocation in the resource tracker"
2 parents bcdf598 + d483a69 commit 4b7c31d

3 files changed: +341 -6 lines changed


nova/compute/pci_placement_translator.py

Lines changed: 103 additions & 5 deletions
@@ -11,11 +11,14 @@
 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 # License for the specific language governing permissions and limitations
 # under the License.
+import collections
+import copy
 import typing as ty
 
 import os_resource_classes
 import os_traits
 from oslo_log import log as logging
+from oslo_utils import uuidutils
 
 from nova.compute import provider_tree
 from nova import exception
@@ -126,6 +129,10 @@ def __init__(self, name: str) -> None:
     def devs(self) -> ty.List[pci_device.PciDevice]:
         return [self.parent_dev] if self.parent_dev else self.children_devs
 
+    @property
+    def to_be_deleted(self):
+        return not bool(self.devs)
+
     def add_child(self, dev, dev_spec_tags: ty.Dict[str, str]) -> None:
         if self.parent_dev:
             raise exception.PlacementPciDependentDeviceException(
@@ -177,13 +184,34 @@ def remove_parent(self, dev: pci_device.PciDevice) -> None:
         # Nothing to do here. The update_provider_tree we handle full RP
         pass
 
+    def _get_allocations(self) -> ty.Mapping[str, int]:
+        """Return a dict of used resources keyed by consumer UUID.
+
+        Note that:
+        1) a single consumer can consume more than one resource from a single
+           RP. I.e. A VM with two VFs from the same parent PF
+        2) multiple consumers can consume resources from a single RP. I.e. two
+           VMs consuming one VF from the same PF each
+        3) regardless of how many consumers we have on a single PCI RP, they
+           are always consuming resources from the same resource class as
+           we are not supporting dependent devices modelled by the same RP but
+           different resource classes.
+        """
+        return collections.Counter(
+            [
+                dev.instance_uuid
+                for dev in self.devs
+                if "instance_uuid" in dev and dev.instance_uuid
+            ]
+        )
+
     def update_provider_tree(
         self,
         provider_tree: provider_tree.ProviderTree,
         parent_rp_name: str,
     ) -> None:
 
-        if not self.parent_dev and not self.children_devs:
+        if self.to_be_deleted:
             # This means we need to delete the RP from placement if exists
             if provider_tree.exists(self.name):
                 # NOTE(gibi): If there are allocations on this RP then
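The _get_allocations() helper added above counts, for each consumer (instance) UUID, how many devices of this RP are assigned to it: collections.Counter collapses the flat list of instance UUIDs into per-consumer amounts, matching point 1) of its docstring. A minimal standalone sketch of that counting step, using made-up instance UUIDs rather than values from the commit:

import collections

# A VM holding two VFs from the same parent PF shows up twice in the list,
# once per assigned device; a VM with a single VF shows up once.
vm_a = "11111111-2222-3333-4444-555555555555"  # hypothetical consumer UUID
vm_b = "66666666-7777-8888-9999-aaaaaaaaaaaa"  # hypothetical consumer UUID
assigned_instance_uuids = [vm_a, vm_a, vm_b]

# Keys are consumer UUIDs, values are the amount consumed from this RP's
# single resource class, e.g. Counter({vm_a: 2, vm_b: 1}).
per_consumer_usage = collections.Counter(assigned_instance_uuids)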
@@ -194,7 +222,16 @@ def update_provider_tree(
             return
 
         if not provider_tree.exists(self.name):
-            provider_tree.new_child(self.name, parent_rp_name)
+            # NOTE(gibi): We need to generate UUID for the new provider in Nova
+            # instead of letting Placement assign one. We are potentially
+            # healing a missing RP along with missing allocations on that RP.
+            # The allocation healing happens with POST /reshape, and that API
+            # only takes RP UUIDs.
+            provider_tree.new_child(
+                self.name,
+                parent_rp_name,
+                uuid=uuidutils.generate_uuid(dashed=True)
+            )
 
         provider_tree.update_inventory(
             self.name,
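The NOTE above is the reason the RP UUID is generated on the Nova side: the healed allocations are later submitted to Placement with POST /reshape, which refers to providers by UUID, so the UUID of a freshly created child RP has to be known up front. A short sketch of the same idea, with a hypothetical resource class name standing in for the real PCI one:

from oslo_utils import uuidutils

# Pre-generate the provider UUID so the same identifier can be passed to
# provider_tree.new_child(...) and used as the key of the healed allocation.
rp_uuid = uuidutils.generate_uuid(dashed=True)

# Hypothetical allocation fragment keyed by the pre-generated RP UUID;
# "CUSTOM_PCI_DEV" is illustrative, not a value taken from this commit.
healed_fragment = {rp_uuid: {"resources": {"CUSTOM_PCI_DEV": 1}}}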
@@ -214,6 +251,43 @@ def update_provider_tree(
         )
         provider_tree.update_traits(self.name, self.traits)
 
+    def update_allocations(
+        self,
+        allocations: dict,
+        provider_tree: provider_tree.ProviderTree
+    ) -> bool:
+        updated = False
+
+        if self.to_be_deleted:
+            # the RP is going away because either removed from the hypervisor
+            # or the compute's config is changed to ignore the device.
+            return updated
+
+        # we assume here that if this RP has been created in the current round
+        # of healing then it already has a UUID assigned.
+        rp_uuid = provider_tree.data(self.name).uuid
+
+        for consumer, amount in self._get_allocations().items():
+            current_allocs = allocations[consumer]['allocations']
+            current_rp_allocs = current_allocs.get(rp_uuid)
+
+            if current_rp_allocs:
+                # update an existing allocation if the current one differs
+                current_rc_allocs = current_rp_allocs["resources"].get(
+                    self.resource_class, 0)
+                if current_rc_allocs != amount:
+                    current_rp_allocs[
+                        "resources"][self.resource_class] = amount
+                    updated = True
+            else:
+                # insert a new allocation as it is missing
+                current_allocs[rp_uuid] = {
+                    "resources": {self.resource_class: amount}
+                }
+                updated = True
+
+        return updated
+
     def __str__(self) -> str:
         if self.devs:
             return (
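The per-RP update_allocations() above mutates the compute node's allocations dict in place. As the method reads it, that dict is keyed by consumer UUID, then "allocations", then RP UUID, then a "resources" map per resource class. A hedged illustration with hypothetical UUIDs and resource class names, showing a missing PCI allocation being inserted for one consumer (the case that flips the return value to True):

# All identifiers below are hypothetical and for illustration only.
consumer = "aaaa0000-0000-0000-0000-000000000001"    # instance UUID
compute_rp = "bbbb0000-0000-0000-0000-000000000002"  # compute root RP UUID
pci_rp = "cccc0000-0000-0000-0000-000000000003"      # PCI device RP UUID

allocations = {
    consumer: {
        "allocations": {
            compute_rp: {"resources": {"VCPU": 2, "MEMORY_MB": 2048}},
        }
    }
}

# Healing adds the missing PCI allocation under the PCI RP's UUID:
allocations[consumer]["allocations"][pci_rp] = {
    "resources": {"CUSTOM_PCI_DEV": 1}  # hypothetical resource class
}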
@@ -348,6 +422,21 @@ def update_provider_tree
         for rp_name, rp in self.rps.items():
             rp.update_provider_tree(provider_tree, self.root_rp_name)
 
+    def update_allocations(
+        self,
+        allocations: dict,
+        provider_tree: provider_tree.ProviderTree
+    ) -> bool:
+        """Updates the passed in allocations dict inplace with any PCI
+        allocations that is inferred from the PciDevice objects already added
+        to the view. It returns True if the allocations dict has been changed,
+        False otherwise.
+        """
+        updated = False
+        for rp in self.rps.values():
+            updated |= rp.update_allocations(allocations, provider_tree)
+        return updated
+
 
 def ensure_no_dev_spec_with_devname(dev_specs: ty.List[devspec.PciDeviceSpec]):
     for dev_spec in dev_specs:
@@ -437,7 +526,16 @@ def update_provider_tree_for_pci(
     LOG.info("Placement PCI resource view: %s", pv)
 
     pv.update_provider_tree(provider_tree)
-    # FIXME(gibi): Check allocations too based on pci_dev.instance_uuid and
-    # if here was any update then we have to return True to trigger a reshape.
+    old_alloc = copy.deepcopy(allocations)
+    updated = pv.update_allocations(allocations, provider_tree)
+
+    if updated:
+        LOG.debug(
+            "Placement PCI view needs allocation healing. This should only "
+            "happen if [scheduler]pci_in_placement is still disabled. "
+            "Original allocations: %s New allocations: %s",
+            old_alloc,
+            allocations,
+        )
 
-    return False
+    return updated
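With this change update_provider_tree_for_pci() returns True when the allocations dict was modified; per the removed FIXME, that is the caller's cue to trigger a reshape so the healed allocations and the updated provider tree reach Placement together, and the debug log notes this is only expected while [scheduler]pci_in_placement is still disabled. A hedged caller-side sketch, with hypothetical helpers standing in for the real resource tracker plumbing:

def _push_reshape(provider_tree, allocations):
    """Hypothetical stand-in for submitting a Placement reshape."""

def _push_provider_tree(provider_tree):
    """Hypothetical stand-in for a plain provider tree update."""

def sync_pci_view(pv, provider_tree, allocations):
    # pv.update_allocations() mutates `allocations` in place and reports
    # whether any allocation was healed.
    if pv.update_allocations(allocations, provider_tree):
        _push_reshape(provider_tree, allocations)
    else:
        _push_provider_tree(provider_tree)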

nova/tests/functional/libvirt/test_pci_in_placement.py

Lines changed: 188 additions & 0 deletions
@@ -62,6 +62,9 @@ def setUp(self):
     def _to_device_spec_conf(spec_list):
         return [jsonutils.dumps(x) for x in spec_list]
 
+
+class PlacementPCIInventoryReportingTests(PlacementPCIReportingTests):
+
     def test_new_compute_init_with_pci_devs(self):
         """A brand new compute is started with multiple pci devices configured
         for nova.
@@ -748,3 +751,188 @@ def test_reporting_cannot_be_disable_once_it_is_enabled(self):
             "enabled.",
             str(ex)
         )
+
+
+class PlacementPCIAllocationHealingTests(PlacementPCIReportingTests):
+    def setUp(self):
+        super().setUp()
+        # Pre-configure a PCI alias to consume our devs
+        alias_pci = {
+            "vendor_id": fakelibvirt.PCI_VEND_ID,
+            "product_id": fakelibvirt.PCI_PROD_ID,
+            "name": "a-pci-dev",
+        }
+        alias_pf = {
+            "vendor_id": fakelibvirt.PCI_VEND_ID,
+            "product_id": fakelibvirt.PF_PROD_ID,
+            "device_type": "type-PF",
+            "name": "a-pf",
+        }
+        alias_vf = {
+            "vendor_id": fakelibvirt.PCI_VEND_ID,
+            "product_id": fakelibvirt.VF_PROD_ID,
+            "device_type": "type-VF",
+            "name": "a-vf",
+        }
+        self.flags(
+            group='pci',
+            alias=self._to_pci_alias_conf([alias_pci, alias_pf, alias_vf]))
+
+    @staticmethod
+    def _to_pci_alias_conf(alias_list):
+        return [jsonutils.dumps(x) for x in alias_list]
+
+    def test_heal_single_pci_allocation(self):
+        # The fake libvirt will emulate on the host:
+        # * one type-PCI in slot 0
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pci=1, num_pfs=0, num_vfs=0)
+        # the config matches the PCI dev
+        device_spec = self._to_device_spec_conf(
+            [
+                {
+                    "vendor_id": fakelibvirt.PCI_VEND_ID,
+                    "product_id": fakelibvirt.PCI_PROD_ID,
+                },
+            ]
+        )
+        self.flags(group='pci', device_spec=device_spec)
+
+        # Start a compute *without* PCI tracking in placement
+        self.mock_pci_report_in_placement.return_value = False
+        self.start_compute(hostname="compute1", pci_info=pci_info)
+        self.assertPCIDeviceCounts("compute1", total=1, free=1)
+
+        # Create an instance that consume our PCI dev
+        extra_spec = {"pci_passthrough:alias": "a-pci-dev:1"}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        server = self._create_server(flavor_id=flavor_id, networks=[])
+        self.assertPCIDeviceCounts("compute1", total=1, free=0)
+
+        # Restart the compute but now with PCI tracking enabled
+        self.mock_pci_report_in_placement.return_value = True
+        self.restart_compute_service("compute1")
+        # Assert that the PCI allocation is healed in placement
+        self.assertPCIDeviceCounts("compute1", total=1, free=0)
+        expected_placement_view = {
+            "inventories": {
+                "0000:81:00.0": {self.PCI_RC: 1},
+            },
+            "traits": {
+                "0000:81:00.0": [],
+            },
+            "usages": {
+                "0000:81:00.0": {self.PCI_RC: 1}
+            },
+            "allocations": {
+                server['id']: {
+                    "0000:81:00.0": {self.PCI_RC: 1}
+                }
+            }
+        }
+        self.assert_placement_pci_view("compute1", **expected_placement_view)
+
+        # run an update_available_resources periodic and assert that the usage
+        # and allocation stays
+        self._run_periodics()
+        self.assert_placement_pci_view("compute1", **expected_placement_view)
+
+    def test_heal_multiple_allocations(self):
+        # The fake libvirt will emulate on the host:
+        # * two type-PCI devs (slot 0 and 1)
+        # * two type-PFs (slot 2 and 3) with 4 type-VFs each
+        pci_info = fakelibvirt.HostPCIDevicesInfo(
+            num_pci=2, num_pfs=2, num_vfs=8)
+        # the config matches:
+        device_spec = self._to_device_spec_conf(
+            [
+                # both type-PCI
+                {
+                    "vendor_id": fakelibvirt.PCI_VEND_ID,
+                    "product_id": fakelibvirt.PCI_PROD_ID,
+                },
+                # the PF in slot 2
+                {
+                    "vendor_id": fakelibvirt.PCI_VEND_ID,
+                    "product_id": fakelibvirt.PF_PROD_ID,
+                    "address": "0000:81:02.0",
+                },
+                # the VFs in slot 3
+                {
+                    "vendor_id": fakelibvirt.PCI_VEND_ID,
+                    "product_id": fakelibvirt.VF_PROD_ID,
+                    "address": "0000:81:03.*",
+                },
+            ]
+        )
+        self.flags(group='pci', device_spec=device_spec)
+
+        # Start a compute *without* PCI tracking in placement
+        self.mock_pci_report_in_placement.return_value = False
+        self.start_compute(hostname="compute1", pci_info=pci_info)
+        # 2 PCI + 1 PF + 4 VFs
+        self.assertPCIDeviceCounts("compute1", total=7, free=7)
+
+        # Create three instances consuming devices:
+        # * server_2pci: two type-PCI
+        # * server_pf_vf: one PF and one VF
+        # * server_2vf: two VFs
+        extra_spec = {"pci_passthrough:alias": "a-pci-dev:2"}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        server_2pci = self._create_server(flavor_id=flavor_id, networks=[])
+        self.assertPCIDeviceCounts("compute1", total=7, free=5)
+
+        extra_spec = {"pci_passthrough:alias": "a-pf:1,a-vf:1"}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        server_pf_vf = self._create_server(flavor_id=flavor_id, networks=[])
+        self.assertPCIDeviceCounts("compute1", total=7, free=3)
+
+        extra_spec = {"pci_passthrough:alias": "a-vf:2"}
+        flavor_id = self._create_flavor(extra_spec=extra_spec)
+        server_2vf = self._create_server(flavor_id=flavor_id, networks=[])
+        self.assertPCIDeviceCounts("compute1", total=7, free=1)
+
+        # Restart the compute but now with PCI tracking enabled
+        self.mock_pci_report_in_placement.return_value = True
+        self.restart_compute_service("compute1")
+        # Assert that the PCI allocation is healed in placement
+        self.assertPCIDeviceCounts("compute1", total=7, free=1)
+        expected_placement_view = {
+            "inventories": {
+                "0000:81:00.0": {self.PCI_RC: 1},
+                "0000:81:01.0": {self.PCI_RC: 1},
+                "0000:81:02.0": {self.PF_RC: 1},
+                "0000:81:03.0": {self.VF_RC: 4},
+            },
+            "traits": {
+                "0000:81:00.0": [],
+                "0000:81:01.0": [],
+                "0000:81:02.0": [],
+                "0000:81:03.0": [],
+            },
+            "usages": {
+                "0000:81:00.0": {self.PCI_RC: 1},
+                "0000:81:01.0": {self.PCI_RC: 1},
+                "0000:81:02.0": {self.PF_RC: 1},
+                "0000:81:03.0": {self.VF_RC: 3},
+            },
+            "allocations": {
+                server_2pci['id']: {
+                    "0000:81:00.0": {self.PCI_RC: 1},
+                    "0000:81:01.0": {self.PCI_RC: 1},
+                },
+                server_pf_vf['id']: {
+                    "0000:81:02.0": {self.PF_RC: 1},
+                    "0000:81:03.0": {self.VF_RC: 1},
+                },
+                server_2vf['id']: {
+                    "0000:81:03.0": {self.VF_RC: 2}
+                },
+            },
+        }
+        self.assert_placement_pci_view("compute1", **expected_placement_view)
+
+        # run an update_available_resources periodic and assert that the usage
+        # and allocation stays
+        self._run_periodics()
+        self.assert_placement_pci_view("compute1", **expected_placement_view)
