Skip to content

Commit dcf2e3c

Browse files
committed
Update InstanceNUMACell version in more cases
The data migration of InstanceNUMACell 1.4 to 1.5 only moved the data to the new pcpuset field but does not update the ovo version string of the object in the DB. The previous patch added the missing version update logic. However it only fixes the issue if the data is not already "half" migrated to the new structure. So this patch adds logic to also do the right thing if the wrong data migration already happened. At the end the solution needs to consider multiple scenarios: * data is never migrated to the new schema so the new code needs to migrate it and update the version string to match the new schema. (done by the previous patch) * data is half migrated by the buggy code and the new code need to finish the migration by stamping the version in the DB. * data is half migrated and then further modified to use the new 1.6 feature cpu_policy mixed. * data version is older in the DB than we can meaningfully upgrade Closes-Bug: #2097359 Change-Id: I10ecfa7841b15637dea3e4736e90faa5f33ddff3 (cherry picked from commit 9507b7b) (cherry picked from commit c008126) (cherry picked from commit 882c5eb) (cherry picked from commit e3db83c)
1 parent 7bb6b8d commit dcf2e3c

File tree

3 files changed

+186
-18
lines changed

3 files changed

+186
-18
lines changed

nova/objects/instance_numa.py

Lines changed: 68 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -196,26 +196,78 @@ def _migrate_legacy_dedicated_instance_cpuset(cls, obj):
196196
# come from instance_extra or request_spec too.
197197
update_db = False
198198
for cell in obj.cells:
199-
if len(cell.cpuset) == 0:
199+
version = versionutils.convert_version_to_tuple(cell.VERSION)
200+
201+
if version < (1, 4):
202+
LOG.warning(
203+
"InstanceNUMACell %s with version %s for instance %s has "
204+
"too old version in the DB, don't know how to update, "
205+
"ignoring.", cell, cell.VERSION, obj.instance_uuid)
206+
continue
207+
208+
if (version >= (1, 5) and
209+
cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED and
210+
(cell.cpuset or not cell.pcpuset)
211+
):
212+
LOG.warning(
213+
"InstanceNUMACell %s with version %s is inconsistent as "
214+
"the version is 1.5 or greater, cpu_policy is dedicated, "
215+
"but cpuset is not empty or pcpuset is empty.",
216+
cell, cell.VERSION)
200217
continue
201-
# NOTE(gibi): This data migration populates the pcpuset field that
202-
# is new in version 1.5. However below we bump the object version
203-
# to 1.6 directly. This is intentional. The version 1.6 introduced
204-
# a new possible value 'mixed' for the cpu_policy field. As that
205-
# is a forward compatible change we don't have a specific data
206-
# migration for it. But we also don't have an automated way to bump
207-
# old object versions from 1.5 to 1.6. So we do it here just to
208-
# avoid inconsistency between data and version in the DB.
209-
if cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED:
210-
cell.pcpuset = cell.cpuset
211-
cell.cpuset = set()
212-
cell.VERSION = '1.6'
213-
update_db = True
214-
else:
215-
if 'pcpuset' not in cell:
218+
219+
# NOTE(gibi): The data migration between 1.4. and 1.5 populates the
220+
# pcpuset field that is new in version 1.5. However below we update
221+
# the object version to 1.6 directly. This is intentional. The
222+
# version 1.6 introduced a new possible value 'mixed' for the
223+
# cpu_policy field. As that is a forward compatible change we don't
224+
# have a specific data migration for it. But we also don't have an
225+
# automated way to update old object versions from 1.5 to 1.6. So
226+
# we do it here just to avoid inconsistency between data and
227+
# version in the DB.
228+
if version < (1, 6):
229+
if cell.cpu_policy == obj_fields.CPUAllocationPolicy.DEDICATED:
230+
if "pcpuset" not in cell or not cell.pcpuset:
231+
# this cell was never migrated to 1.6, migrate it.
232+
cell.pcpuset = cell.cpuset
233+
cell.cpuset = set()
234+
cell.VERSION = '1.6'
235+
update_db = True
236+
else:
237+
# This data was already migrated to 1.6 format but the
238+
# version string wasn't updated to 1.6. This happened
239+
# before the fix
240+
# https://bugs.launchpad.net/nova/+bug/2097360
241+
# Only update the version string.
242+
cell.VERSION = '1.6'
243+
update_db = True
244+
elif cell.cpu_policy in (
245+
None, obj_fields.CPUAllocationPolicy.SHARED):
246+
# no data migration needed just add the new field and
247+
# stamp the new version in the DB
216248
cell.pcpuset = set()
217249
cell.VERSION = '1.6'
218250
update_db = True
251+
else: # obj_fields.CPUAllocationPolicy.MIXED
252+
# This means the cell data already got updated to the 1.6
253+
# content as MIXED only supported with 1.6 but the version
254+
# was not updated to 1.6.
255+
# We should not do the data migration as that would trample
256+
# the pcpuset field. Just stamp the 1.6 version in the DB
257+
# and hope for the best.
258+
LOG.warning(
259+
"InstanceNUMACell %s with version %s for instance %s "
260+
"has older than 1.6 version in the DB but using the "
261+
"1.6 feature CPUAllocationPolicy.MIXED. So nova "
262+
"assumes that the data is in 1.6 format and only the "
263+
"version string is old. Correcting the version string "
264+
"in the DB.", cell, cell.VERSION, obj.instance_uuid)
265+
cell.VERSION = '1.6'
266+
update_db = True
267+
268+
# When the next ovo version 1.7 is added it needs to be handed
269+
# here to do any migration if needed and to ensure the version in
270+
# the DB is stamped to 1.7
219271

220272
return update_db
221273

nova/tests/unit/objects/test_instance_numa.py

Lines changed: 115 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ def test_obj_from_db_obj(self):
363363
fake_topo_obj = copy.deepcopy(fake_topo_obj_w_cell_v1_4)
364364
for cell in fake_topo_obj.cells:
365365
cell.cpu_policy = objects.fields.CPUAllocationPolicy.DEDICATED
366+
cell.VERSION = '1.4'
366367

367368
numa_topology = objects.InstanceNUMATopology.obj_from_db_obj(
368369
self.context, fake_instance_uuid, fake_topo_obj._to_json())
@@ -402,6 +403,7 @@ def test_obj_from_db_obj_no_pinning(self):
402403
fake_topo_obj = copy.deepcopy(fake_topo_obj_w_cell_v1_4)
403404
for cell in fake_topo_obj.cells:
404405
cell.cpu_policy = objects.fields.CPUAllocationPolicy.SHARED
406+
cell.VERSION = '1.4'
405407

406408
numa_topology = objects.InstanceNUMATopology.obj_from_db_obj(
407409
self.context, fake_instance_uuid, fake_topo_obj._to_json())
@@ -412,7 +414,7 @@ def test_obj_from_db_obj_no_pinning(self):
412414
self.assertEqual(topo_cell.cpuset, obj_cell.cpuset)
413415
self.assertEqual(set(), obj_cell.pcpuset)
414416

415-
def test__migrate_legacy_dedicated_instance_cpuset(self):
417+
def test__migrate_legacy_dedicated_instance_cpuset_dedicate_1_4(self):
416418
# Create a topology with a cell on latest version. Would be nice
417419
# to create one the old 1.4 cell version directly but that is only
418420
# possible indirectly as done below.
@@ -446,7 +448,118 @@ def test__migrate_legacy_dedicated_instance_cpuset(self):
446448
# pcpuset
447449
self.assertEqual(set(), topo_loaded.cells[0].cpuset)
448450
self.assertEqual({0, 1}, topo_loaded.cells[0].pcpuset)
449-
# and the version is bumped to 1.6
451+
# and the version is updated to 1.6
452+
self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
453+
454+
def test__migrate_legacy_dedicated_instance_cpuset_shared_1_4(self):
455+
# Create a topology with a cell on latest version. Would be nice
456+
# to create one the old 1.4 cell version directly but that is only
457+
# possible indirectly as done below.
458+
topo = objects.InstanceNUMATopology(
459+
instance_uuid=fake_instance_uuid,
460+
cells=[
461+
objects.InstanceNUMACell(id=0, cpuset={0, 1}, pcpuset=set()),
462+
])
463+
topo.cells[0].cpu_policy = objects.fields.CPUAllocationPolicy.SHARED
464+
465+
# Use the builtin backlevelling logic to pull it back to old cell
466+
# version
467+
topo_with_cell_1_4 = topo.obj_to_primitive(
468+
target_version='1.3', version_manifest={'InstanceNUMACell': '1.4'})
469+
470+
# Just check that the backlevelling works, and we have a cell with
471+
# version and data on 1.4 level
472+
cell_1_4_primitive = topo_with_cell_1_4['nova_object.data']['cells'][0]
473+
self.assertEqual('1.4', cell_1_4_primitive['nova_object.version'])
474+
self.assertEqual(
475+
(0, 1), cell_1_4_primitive['nova_object.data']['cpuset'])
476+
self.assertNotIn('pcpuset', cell_1_4_primitive['nova_object.data'])
477+
478+
# Now simulate that such old data is loaded from the DB and migrated
479+
# from 1.4 to 1.6 by the data migration
480+
topo_loaded = objects.InstanceNUMATopology.obj_from_db_obj(
481+
self.context, fake_instance_uuid,
482+
jsonutils.dumps(topo_with_cell_1_4))
483+
484+
# In place data migration did not move the data as the cpu_policy is
485+
# shared
486+
self.assertEqual({0, 1}, topo_loaded.cells[0].cpuset)
487+
self.assertEqual(set(), topo_loaded.cells[0].pcpuset)
488+
# but the version is updated to 1.6
489+
self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
490+
491+
def test__migrate_legacy_dedicated_instance_cpuset_dedicated_half_migrated(
492+
self
493+
):
494+
# Before the fix for https://bugs.launchpad.net/nova/+bug/2097360
495+
# landed Nova only half migrated the InstanceNUMACell from 1.4 to 1.6
496+
# by doing the data move but not updating the version string in the DB.
497+
# This test case ensures that if such migration happened before the fix
498+
# was deployed then Nova fixed the DB content on the next load as well.
499+
500+
# Create a topology with a cell on latest version. Would be nice
501+
# to create one the old 1.4 cell version directly but that is only
502+
# possible indirectly as done below.
503+
topo = objects.InstanceNUMATopology(
504+
instance_uuid=fake_instance_uuid,
505+
cells=[
506+
objects.InstanceNUMACell(id=0, cpuset=set(), pcpuset={0, 1}),
507+
])
508+
topo.cells[0].cpu_policy = objects.fields.CPUAllocationPolicy.DEDICATED
509+
510+
# simulate the half done migration by pulling back the version string
511+
# in the primitive form to 1.4 while keeping the data on 1.6 format.
512+
topo_primitive = topo.obj_to_primitive()
513+
cell_primitive = topo_primitive['nova_object.data']['cells'][0]
514+
self.assertEqual('1.6', cell_primitive['nova_object.version'])
515+
cell_primitive['nova_object.version'] = '1.4'
516+
517+
topo_loaded = objects.InstanceNUMATopology.obj_from_db_obj(
518+
self.context, fake_instance_uuid, jsonutils.dumps(topo_primitive))
519+
520+
# the data did not change
521+
self.assertEqual(set(), topo_loaded.cells[0].cpuset)
522+
self.assertEqual({0, 1}, topo_loaded.cells[0].pcpuset)
523+
# but the version is updated to 1.6
524+
self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
525+
526+
def test__migrate_legacy_dedicated_instance_cpuset_mixed_1_4(self):
527+
# Before the fix for https://bugs.launchpad.net/nova/+bug/2097360
528+
# landed Nova only half migrated the InstanceNUMACell from 1.4 to 1.6
529+
# by doing the data move but not updating the version string in the DB.
530+
# After this the instance is resized to flavor with mixed cpu_policy
531+
# then there is a good chance that the DB has version string 1.4 but
532+
# the data is on 1.6 format with the cpu_policy set to mixed.
533+
# This test case ensures that if such situation happened before the fix
534+
# was deployed then Nova fixed the DB content on the next load as well.
535+
536+
# Create a topology with a cell on latest version. Would be nice
537+
# to create one the old 1.4 cell version directly but that is only
538+
# possible indirectly as done below.
539+
topo = objects.InstanceNUMATopology(
540+
instance_uuid=fake_instance_uuid,
541+
cells=[
542+
objects.InstanceNUMACell(id=0, cpuset={0, 1}, pcpuset={2, 3}),
543+
])
544+
topo.cells[0].cpu_policy = objects.fields.CPUAllocationPolicy.MIXED
545+
546+
# simulate the half done migration by pulling back the version string
547+
# in the primitive form to 1.4 while keeping the data on 1.6 format.
548+
topo_primitive = topo.obj_to_primitive()
549+
cell_primitive = topo_primitive['nova_object.data']['cells'][0]
550+
self.assertEqual('1.6', cell_primitive['nova_object.version'])
551+
cell_primitive['nova_object.version'] = '1.4'
552+
553+
topo_loaded = objects.InstanceNUMATopology.obj_from_db_obj(
554+
self.context, fake_instance_uuid, jsonutils.dumps(topo_primitive))
555+
556+
# the data did not change
557+
self.assertEqual({0, 1}, topo_loaded.cells[0].cpuset)
558+
self.assertEqual({2, 3}, topo_loaded.cells[0].pcpuset)
559+
self.assertEqual(
560+
objects.fields.CPUAllocationPolicy.MIXED,
561+
topo_loaded.cells[0].cpu_policy)
562+
# but the version is updated to 1.6
450563
self.assertEqual('1.6', topo_loaded.cells[0].VERSION)
451564

452565

nova/tests/unit/objects/test_request_spec.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,9 @@ def test_get_by_instance_uuid_numa_topology_migration(
722722
id=1, cpuset={3, 4}, memory=512, cpu_policy="dedicated"),
723723
]
724724
)
725+
for cell in numa_topology.cells:
726+
cell.VERSION = '1.4'
727+
725728
spec_obj = fake_request_spec.fake_spec_obj()
726729
spec_obj.numa_topology = numa_topology
727730
fake_spec = fake_request_spec.fake_db_spec(spec_obj)

0 commit comments

Comments
 (0)