Skip to content

Commit 56f29b3

Browse files
author
Balazs Gibizer
committed
Remove extra instance.save() calls related to qos SRIOV ports
During creating or moving of an instance with qos SRIOV port the PCI device claim on the destination compute needs to be restricted to select PCI VFs from the same PF where the bandwidth for the qos port is allocated from. This is achieved by updating the spec part of the InstancePCIRequest with the device name of the PF by calling update_pci_request_spec_with_allocated_interface_name(). Until now such update of the instance object was directly persisted by the call. During code review it was came up that the instance.save() in the util is not appropriate as the caller has a lot more context to decide when to persist the changes. The original eager instance.save was introduced when support added to the server create flow. Now I realized that the need for such save was due to a mistake in the original ResourceTracker.instance_claim() call that loads the InstancePCIRequest from the DB instead of using the requests through the passed in instance object. By removing the extra DB call the need for eagerly persisting the PCI spec update is also removed. It turned out that both the server create code path and every server move code paths eventually persist the instance object either during at the end of the claim process or in case of live migration in the post_live_migration_at_destination compute manager call. This means that the code now can be simplified. Especially the live migration cases. In the live migrate abort case we don't need to roll back the eagerly persisted PCI change as now such change is only persisted at the end of the migration but still we need to refresh pci_requests field of the instance object during the rollback as that field might be stale, containing dest host related PCI information. Also in case of rescheduling during live migrate if the rescheduling failed the PCI change needed to be rolled back to the source host by a specific code. But now those change are never persisted until the migration finishes so this rollback code can be removed too. Change-Id: Ied8f96b4e67f79498519931cb6b35dad5288bbb8 blueprint: support-move-ops-with-qos-ports-ussuri
1 parent 4eafc9d commit 56f29b3

File tree

8 files changed

+70
-58
lines changed

8 files changed

+70
-58
lines changed

nova/compute/manager.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8552,6 +8552,18 @@ def _rollback_live_migration(self, context, instance,
85528552
None
85538553
85548554
"""
8555+
# NOTE(gibi): We need to refresh pci_requests of the instance as it
8556+
# might be changed by the conductor during scheduling based on the
8557+
# selected destination host. If the instance has SRIOV ports with
8558+
# resource request then the LiveMigrationTask._find_destination call
8559+
# updated the instance.pci_requests.requests[].spec with the SRIOV PF
8560+
# device name to be used on the destination host. As the migration is
8561+
# rolling back to the source host now we don't want to persist the
8562+
# destination host related changes in the DB.
8563+
instance.pci_requests = \
8564+
objects.InstancePCIRequests.get_by_instance_uuid(
8565+
context, instance.uuid)
8566+
85558567
if (isinstance(migrate_data, migrate_data_obj.LiveMigrateData) and
85568568
migrate_data.obj_attr_is_set('migration')):
85578569
migration = migrate_data.migration

nova/compute/resource_tracker.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ def instance_claim(self, context, instance, nodename, allocations,
156156
instance=instance)
157157

158158
cn = self.compute_nodes[nodename]
159-
pci_requests = objects.InstancePCIRequests.get_by_instance_uuid(
160-
context, instance.uuid)
159+
pci_requests = instance.pci_requests
161160
claim = claims.Claim(context, instance, nodename, self, cn,
162161
pci_requests, limits=limits)
163162

nova/compute/utils.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,7 +1509,6 @@ def needs_update(pci_request, mapping):
15091509
return (pci_request.requester_id and
15101510
pci_request.requester_id in mapping)
15111511

1512-
modified = False
15131512
for pci_request in instance.pci_requests.requests:
15141513
if needs_update(pci_request, provider_mapping):
15151514

@@ -1535,6 +1534,3 @@ def needs_update(pci_request, mapping):
15351534

15361535
for spec in pci_request.spec:
15371536
spec['parent_ifname'] = rp_name_pieces[2]
1538-
modified = True
1539-
if modified:
1540-
instance.save()

nova/conductor/tasks/live_migrate.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
1010
# License for the specific language governing permissions and limitations
1111
# under the License.
12-
import copy
1312

1413
from oslo_log import log as logging
1514
import oslo_messaging as messaging
@@ -68,7 +67,6 @@ def __init__(self, context, instance, destination,
6867
self._source_cn = None
6968
self._held_allocations = None
7069
self.network_api = neutron.API()
71-
self.instance_pci_request = None
7270

7371
def _execute(self):
7472
self._check_instance_is_active()
@@ -84,11 +82,6 @@ def _execute(self):
8482
self.instance,
8583
self.migration))
8684

87-
# NOTE(gibi): migrating with qos SRIOV ports will update the PCI
88-
# request so we saving the request here so that we can roll that change
89-
# back if the migration fails.
90-
self.instance_pci_request = copy.deepcopy(self.instance.pci_requests)
91-
9285
if not self.destination:
9386
# Either no host was specified in the API request and the user
9487
# wants the scheduler to pick a destination host, or a host was
@@ -158,9 +151,6 @@ def rollback(self, ex):
158151
self._source_cn,
159152
self.instance,
160153
self.migration)
161-
if self.instance_pci_request:
162-
self.instance.pci_requests = self.instance_pci_request
163-
self.instance.save()
164154

165155
def _check_instance_is_active(self):
166156
if self.instance.power_state not in (power_state.RUNNING,
@@ -521,6 +511,10 @@ def _find_destination(self):
521511
provider_mapping = request_spec.get_request_group_mapping()
522512

523513
if provider_mapping:
514+
# NOTE(gibi): this call might update the pci_requests of the
515+
# instance based on the destination host if so then such change
516+
# will be persisted when post_live_migration_at_destination
517+
# runs.
524518
compute_utils.\
525519
update_pci_request_spec_with_allocated_interface_name(
526520
self.context, self.report_client, self.instance,

nova/tests/functional/test_servers.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6653,6 +6653,8 @@ def _test_resize_or_migrate_server_with_qos_ports(self, new_flavor=None):
66536653
migration_uuid, source_compute_rp_uuid=self.compute1_rp_uuid,
66546654
new_flavor=new_flavor)
66556655

6656+
self._assert_pci_request_pf_device_name(server, 'host2-ens2')
6657+
66566658
self._confirm_resize(server)
66576659

66586660
# check that allocation is still OK
@@ -6789,6 +6791,8 @@ def fake_prep_resize(_self, *args, **kwargs):
67896791
source_compute_rp_uuid=self.compute1_rp_uuid,
67906792
new_flavor=new_flavor)
67916793

6794+
self._assert_pci_request_pf_device_name(server, 'host3-ens2')
6795+
67926796
self._confirm_resize(server)
67936797

67946798
# check that allocation is still OK
@@ -7141,6 +7145,8 @@ def test_evacuate_with_qos_port(self, host=None):
71417145
self.compute2_rp_uuid, non_qos_normal_port, qos_normal_port,
71427146
qos_sriov_port)
71437147

7148+
self._assert_pci_request_pf_device_name(server, 'host2-ens2')
7149+
71447150
# recover source compute
71457151
self.admin_api.put_service(
71467152
self.compute1_service_id, {'forced_down': 'false'})
@@ -7300,6 +7306,8 @@ def test_live_migrate_with_qos_port(self, host=None):
73007306
server, self.compute2_rp_uuid, non_qos_normal_port,
73017307
qos_normal_port, qos_sriov_port, self.flavor_with_group_policy)
73027308

7309+
self._assert_pci_request_pf_device_name(server, 'host2-ens2')
7310+
73037311
self._delete_server_and_check_allocations(
73047312
server, qos_normal_port, qos_sriov_port)
73057313

@@ -7364,6 +7372,8 @@ def fake_check_can_live_migrate_destination(
73647372
server, compute3_rp_uuid, non_qos_normal_port,
73657373
qos_normal_port, qos_sriov_port, self.flavor_with_group_policy)
73667374

7375+
self._assert_pci_request_pf_device_name(server, 'host3-ens2')
7376+
73677377
self._delete_server_and_check_allocations(
73687378
server, qos_normal_port, qos_sriov_port)
73697379

@@ -7529,10 +7539,7 @@ def test_live_migrate_with_qos_port_abort_migration(self):
75297539
qos_normal_port, qos_sriov_port, self.flavor_with_group_policy)
75307540

75317541
# Assert that the InstancePCIRequests rolled back to point to host1
7532-
# This assert is fails now as the abort does not change the PCI device
7533-
# back
7534-
# TODO(gibi): come up with an idea to fix this
7535-
# self._assert_pci_request_pf_device_name(server, 'host1-ens2')
7542+
self._assert_pci_request_pf_device_name(server, 'host1-ens2')
75367543

75377544
self._delete_server_and_check_allocations(
75387545
server, qos_normal_port, qos_sriov_port)

nova/tests/unit/compute/test_compute.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6578,6 +6578,7 @@ def test_rollback_live_migration(self, mock_bdms, mock_get_node,
65786578
bdms = objects.BlockDeviceMappingList()
65796579
mock_bdms.return_value = bdms
65806580

6581+
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid')
65816582
@mock.patch('nova.compute.utils.notify_about_instance_action')
65826583
@mock.patch.object(migration, 'save')
65836584
@mock.patch.object(self.compute, '_revert_allocation')
@@ -6586,7 +6587,8 @@ def test_rollback_live_migration(self, mock_bdms, mock_get_node,
65866587
@mock.patch.object(compute_rpcapi.ComputeAPI,
65876588
'drop_move_claim_at_destination')
65886589
def _test(mock_drop_claim, mock_nw_api, mock_lmcf, mock_ra,
6589-
mock_mig_save, mock_notify):
6590+
mock_mig_save, mock_notify, mock_get_pci):
6591+
mock_get_pci.return_value = objects.InstancePCIRequests()
65906592
mock_lmcf.return_value = False, False
65916593
if migration_status:
65926594
self.compute._rollback_live_migration(
@@ -6611,6 +6613,8 @@ def _test(mock_drop_claim, mock_nw_api, mock_lmcf, mock_ra,
66116613
])
66126614
mock_ra.assert_called_once_with(mock.ANY, instance, migration)
66136615
mock_mig_save.assert_called_once_with()
6616+
mock_get_pci.assert_called_once_with(c, instance.uuid)
6617+
self.assertEqual(mock_get_pci.return_value, instance.pci_requests)
66146618

66156619
_test()
66166620

@@ -6637,6 +6641,7 @@ def test_rollback_live_migration_network_teardown_fails(
66376641
migrate_data = objects.LibvirtLiveMigrateData(migration=migration)
66386642
source_bdms = objects.BlockDeviceMappingList()
66396643

6644+
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid')
66406645
@mock.patch.object(self.compute, '_notify_about_instance_usage')
66416646
@mock.patch('nova.compute.utils.notify_about_instance_action')
66426647
@mock.patch.object(instance, 'save')
@@ -6647,7 +6652,10 @@ def test_rollback_live_migration_network_teardown_fails(
66476652
@mock.patch.object(self.compute.network_api, 'setup_networks_on_host',
66486653
side_effect=(None, test.TestingException))
66496654
def _test(mock_nw_setup, _mock_lmcf, mock_ra, mock_mig_save,
6650-
mock_inst_save, _mock_notify_action, mock_notify_usage):
6655+
mock_inst_save, _mock_notify_action, mock_notify_usage,
6656+
mock_get_pci):
6657+
mock_get_pci.return_value = objects.InstancePCIRequests()
6658+
66516659
self.assertRaises(test.TestingException,
66526660
self.compute._rollback_live_migration,
66536661
ctxt, instance, 'dest-host', migrate_data,
@@ -6672,6 +6680,8 @@ def _test(mock_nw_setup, _mock_lmcf, mock_ra, mock_mig_save,
66726680
# Since we failed during rollback, the migration status gets set
66736681
# to 'error' instead of 'goofballs'.
66746682
self.assertEqual('error', migration.status)
6683+
mock_get_pci.assert_called_once_with(ctxt, instance.uuid)
6684+
self.assertEqual(mock_get_pci.return_value, instance.pci_requests)
66756685

66766686
_test()
66776687

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9545,6 +9545,7 @@ def fake_bdm():
95459545
bdm.connection_info = new_attachment_id
95469546
bdms = objects.BlockDeviceMappingList(objects=[bdm])
95479547

9548+
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid')
95489549
@mock.patch.object(compute.volume_api, 'attachment_delete')
95499550
@mock.patch.object(compute_utils, 'notify_about_instance_action')
95509551
@mock.patch.object(instance, 'save')
@@ -9556,11 +9557,12 @@ def fake_bdm():
95569557
@mock.patch.object(objects.Instance, 'drop_migration_context')
95579558
def _test(mock_drop_mig_ctxt, mock_get_bdms, mock_net_api,
95589559
mock_remove_conn, mock_usage, mock_instance_save,
9559-
mock_action, mock_attach_delete):
9560+
mock_action, mock_attach_delete, mock_get_pci):
95609561
# this tests that _rollback_live_migration replaces the bdm's
95619562
# attachment_id with the original attachment id that is in
95629563
# migrate_data.
95639564
mock_get_bdms.return_value = bdms
9565+
mock_get_pci.return_value = objects.InstancePCIRequests()
95649566

95659567
compute._rollback_live_migration(self.context, instance, None,
95669568
migrate_data=migrate_data,
@@ -9574,6 +9576,8 @@ def _test(mock_drop_mig_ctxt, mock_get_bdms, mock_net_api,
95749576
self.assertEqual(orig_attachment_id, bdm.connection_info)
95759577
bdm.save.assert_called_once_with()
95769578
mock_drop_mig_ctxt.assert_called_once_with()
9579+
mock_get_pci.assert_called_once_with(self.context, instance.uuid)
9580+
self.assertEqual(mock_get_pci.return_value, instance.pci_requests)
95779581

95789582
_test()
95799583

@@ -9590,13 +9594,14 @@ def test_rollback_live_migration_port_binding_delete_fails(
95909594
migration=self.migration, is_shared_instance_path=True,
95919595
is_shared_block_storage=True)
95929596

9597+
@mock.patch('nova.objects.InstancePCIRequests.get_by_instance_uuid')
95939598
@mock.patch.object(self.compute, '_revert_allocation')
95949599
@mock.patch.object(self.instance, 'save')
95959600
@mock.patch.object(self.compute, 'network_api')
95969601
@mock.patch.object(self.compute, '_notify_about_instance_usage')
95979602
@mock.patch.object(objects.Instance, 'drop_migration_context')
95989603
def _do_test(drop_mig_ctxt, legacy_notify, network_api, instance_save,
9599-
_revert_allocation):
9604+
_revert_allocation, mock_get_pci):
96009605
# setup_networks_on_host is called two times:
96019606
# 1. set the migrating_to attribute in the port binding profile,
96029607
# which is a no-op in this case for neutron
@@ -9605,13 +9610,18 @@ def _do_test(drop_mig_ctxt, legacy_notify, network_api, instance_save,
96059610
None,
96069611
exception.PortBindingDeletionFailed(
96079612
port_id=uuids.port_id, host='fake-dest-host')]
9613+
mock_get_pci.return_value = objects.InstancePCIRequests()
96089614
self.compute._rollback_live_migration(
96099615
self.context, self.instance, 'fake-dest-host', migrate_data,
96109616
source_bdms=objects.BlockDeviceMappingList())
96119617
self.assertEqual(1, mock_log_error.call_count)
96129618
self.assertIn('Network cleanup failed for destination host',
96139619
mock_log_error.call_args[0][0])
96149620
drop_mig_ctxt.assert_called_once_with()
9621+
mock_get_pci.assert_called_once_with(
9622+
self.context, self.instance.uuid)
9623+
self.assertEqual(
9624+
mock_get_pci.return_value, self.instance.pci_requests)
96159625

96169626
_do_test()
96179627

0 commit comments

Comments
 (0)