Skip to content

Commit 4921e82

Browse files
mriedemstephenfin
authored andcommitted
Use COMPUTE_SAME_HOST_COLD_MIGRATE trait during migrate
This uses the COMPUTE_SAME_HOST_COLD_MIGRATE trait in the API during a cold migration to filter out hosts that cannot support same-host cold migration, which is all of them except for the hosts using the vCenter driver. For any nodes that do not report the trait, we won't know if they don't because they don't support it or if they are not new enough to report it, so the API has a service version check and will fallback to old behavior using the config if the node is old. That compat code can be removed in the next release. As a result of this the FakeDriver capabilities are updated so the FakeDriver no longer supports same-host cold migration and a new fake driver is added to support that scenario for any tests that need it. Change-Id: I7a4b951f3ab324c666ab924e6003d24cc8e539f5 Closes-Bug: #1748697 Related-Bug: #1811235
1 parent 9fa3600 commit 4921e82

File tree

10 files changed

+216
-8
lines changed

10 files changed

+216
-8
lines changed

lower-constraints.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ os-brick==2.6.2
6666
os-client-config==1.29.0
6767
os-resource-classes==0.4.0
6868
os-service-types==1.7.0
69-
os-traits==2.0.0
69+
os-traits==2.1.0
7070
os-vif==1.14.0
7171
os-win==3.0.0
7272
os-xenapi==0.3.3

nova/compute/api.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import string
2626

2727
from castellan import key_manager
28+
import os_traits
2829
from oslo_log import log as logging
2930
from oslo_messaging import exceptions as oslo_exceptions
3031
from oslo_serialization import base64 as base64utils
@@ -103,6 +104,7 @@
103104

104105
MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
105106
MIN_COMPUTE_CROSS_CELL_RESIZE = 47
107+
MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48
106108

107109
# FIXME(danms): Keep a global cache of the cells we find the
108110
# first time we look. This needs to be refreshed on a timer or
@@ -3881,8 +3883,7 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
38813883
validate_pci=True)
38823884

38833885
filter_properties = {'ignore_hosts': []}
3884-
3885-
if not CONF.allow_resize_to_same_host:
3886+
if not self._allow_resize_to_same_host(same_instance_type, instance):
38863887
filter_properties['ignore_hosts'].append(instance.host)
38873888

38883889
request_spec = objects.RequestSpec.get_by_instance_uuid(
@@ -3931,6 +3932,57 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
39313932
request_spec=request_spec,
39323933
do_cast=True)
39333934

3935+
def _allow_resize_to_same_host(self, cold_migrate, instance):
3936+
"""Contains logic for excluding the instance.host on resize/migrate.
3937+
3938+
If performing a cold migration and the compute node resource provider
3939+
reports the COMPUTE_SAME_HOST_COLD_MIGRATE trait then same-host cold
3940+
migration is allowed otherwise it is not and the current instance.host
3941+
should be excluded as a scheduling candidate.
3942+
3943+
:param cold_migrate: true if performing a cold migration, false
3944+
for resize
3945+
:param instance: Instance object being resized or cold migrated
3946+
:returns: True if same-host resize/cold migrate is allowed, False
3947+
otherwise
3948+
"""
3949+
if cold_migrate:
3950+
# Check to see if the compute node resource provider on which the
3951+
# instance is running has the COMPUTE_SAME_HOST_COLD_MIGRATE
3952+
# trait.
3953+
# Note that we check this here in the API since we cannot
3954+
# pre-filter allocation candidates in the scheduler using this
3955+
# trait as it would not work. For example, libvirt nodes will not
3956+
# report the trait but using it as a forbidden trait filter when
3957+
# getting allocation candidates would still return libvirt nodes
3958+
# which means we could attempt to cold migrate to the same libvirt
3959+
# node, which would fail.
3960+
ctxt = instance._context
3961+
cn = objects.ComputeNode.get_by_host_and_nodename(
3962+
ctxt, instance.host, instance.node)
3963+
traits = self.placementclient.get_provider_traits(
3964+
ctxt, cn.uuid).traits
3965+
# If the provider has the trait it is (1) new enough to report that
3966+
# trait and (2) supports cold migration on the same host.
3967+
if os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE in traits:
3968+
allow_same_host = True
3969+
else:
3970+
# TODO(mriedem): Remove this compatibility code after one
3971+
# release. If the compute is old we will not know if it
3972+
# supports same-host cold migration so we fallback to config.
3973+
service = objects.Service.get_by_compute_host(ctxt, cn.host)
3974+
if service.version >= MIN_COMPUTE_SAME_HOST_COLD_MIGRATE:
3975+
# The compute is new enough to report the trait but does
3976+
# not so same-host cold migration is not allowed.
3977+
allow_same_host = False
3978+
else:
3979+
# The compute is not new enough to report the trait so we
3980+
# fallback to config.
3981+
allow_same_host = CONF.allow_resize_to_same_host
3982+
else:
3983+
allow_same_host = CONF.allow_resize_to_same_host
3984+
return allow_same_host
3985+
39343986
@check_instance_lock
39353987
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
39363988
vm_states.PAUSED, vm_states.SUSPENDED])

nova/objects/service.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232

3333
# NOTE(danms): This is the global service version counter
34-
SERVICE_VERSION = 47
34+
SERVICE_VERSION = 48
3535

3636

3737
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -176,6 +176,8 @@
176176
# Version 47: Compute RPC v5.10:
177177
# finish_revert_snapshot_based_resize_at_source
178178
{'compute_rpc': '5.10'},
179+
# Version 48: Drivers report COMPUTE_SAME_HOST_COLD_MIGRATE trait.
180+
{'compute_rpc': '5.10'},
179181
)
180182

181183

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
13+
import mock
14+
15+
from nova.compute import api as compute_api
16+
from nova import context as nova_context
17+
from nova import objects
18+
from nova import test
19+
from nova.tests.functional import integrated_helpers
20+
from nova.tests.unit import fake_notifier
21+
22+
23+
class ColdMigrationDisallowSameHost(
24+
integrated_helpers.ProviderUsageBaseTestCase):
25+
"""Tests cold migrate where the source host does not have the
26+
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
27+
"""
28+
compute_driver = 'fake.MediumFakeDriver'
29+
30+
def setUp(self):
31+
super(ColdMigrationDisallowSameHost, self).setUp()
32+
# Start one compute service which will use the fake virt driver
33+
# which disallows cold migration to the same host.
34+
self._start_compute('host1')
35+
36+
def _wait_for_migrate_no_valid_host(self, error='NoValidHost'):
37+
event = fake_notifier.wait_for_versioned_notifications(
38+
'compute_task.migrate_server.error')[0]
39+
self.assertEqual(error,
40+
event['payload']['nova_object.data']['reason'][
41+
'nova_object.data']['exception'])
42+
43+
def test_cold_migrate_same_host_not_supported(self):
44+
"""Simple test to show that you cannot cold-migrate to the same host
45+
when the resource provider does not expose the
46+
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
47+
"""
48+
server = self._create_server(networks='none')
49+
# The fake driver does not report COMPUTE_SAME_HOST_COLD_MIGRATE
50+
# so cold migration should fail since we only have one host.
51+
self.api.post_server_action(server['id'], {'migrate': None})
52+
self._wait_for_migrate_no_valid_host()
53+
54+
def test_cold_migrate_same_host_old_compute_disallow(self):
55+
"""Upgrade compat test where the resource provider does not report
56+
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
57+
old so the API falls back to the allow_resize_to_same_host config which
58+
defaults to False.
59+
"""
60+
server = self._create_server(networks='none')
61+
# Stub the compute service version check to make the compute service
62+
# appear old.
63+
fake_service = objects.Service()
64+
fake_service.version = (
65+
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
66+
with mock.patch('nova.objects.Service.get_by_compute_host',
67+
return_value=fake_service) as mock_get_service:
68+
self.api.post_server_action(server['id'], {'migrate': None})
69+
mock_get_service.assert_called_once_with(
70+
test.MatchType(nova_context.RequestContext), 'host1')
71+
# Since allow_resize_to_same_host defaults to False scheduling failed
72+
# since there are no other hosts.
73+
self._wait_for_migrate_no_valid_host()
74+
75+
def test_cold_migrate_same_host_old_compute_allow(self):
76+
"""Upgrade compat test where the resource provider does not report
77+
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
78+
old so the API falls back to the allow_resize_to_same_host config which
79+
in this test is set to True.
80+
"""
81+
self.flags(allow_resize_to_same_host=True)
82+
server = self._create_server(networks='none')
83+
# Stub the compute service version check to make the compute service
84+
# appear old.
85+
fake_service = objects.Service()
86+
fake_service.version = (
87+
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
88+
with mock.patch('nova.objects.Service.get_by_compute_host',
89+
return_value=fake_service) as mock_get_service:
90+
self.api.post_server_action(server['id'], {'migrate': None})
91+
mock_get_service.assert_called_once_with(
92+
test.MatchType(nova_context.RequestContext), 'host1')
93+
# In this case the compute is old so the API falls back to checking
94+
# allow_resize_to_same_host which says same-host cold migrate is
95+
# allowed so the scheduler sends the request to the only compute
96+
# available but the virt driver says same-host cold migrate is not
97+
# supported and raises UnableToMigrateToSelf. A reschedule is sent
98+
# to conductor which results in MaxRetriesExceeded since there are no
99+
# alternative hosts.
100+
self._wait_for_migrate_no_valid_host(error='MaxRetriesExceeded')
101+
102+
103+
class ColdMigrationAllowSameHost(
104+
integrated_helpers.ProviderUsageBaseTestCase):
105+
"""Tests cold migrate where the source host has the
106+
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
107+
"""
108+
compute_driver = 'fake.SameHostColdMigrateDriver'
109+
110+
def setUp(self):
111+
super(ColdMigrationAllowSameHost, self).setUp()
112+
self._start_compute('host1')
113+
114+
def test_cold_migrate_same_host_supported(self):
115+
"""Simple test to show that you can cold-migrate to the same host
116+
when the resource provider exposes the COMPUTE_SAME_HOST_COLD_MIGRATE
117+
trait.
118+
"""
119+
server = self._create_server(networks='none')
120+
self.api.post_server_action(server['id'], {'migrate': None})
121+
server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
122+
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])

nova/tests/unit/compute/test_compute_api.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,6 +1741,7 @@ def test_revert_resize_concurrent_fail(
17411741

17421742
@mock.patch('nova.compute.api.API.get_instance_host_status',
17431743
new=mock.Mock(return_value=fields_obj.HostStatus.UP))
1744+
@mock.patch('nova.compute.api.API._allow_resize_to_same_host')
17441745
@mock.patch('nova.compute.utils.is_volume_backed_instance',
17451746
return_value=False)
17461747
@mock.patch('nova.compute.api.API._validate_flavor_image_nostatus')
@@ -1757,6 +1758,7 @@ def _test_resize(self, mock_get_all_by_host,
17571758
mock_get_by_instance_uuid, mock_get_flavor, mock_upsize,
17581759
mock_inst_save, mock_count, mock_limit, mock_record,
17591760
mock_migration, mock_validate, mock_is_vol_backed,
1761+
mock_allow_resize_to_same_host,
17601762
flavor_id_passed=True,
17611763
same_host=False, allow_same_host=False,
17621764
project_id=None,
@@ -1768,6 +1770,7 @@ def _test_resize(self, mock_get_all_by_host,
17681770
allow_cross_cell_resize=False):
17691771

17701772
self.flags(allow_resize_to_same_host=allow_same_host)
1773+
mock_allow_resize_to_same_host.return_value = allow_same_host
17711774

17721775
params = {}
17731776
if project_id is not None:
@@ -1871,6 +1874,7 @@ def _check_state(expected_task_state=None):
18711874
self.assertEqual(
18721875
allow_cross_cell_resize,
18731876
fake_spec.requested_destination.allow_cross_cell_move)
1877+
mock_allow_resize_to_same_host.assert_called_once()
18741878

18751879
if host_name:
18761880
mock_get_all_by_host.assert_called_once_with(

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7705,6 +7705,7 @@ def setUp(self):
77057705
super(ComputeManagerMigrationTestCase, self).setUp()
77067706
fake_notifier.stub_notifier(self)
77077707
self.addCleanup(fake_notifier.reset)
7708+
self.flags(compute_driver='fake.SameHostColdMigrateDriver')
77087709
self.compute = manager.ComputeManager()
77097710
self.context = context.RequestContext(fakes.FAKE_USER_ID,
77107711
fakes.FAKE_PROJECT_ID)
@@ -9658,7 +9659,7 @@ def _get_migration(self, migration_id, status, migration_type):
96589659

96599660
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
96609661
@mock.patch.object(objects.Migration, 'get_by_id')
9661-
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
9662+
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
96629663
@mock.patch('nova.compute.utils.notify_about_instance_action')
96639664
def test_live_migration_abort(self, mock_notify_action, mock_driver,
96649665
mock_get_migration, mock_notify):
@@ -9711,7 +9712,7 @@ def test_live_migration_abort_queued(self, mock_notify_action,
97119712
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
97129713
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
97139714
@mock.patch.object(objects.Migration, 'get_by_id')
9714-
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
9715+
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
97159716
@mock.patch('nova.compute.utils.notify_about_instance_action')
97169717
def test_live_migration_abort_not_supported(self, mock_notify_action,
97179718
mock_driver,

nova/virt/driver.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ def block_device_info_get_mapping(block_device_info):
124124
"supports_image_type_vmdk": os_traits.COMPUTE_IMAGE_TYPE_VMDK,
125125
# Added in os-traits 2.0.0
126126
"supports_image_type_ploop": os_traits.COMPUTE_IMAGE_TYPE_PLOOP,
127+
128+
# Added in os-traits 2.1.0.
129+
"supports_migrate_to_same_host": os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE,
127130
}
128131

129132

nova/virt/fake.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class FakeDriver(driver.ComputeDriver):
104104
capabilities = {
105105
"has_imagecache": True,
106106
"supports_evacuate": True,
107-
"supports_migrate_to_same_host": True,
107+
"supports_migrate_to_same_host": False,
108108
"supports_attach_interface": True,
109109
"supports_device_tagging": True,
110110
"supports_tagged_attach_interface": True,
@@ -702,6 +702,12 @@ class MediumFakeDriver(FakeDriver):
702702
local_gb = 1028
703703

704704

705+
class SameHostColdMigrateDriver(MediumFakeDriver):
706+
"""MediumFakeDriver variant that supports same-host cold migrate."""
707+
capabilities = dict(FakeDriver.capabilities,
708+
supports_migrate_to_same_host=True)
709+
710+
705711
class PowerUpdateFakeDriver(SmallFakeDriver):
706712
# A specific fake driver for the power-update external event testing.
707713

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
2+
fixes:
3+
- |
4+
This release contains a fix for `bug 1748697`_ which distinguishes between
5+
resize and cold migrate such that the ``allow_resize_to_same_host`` config
6+
option is no longer used to determine if a server can be cold migrated to
7+
the same compute service host. Now when requesting a cold migration the
8+
API will check if the source compute node resource provider has the
9+
``COMPUTE_SAME_HOST_COLD_MIGRATE`` trait and if so the scheduler can select
10+
the source host. Note that the only in-tree compute driver that supports
11+
cold migration to the same host is ``VMwareVCDriver``.
12+
13+
To support rolling upgrades with N-1 computes if a node does not report the
14+
trait and is old the API will fallback to the ``allow_resize_to_same_host``
15+
config option value. That compatibility will be removed in a subsequent
16+
release.
17+
18+
.. _bug 1748697: https://launchpad.net/bugs/1748697

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ psutil>=3.2.2 # BSD
5555
oslo.versionedobjects>=1.35.0 # Apache-2.0
5656
os-brick>=2.6.2 # Apache-2.0
5757
os-resource-classes>=0.4.0 # Apache-2.0
58-
os-traits>=2.0.0 # Apache-2.0
58+
os-traits>=2.1.0 # Apache-2.0
5959
os-vif>=1.14.0 # Apache-2.0
6060
os-win>=3.0.0 # Apache-2.0
6161
castellan>=0.16.0 # Apache-2.0

0 commit comments

Comments
 (0)