Skip to content

Commit c163151

Browse files
Zuulopenstack-gerrit
authored andcommitted
Merge "Use COMPUTE_SAME_HOST_COLD_MIGRATE trait during migrate"
2 parents b8f4e46 + 4921e82 commit c163151

File tree

10 files changed

+216
-8
lines changed

10 files changed

+216
-8
lines changed

lower-constraints.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ os-brick==2.6.2
6666
os-client-config==1.29.0
6767
os-resource-classes==0.4.0
6868
os-service-types==1.7.0
69-
os-traits==2.0.0
69+
os-traits==2.1.0
7070
os-vif==1.14.0
7171
os-win==3.0.0
7272
os-xenapi==0.3.3

nova/compute/api.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import string
2626

2727
from castellan import key_manager
28+
import os_traits
2829
from oslo_log import log as logging
2930
from oslo_messaging import exceptions as oslo_exceptions
3031
from oslo_serialization import base64 as base64utils
@@ -103,6 +104,7 @@
103104

104105
MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
105106
MIN_COMPUTE_CROSS_CELL_RESIZE = 47
107+
MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48
106108

107109
# FIXME(danms): Keep a global cache of the cells we find the
108110
# first time we look. This needs to be refreshed on a timer or
@@ -3881,8 +3883,7 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
38813883
validate_pci=True)
38823884

38833885
filter_properties = {'ignore_hosts': []}
3884-
3885-
if not CONF.allow_resize_to_same_host:
3886+
if not self._allow_resize_to_same_host(same_instance_type, instance):
38863887
filter_properties['ignore_hosts'].append(instance.host)
38873888

38883889
request_spec = objects.RequestSpec.get_by_instance_uuid(
@@ -3931,6 +3932,57 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
39313932
request_spec=request_spec,
39323933
do_cast=True)
39333934

3935+
def _allow_resize_to_same_host(self, cold_migrate, instance):
3936+
"""Contains logic for excluding the instance.host on resize/migrate.
3937+
3938+
If performing a cold migration and the compute node resource provider
3939+
reports the COMPUTE_SAME_HOST_COLD_MIGRATE trait then same-host cold
3940+
migration is allowed otherwise it is not and the current instance.host
3941+
should be excluded as a scheduling candidate.
3942+
3943+
:param cold_migrate: true if performing a cold migration, false
3944+
for resize
3945+
:param instance: Instance object being resized or cold migrated
3946+
:returns: True if same-host resize/cold migrate is allowed, False
3947+
otherwise
3948+
"""
3949+
if cold_migrate:
3950+
# Check to see if the compute node resource provider on which the
3951+
# instance is running has the COMPUTE_SAME_HOST_COLD_MIGRATE
3952+
# trait.
3953+
# Note that we check this here in the API since we cannot
3954+
# pre-filter allocation candidates in the scheduler using this
3955+
# trait as it would not work. For example, libvirt nodes will not
3956+
# report the trait but using it as a forbidden trait filter when
3957+
# getting allocation candidates would still return libvirt nodes
3958+
# which means we could attempt to cold migrate to the same libvirt
3959+
# node, which would fail.
3960+
ctxt = instance._context
3961+
cn = objects.ComputeNode.get_by_host_and_nodename(
3962+
ctxt, instance.host, instance.node)
3963+
traits = self.placementclient.get_provider_traits(
3964+
ctxt, cn.uuid).traits
3965+
# If the provider has the trait it is (1) new enough to report that
3966+
# trait and (2) supports cold migration on the same host.
3967+
if os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE in traits:
3968+
allow_same_host = True
3969+
else:
3970+
# TODO(mriedem): Remove this compatibility code after one
3971+
# release. If the compute is old we will not know if it
3972+
# supports same-host cold migration so we fallback to config.
3973+
service = objects.Service.get_by_compute_host(ctxt, cn.host)
3974+
if service.version >= MIN_COMPUTE_SAME_HOST_COLD_MIGRATE:
3975+
# The compute is new enough to report the trait but does
3976+
# not so same-host cold migration is not allowed.
3977+
allow_same_host = False
3978+
else:
3979+
# The compute is not new enough to report the trait so we
3980+
# fallback to config.
3981+
allow_same_host = CONF.allow_resize_to_same_host
3982+
else:
3983+
allow_same_host = CONF.allow_resize_to_same_host
3984+
return allow_same_host
3985+
39343986
@check_instance_lock
39353987
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
39363988
vm_states.PAUSED, vm_states.SUSPENDED])

nova/objects/service.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232

3333
# NOTE(danms): This is the global service version counter
34-
SERVICE_VERSION = 47
34+
SERVICE_VERSION = 48
3535

3636

3737
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@@ -176,6 +176,8 @@
176176
# Version 47: Compute RPC v5.10:
177177
# finish_revert_snapshot_based_resize_at_source
178178
{'compute_rpc': '5.10'},
179+
# Version 48: Drivers report COMPUTE_SAME_HOST_COLD_MIGRATE trait.
180+
{'compute_rpc': '5.10'},
179181
)
180182

181183

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
13+
import mock
14+
15+
from nova.compute import api as compute_api
16+
from nova import context as nova_context
17+
from nova import objects
18+
from nova import test
19+
from nova.tests.functional import integrated_helpers
20+
from nova.tests.unit import fake_notifier
21+
22+
23+
class ColdMigrationDisallowSameHost(
24+
integrated_helpers.ProviderUsageBaseTestCase):
25+
"""Tests cold migrate where the source host does not have the
26+
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
27+
"""
28+
compute_driver = 'fake.MediumFakeDriver'
29+
30+
def setUp(self):
31+
super(ColdMigrationDisallowSameHost, self).setUp()
32+
# Start one compute service which will use the fake virt driver
33+
# which disallows cold migration to the same host.
34+
self._start_compute('host1')
35+
36+
def _wait_for_migrate_no_valid_host(self, error='NoValidHost'):
37+
event = fake_notifier.wait_for_versioned_notifications(
38+
'compute_task.migrate_server.error')[0]
39+
self.assertEqual(error,
40+
event['payload']['nova_object.data']['reason'][
41+
'nova_object.data']['exception'])
42+
43+
def test_cold_migrate_same_host_not_supported(self):
44+
"""Simple test to show that you cannot cold-migrate to the same host
45+
when the resource provider does not expose the
46+
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
47+
"""
48+
server = self._create_server(networks='none')
49+
# The fake driver does not report COMPUTE_SAME_HOST_COLD_MIGRATE
50+
# so cold migration should fail since we only have one host.
51+
self.api.post_server_action(server['id'], {'migrate': None})
52+
self._wait_for_migrate_no_valid_host()
53+
54+
def test_cold_migrate_same_host_old_compute_disallow(self):
55+
"""Upgrade compat test where the resource provider does not report
56+
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
57+
old so the API falls back to the allow_resize_to_same_host config which
58+
defaults to False.
59+
"""
60+
server = self._create_server(networks='none')
61+
# Stub the compute service version check to make the compute service
62+
# appear old.
63+
fake_service = objects.Service()
64+
fake_service.version = (
65+
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
66+
with mock.patch('nova.objects.Service.get_by_compute_host',
67+
return_value=fake_service) as mock_get_service:
68+
self.api.post_server_action(server['id'], {'migrate': None})
69+
mock_get_service.assert_called_once_with(
70+
test.MatchType(nova_context.RequestContext), 'host1')
71+
# Since allow_resize_to_same_host defaults to False scheduling failed
72+
# since there are no other hosts.
73+
self._wait_for_migrate_no_valid_host()
74+
75+
def test_cold_migrate_same_host_old_compute_allow(self):
76+
"""Upgrade compat test where the resource provider does not report
77+
the COMPUTE_SAME_HOST_COLD_MIGRATE trait but the compute service is
78+
old so the API falls back to the allow_resize_to_same_host config which
79+
in this test is set to True.
80+
"""
81+
self.flags(allow_resize_to_same_host=True)
82+
server = self._create_server(networks='none')
83+
# Stub the compute service version check to make the compute service
84+
# appear old.
85+
fake_service = objects.Service()
86+
fake_service.version = (
87+
compute_api.MIN_COMPUTE_SAME_HOST_COLD_MIGRATE - 1)
88+
with mock.patch('nova.objects.Service.get_by_compute_host',
89+
return_value=fake_service) as mock_get_service:
90+
self.api.post_server_action(server['id'], {'migrate': None})
91+
mock_get_service.assert_called_once_with(
92+
test.MatchType(nova_context.RequestContext), 'host1')
93+
# In this case the compute is old so the API falls back to checking
94+
# allow_resize_to_same_host which says same-host cold migrate is
95+
# allowed so the scheduler sends the request to the only compute
96+
# available but the virt driver says same-host cold migrate is not
97+
# supported and raises UnableToMigrateToSelf. A reschedule is sent
98+
# to conductor which results in MaxRetriesExceeded since there are no
99+
# alternative hosts.
100+
self._wait_for_migrate_no_valid_host(error='MaxRetriesExceeded')
101+
102+
103+
class ColdMigrationAllowSameHost(
104+
integrated_helpers.ProviderUsageBaseTestCase):
105+
"""Tests cold migrate where the source host has the
106+
COMPUTE_SAME_HOST_COLD_MIGRATE trait.
107+
"""
108+
compute_driver = 'fake.SameHostColdMigrateDriver'
109+
110+
def setUp(self):
111+
super(ColdMigrationAllowSameHost, self).setUp()
112+
self._start_compute('host1')
113+
114+
def test_cold_migrate_same_host_supported(self):
115+
"""Simple test to show that you can cold-migrate to the same host
116+
when the resource provider exposes the COMPUTE_SAME_HOST_COLD_MIGRATE
117+
trait.
118+
"""
119+
server = self._create_server(networks='none')
120+
self.api.post_server_action(server['id'], {'migrate': None})
121+
server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
122+
self.assertEqual('host1', server['OS-EXT-SRV-ATTR:host'])

nova/tests/unit/compute/test_compute_api.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,6 +1741,7 @@ def test_revert_resize_concurrent_fail(
17411741

17421742
@mock.patch('nova.compute.api.API.get_instance_host_status',
17431743
new=mock.Mock(return_value=fields_obj.HostStatus.UP))
1744+
@mock.patch('nova.compute.api.API._allow_resize_to_same_host')
17441745
@mock.patch('nova.compute.utils.is_volume_backed_instance',
17451746
return_value=False)
17461747
@mock.patch('nova.compute.api.API._validate_flavor_image_nostatus')
@@ -1757,6 +1758,7 @@ def _test_resize(self, mock_get_all_by_host,
17571758
mock_get_by_instance_uuid, mock_get_flavor, mock_upsize,
17581759
mock_inst_save, mock_count, mock_limit, mock_record,
17591760
mock_migration, mock_validate, mock_is_vol_backed,
1761+
mock_allow_resize_to_same_host,
17601762
flavor_id_passed=True,
17611763
same_host=False, allow_same_host=False,
17621764
project_id=None,
@@ -1768,6 +1770,7 @@ def _test_resize(self, mock_get_all_by_host,
17681770
allow_cross_cell_resize=False):
17691771

17701772
self.flags(allow_resize_to_same_host=allow_same_host)
1773+
mock_allow_resize_to_same_host.return_value = allow_same_host
17711774

17721775
params = {}
17731776
if project_id is not None:
@@ -1871,6 +1874,7 @@ def _check_state(expected_task_state=None):
18711874
self.assertEqual(
18721875
allow_cross_cell_resize,
18731876
fake_spec.requested_destination.allow_cross_cell_move)
1877+
mock_allow_resize_to_same_host.assert_called_once()
18741878

18751879
if host_name:
18761880
mock_get_all_by_host.assert_called_once_with(

nova/tests/unit/compute/test_compute_mgr.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7705,6 +7705,7 @@ def setUp(self):
77057705
super(ComputeManagerMigrationTestCase, self).setUp()
77067706
fake_notifier.stub_notifier(self)
77077707
self.addCleanup(fake_notifier.reset)
7708+
self.flags(compute_driver='fake.SameHostColdMigrateDriver')
77087709
self.compute = manager.ComputeManager()
77097710
self.context = context.RequestContext(fakes.FAKE_USER_ID,
77107711
fakes.FAKE_PROJECT_ID)
@@ -9658,7 +9659,7 @@ def _get_migration(self, migration_id, status, migration_type):
96589659

96599660
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
96609661
@mock.patch.object(objects.Migration, 'get_by_id')
9661-
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
9662+
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
96629663
@mock.patch('nova.compute.utils.notify_about_instance_action')
96639664
def test_live_migration_abort(self, mock_notify_action, mock_driver,
96649665
mock_get_migration, mock_notify):
@@ -9711,7 +9712,7 @@ def test_live_migration_abort_queued(self, mock_notify_action,
97119712
@mock.patch.object(compute_utils, 'add_instance_fault_from_exc')
97129713
@mock.patch.object(manager.ComputeManager, '_notify_about_instance_usage')
97139714
@mock.patch.object(objects.Migration, 'get_by_id')
9714-
@mock.patch.object(nova.virt.fake.SmallFakeDriver, 'live_migration_abort')
9715+
@mock.patch.object(nova.virt.fake.FakeDriver, 'live_migration_abort')
97159716
@mock.patch('nova.compute.utils.notify_about_instance_action')
97169717
def test_live_migration_abort_not_supported(self, mock_notify_action,
97179718
mock_driver,

nova/virt/driver.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ def block_device_info_get_mapping(block_device_info):
124124
"supports_image_type_vmdk": os_traits.COMPUTE_IMAGE_TYPE_VMDK,
125125
# Added in os-traits 2.0.0
126126
"supports_image_type_ploop": os_traits.COMPUTE_IMAGE_TYPE_PLOOP,
127+
128+
# Added in os-traits 2.1.0.
129+
"supports_migrate_to_same_host": os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE,
127130
}
128131

129132

nova/virt/fake.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class FakeDriver(driver.ComputeDriver):
104104
capabilities = {
105105
"has_imagecache": True,
106106
"supports_evacuate": True,
107-
"supports_migrate_to_same_host": True,
107+
"supports_migrate_to_same_host": False,
108108
"supports_attach_interface": True,
109109
"supports_device_tagging": True,
110110
"supports_tagged_attach_interface": True,
@@ -702,6 +702,12 @@ class MediumFakeDriver(FakeDriver):
702702
local_gb = 1028
703703

704704

705+
class SameHostColdMigrateDriver(MediumFakeDriver):
706+
"""MediumFakeDriver variant that supports same-host cold migrate."""
707+
capabilities = dict(FakeDriver.capabilities,
708+
supports_migrate_to_same_host=True)
709+
710+
705711
class PowerUpdateFakeDriver(SmallFakeDriver):
706712
# A specific fake driver for the power-update external event testing.
707713

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
2+
fixes:
3+
- |
4+
This release contains a fix for `bug 1748697`_ which distinguishes between
5+
resize and cold migrate such that the ``allow_resize_to_same_host`` config
6+
option is no longer used to determine if a server can be cold migrated to
7+
the same compute service host. Now when requesting a cold migration the
8+
API will check if the source compute node resource provider has the
9+
``COMPUTE_SAME_HOST_COLD_MIGRATE`` trait and if so the scheduler can select
10+
the source host. Note that the only in-tree compute driver that supports
11+
cold migration to the same host is ``VMwareVCDriver``.
12+
13+
To support rolling upgrades with N-1 computes if a node does not report the
14+
trait and is old the API will fallback to the ``allow_resize_to_same_host``
15+
config option value. That compatibility will be removed in a subsequent
16+
release.
17+
18+
.. _bug 1748697: https://launchpad.net/bugs/1748697

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ psutil>=3.2.2 # BSD
5555
oslo.versionedobjects>=1.35.0 # Apache-2.0
5656
os-brick>=2.6.2 # Apache-2.0
5757
os-resource-classes>=0.4.0 # Apache-2.0
58-
os-traits>=2.0.0 # Apache-2.0
58+
os-traits>=2.1.0 # Apache-2.0
5959
os-vif>=1.14.0 # Apache-2.0
6060
os-win>=3.0.0 # Apache-2.0
6161
castellan>=0.16.0 # Apache-2.0

0 commit comments

Comments
 (0)