Skip to content

Commit 1456f6d

Browse files
authored
Merge pull request #104 from stackhpc/upstream/2023.1-2024-09-02
Synchronise 2023.1 with upstream
2 parents 7f8fa87 + 6b78420 commit 1456f6d

File tree

10 files changed

+325
-132
lines changed

10 files changed

+325
-132
lines changed

doc/notification_samples/instance-create-error.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
"ip_addresses": [],
1919
"launched_at": null,
2020
"power_state": "pending",
21-
"state": "building"
21+
"state": "building",
22+
"host": null,
23+
"node": null
2224
}
2325
},
2426
"priority":"ERROR",

doc/source/cli/nova-manage.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1531,7 +1531,9 @@ command.
15311531
* - 5
15321532
- Instance state invalid (must be stopped and unlocked)
15331533
* - 6
1534-
- Instance is not attached to volume
1534+
- Volume is not attached to the instance
1535+
* - 7
1536+
- Connector host is not correct
15351537

15361538

15371539
Libvirt Commands

nova/cmd/manage.py

Lines changed: 134 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"""
2323

2424
import collections
25+
from contextlib import contextmanager
2526
import functools
2627
import os
2728
import re
@@ -144,6 +145,33 @@ def format_dict(dct, dict_property="Property", dict_value='Value',
144145
return encodeutils.safe_encode(pt.get_string()).decode()
145146

146147

148+
@contextmanager
149+
def locked_instance(cell_mapping, instance, reason):
150+
"""Context manager to lock and unlock instance,
151+
lock state will be restored regardless of the success or failure
152+
of target functionality.
153+
154+
:param cell_mapping: instance-cell-mapping
155+
:param instance: instance to be lock and unlock
156+
:param reason: reason, why lock is required
157+
"""
158+
159+
compute_api = api.API()
160+
161+
initial_state = 'locked' if instance.locked else 'unlocked'
162+
if not instance.locked:
163+
with context.target_cell(
164+
context.get_admin_context(), cell_mapping) as cctxt:
165+
compute_api.lock(cctxt, instance, reason=reason)
166+
try:
167+
yield
168+
finally:
169+
if initial_state == 'unlocked':
170+
with context.target_cell(
171+
context.get_admin_context(), cell_mapping) as cctxt:
172+
compute_api.unlock(cctxt, instance)
173+
174+
147175
class DbCommands(object):
148176
"""Class for managing the main database."""
149177

@@ -2998,10 +3026,8 @@ def _refresh(self, instance_uuid, volume_id, connector):
29983026
:param instance_uuid: UUID of instance
29993027
:param volume_id: ID of volume attached to the instance
30003028
:param connector: Connector with which to create the new attachment
3029+
:return status_code: volume-refresh status_code 0 on success
30013030
"""
3002-
volume_api = cinder.API()
3003-
compute_rpcapi = rpcapi.ComputeAPI()
3004-
compute_api = api.API()
30053031

30063032
ctxt = context.get_admin_context()
30073033
im = objects.InstanceMapping.get_by_instance_uuid(ctxt, instance_uuid)
@@ -3017,111 +3043,104 @@ def _refresh(self, instance_uuid, volume_id, connector):
30173043
state=instance.vm_state,
30183044
method='refresh connection_info (must be stopped)')
30193045

3020-
if instance.locked:
3021-
raise exception.InstanceInvalidState(
3022-
instance_uuid=instance_uuid, attr='locked', state='True',
3023-
method='refresh connection_info (must be unlocked)')
3024-
3025-
compute_api.lock(
3026-
cctxt, instance,
3027-
reason=(
3028-
f'Refreshing connection_info for BDM {bdm.uuid} '
3029-
f'associated with instance {instance_uuid} and volume '
3030-
f'{volume_id}.'))
3031-
3032-
# NOTE(lyarwood): Yes this is weird but we need to recreate the admin
3033-
# context here to ensure the lock above uses a unique request-id
3034-
# versus the following refresh and eventual unlock.
3035-
ctxt = context.get_admin_context()
3036-
with context.target_cell(ctxt, im.cell_mapping) as cctxt:
3037-
instance_action = None
3038-
new_attachment_id = None
3039-
try:
3040-
# Log this as an instance action so operators and users are
3041-
# aware that this has happened.
3042-
instance_action = objects.InstanceAction.action_start(
3043-
cctxt, instance_uuid,
3044-
instance_actions.NOVA_MANAGE_REFRESH_VOLUME_ATTACHMENT)
3045-
3046-
# Create a blank attachment to keep the volume reserved
3047-
new_attachment_id = volume_api.attachment_create(
3048-
cctxt, volume_id, instance_uuid)['id']
3049-
3050-
# RPC call to the compute to cleanup the connections, which
3051-
# will in turn unmap the volume from the compute host
3052-
# TODO(lyarwood): Add delete_attachment as a kwarg to
3053-
# remove_volume_connection as is available in the private
3054-
# method within the manager.
3046+
locking_reason = (
3047+
f'Refreshing connection_info for BDM {bdm.uuid} '
3048+
f'associated with instance {instance_uuid} and volume '
3049+
f'{volume_id}.')
3050+
3051+
with locked_instance(im.cell_mapping, instance, locking_reason):
3052+
return self._do_refresh(
3053+
cctxt, instance, volume_id, bdm, connector)
3054+
3055+
def _do_refresh(self, cctxt, instance,
3056+
volume_id, bdm, connector):
3057+
volume_api = cinder.API()
3058+
compute_rpcapi = rpcapi.ComputeAPI()
3059+
3060+
new_attachment_id = None
3061+
try:
3062+
# Log this as an instance action so operators and users are
3063+
# aware that this has happened.
3064+
instance_action = objects.InstanceAction.action_start(
3065+
cctxt, instance.uuid,
3066+
instance_actions.NOVA_MANAGE_REFRESH_VOLUME_ATTACHMENT)
3067+
3068+
# Create a blank attachment to keep the volume reserved
3069+
new_attachment_id = volume_api.attachment_create(
3070+
cctxt, volume_id, instance.uuid)['id']
3071+
3072+
# RPC call to the compute to cleanup the connections, which
3073+
# will in turn unmap the volume from the compute host
3074+
# TODO(lyarwood): Add delete_attachment as a kwarg to
3075+
# remove_volume_connection as is available in the private
3076+
# method within the manager.
3077+
if instance.host == connector['host']:
30553078
compute_rpcapi.remove_volume_connection(
30563079
cctxt, instance, volume_id, instance.host)
3080+
else:
3081+
msg = (
3082+
f"The compute host '{connector['host']}' in the "
3083+
f"connector does not match the instance host "
3084+
f"'{instance.host}'.")
3085+
raise exception.HostConflict(_(msg))
3086+
3087+
# Delete the existing volume attachment if present in the bdm.
3088+
# This isn't present when the original attachment was made
3089+
# using the legacy cinderv2 APIs before the cinderv3 attachment
3090+
# based APIs were present.
3091+
if bdm.attachment_id:
3092+
volume_api.attachment_delete(cctxt, bdm.attachment_id)
3093+
3094+
# Update the attachment with host connector, this regenerates
3095+
# the connection_info that we can now stash in the bdm.
3096+
new_connection_info = volume_api.attachment_update(
3097+
cctxt, new_attachment_id, connector,
3098+
bdm.device_name)['connection_info']
3099+
3100+
# Before we save it to the BDM ensure the serial is stashed as
3101+
# is done in various other codepaths when attaching volumes.
3102+
if 'serial' not in new_connection_info:
3103+
new_connection_info['serial'] = bdm.volume_id
3104+
3105+
# Save the new attachment id and connection_info to the DB
3106+
bdm.attachment_id = new_attachment_id
3107+
bdm.connection_info = jsonutils.dumps(new_connection_info)
3108+
bdm.save()
3109+
3110+
# Finally mark the attachment as complete, moving the volume
3111+
# status from attaching to in-use ahead of the instance
3112+
# restarting
3113+
volume_api.attachment_complete(cctxt, new_attachment_id)
3114+
return 0
30573115

3058-
# Delete the existing volume attachment if present in the bdm.
3059-
# This isn't present when the original attachment was made
3060-
# using the legacy cinderv2 APIs before the cinderv3 attachment
3061-
# based APIs were present.
3062-
if bdm.attachment_id:
3063-
volume_api.attachment_delete(cctxt, bdm.attachment_id)
3064-
3065-
# Update the attachment with host connector, this regenerates
3066-
# the connection_info that we can now stash in the bdm.
3067-
new_connection_info = volume_api.attachment_update(
3068-
cctxt, new_attachment_id, connector,
3069-
bdm.device_name)['connection_info']
3070-
3071-
# Before we save it to the BDM ensure the serial is stashed as
3072-
# is done in various other codepaths when attaching volumes.
3073-
if 'serial' not in new_connection_info:
3074-
new_connection_info['serial'] = bdm.volume_id
3075-
3076-
# Save the new attachment id and connection_info to the DB
3077-
bdm.attachment_id = new_attachment_id
3078-
bdm.connection_info = jsonutils.dumps(new_connection_info)
3116+
finally:
3117+
# If the bdm.attachment_id wasn't updated make sure we clean
3118+
# up any attachments created during the run.
3119+
bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
3120+
cctxt, volume_id, instance.uuid)
3121+
if (
3122+
new_attachment_id and
3123+
bdm.attachment_id != new_attachment_id
3124+
):
3125+
volume_api.attachment_delete(cctxt, new_attachment_id)
3126+
3127+
# If we failed during attachment_update the bdm.attachment_id
3128+
# has already been deleted so recreate it now to ensure the
3129+
# volume is still associated with the instance and clear the
3130+
# now stale connection_info.
3131+
try:
3132+
volume_api.attachment_get(cctxt, bdm.attachment_id)
3133+
except exception.VolumeAttachmentNotFound:
3134+
bdm.attachment_id = volume_api.attachment_create(
3135+
cctxt, volume_id, instance.uuid)['id']
3136+
bdm.connection_info = None
30793137
bdm.save()
30803138

3081-
# Finally mark the attachment as complete, moving the volume
3082-
# status from attaching to in-use ahead of the instance
3083-
# restarting
3084-
volume_api.attachment_complete(cctxt, new_attachment_id)
3085-
return 0
3086-
3087-
finally:
3088-
# If the bdm.attachment_id wasn't updated make sure we clean
3089-
# up any attachments created during the run.
3090-
bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
3091-
cctxt, volume_id, instance_uuid)
3092-
if (
3093-
new_attachment_id and
3094-
bdm.attachment_id != new_attachment_id
3095-
):
3096-
volume_api.attachment_delete(cctxt, new_attachment_id)
3097-
3098-
# If we failed during attachment_update the bdm.attachment_id
3099-
# has already been deleted so recreate it now to ensure the
3100-
# volume is still associated with the instance and clear the
3101-
# now stale connection_info.
3102-
try:
3103-
volume_api.attachment_get(cctxt, bdm.attachment_id)
3104-
except exception.VolumeAttachmentNotFound:
3105-
bdm.attachment_id = volume_api.attachment_create(
3106-
cctxt, volume_id, instance_uuid)['id']
3107-
bdm.connection_info = None
3108-
bdm.save()
3109-
3110-
# Finish the instance action if it was created and started
3111-
# TODO(lyarwood): While not really required we should store
3112-
# the exec and traceback in here on failure.
3113-
if instance_action:
3114-
instance_action.finish()
3115-
3116-
# NOTE(lyarwood): As above we need to unlock the instance with
3117-
# a fresh context and request-id to keep it unique. It's safe
3118-
# to assume that the instance is locked as this point as the
3119-
# earlier call to lock isn't part of this block.
3120-
with context.target_cell(
3121-
context.get_admin_context(),
3122-
im.cell_mapping
3123-
) as u_cctxt:
3124-
compute_api.unlock(u_cctxt, instance)
3139+
# Finish the instance action if it was created and started
3140+
# TODO(lyarwood): While not really required we should store
3141+
# the exec and traceback in here on failure.
3142+
if instance_action:
3143+
instance_action.finish()
31253144

31263145
@action_description(
31273146
_("Refresh the connection info for a given volume attachment"))
@@ -3145,6 +3164,7 @@ def refresh(self, instance_uuid=None, volume_id=None, connector_path=None):
31453164
* 4: Instance does not exist.
31463165
* 5: Instance state invalid.
31473166
* 6: Volume is not attached to instance.
3167+
* 7: Connector host is not correct.
31483168
"""
31493169
try:
31503170
# TODO(lyarwood): Make this optional and provide a rpcapi capable
@@ -3160,6 +3180,12 @@ def refresh(self, instance_uuid=None, volume_id=None, connector_path=None):
31603180
# Refresh the volume attachment
31613181
return self._refresh(instance_uuid, volume_id, connector)
31623182

3183+
except exception.HostConflict as e:
3184+
print(
3185+
f"The command 'nova-manage volume_attachment get_connector' "
3186+
f"may have been run on the wrong compute host. Or the "
3187+
f"instance host may be wrong and in need of repair.\n{e}")
3188+
return 7
31633189
except exception.VolumeBDMNotFound as e:
31643190
print(str(e))
31653191
return 6
@@ -3172,12 +3198,15 @@ def refresh(self, instance_uuid=None, volume_id=None, connector_path=None):
31723198
) as e:
31733199
print(str(e))
31743200
return 4
3175-
except (ValueError, OSError):
3201+
except ValueError as e:
31763202
print(
31773203
f'Failed to open {connector_path}. Does it contain valid '
3178-
f'connector_info data?'
3204+
f'connector_info data?\nError: {str(e)}'
31793205
)
31803206
return 3
3207+
except OSError as e:
3208+
print(str(e))
3209+
return 3
31813210
except exception.InvalidInput as e:
31823211
print(str(e))
31833212
return 2

nova/compute/claims.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def __init__(
6464
super().__init__(migration=migration)
6565
# Stash a copy of the instance at the current point of time
6666
self.instance = instance.obj_clone()
67+
self.instance_ref = instance
6768
self.nodename = nodename
6869
self.tracker = tracker
6970
self._pci_requests = pci_requests
@@ -82,7 +83,7 @@ def abort(self):
8283
been aborted.
8384
"""
8485
LOG.debug("Aborting claim: %s", self, instance=self.instance)
85-
self.tracker.abort_instance_claim(self.context, self.instance,
86+
self.tracker.abort_instance_claim(self.context, self.instance_ref,
8687
self.nodename)
8788

8889
def _claim_test(self, compute_node, limits=None):

nova/exception.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2530,3 +2530,7 @@ class NotSupportedComputeForEvacuateV295(NotSupported):
25302530
"instance on destination. To evacuate before upgrades are "
25312531
"complete please use an older microversion. Required version "
25322532
"for compute %(expected), current version %(currently)s")
2533+
2534+
2535+
class HostConflict(Exception):
2536+
pass

nova/pci/manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ def free_instance_allocations(
407407
for dev in self.pci_devs:
408408
if (dev.status == fields.PciDeviceStatus.ALLOCATED and
409409
dev.instance_uuid == instance['uuid']):
410-
self._free_device(dev)
410+
self._free_device(dev, instance)
411411

412412
def free_instance_claims(
413413
self, context: ctx.RequestContext, instance: 'objects.Instance',
@@ -423,7 +423,7 @@ def free_instance_claims(
423423
for dev in self.pci_devs:
424424
if (dev.status == fields.PciDeviceStatus.CLAIMED and
425425
dev.instance_uuid == instance['uuid']):
426-
self._free_device(dev)
426+
self._free_device(dev, instance)
427427

428428
def free_instance(
429429
self, context: ctx.RequestContext, instance: 'objects.Instance',

0 commit comments

Comments (0)