Skip to content

Commit 506030c

Browse files
feat: patch https://review.opendev.org/c/openstack/ironic/+/954311 Skip initial reboot to IPA when updating firmware out-of-band
1 parent e2ab955 commit 506030c

File tree

2 files changed

+332
-0
lines changed

2 files changed

+332
-0
lines changed
Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
From 1b2c01185c7907a1d17c4d90cf3bcd73fe345658 Mon Sep 17 00:00:00 2001
2+
From: Jacob Anders <[email protected]>
3+
Date: Tue, 08 Jul 2025 16:25:40 +1000
4+
Subject: [PATCH] Skip initial reboot to IPA when updating firmware out-of-band
5+
6+
This change enables Ironic to skip initial reboot to IPA when
7+
performing out-of-band firmware.
8+
9+
Change-Id: Id055a4ddbde3dbe336717e5f06ca6eb024b90c9f
10+
Signed-off-by: Jacob Anders <[email protected]>
11+
---
12+
13+
diff --git a/ironic/conductor/servicing.py b/ironic/conductor/servicing.py
14+
index 8b385d7..6aaa4a5 100644
15+
--- a/ironic/conductor/servicing.py
16+
+++ b/ironic/conductor/servicing.py
17+
@@ -38,6 +38,7 @@
18+
:param disable_ramdisk: Whether to skip booting ramdisk for servicing.
19+
"""
20+
node = task.node
21+
+ ramdisk_needed = None
22+
try:
23+
# NOTE(ghe): Valid power and network values are needed to perform
24+
# a service operation.
25+
@@ -54,42 +55,53 @@
26+
node.set_driver_internal_info('service_disable_ramdisk',
27+
disable_ramdisk)
28+
task.node.save()
29+
-
30+
utils.node_update_cache(task)
31+
32+
- # Allow the deploy driver to set up the ramdisk again (necessary for IPA)
33+
- try:
34+
- if not disable_ramdisk:
35+
- prepare_result = task.driver.deploy.prepare_service(task)
36+
- else:
37+
- LOG.info('Skipping preparing for service in-band service since '
38+
- 'out-of-band only service has been requested for node '
39+
- '%s', node.uuid)
40+
- prepare_result = None
41+
- except Exception as e:
42+
- msg = (_('Failed to prepare node %(node)s for service: %(e)s')
43+
- % {'node': node.uuid, 'e': e})
44+
- return utils.servicing_error_handler(task, msg, traceback=True)
45+
-
46+
- if prepare_result == states.SERVICEWAIT:
47+
- # Prepare is asynchronous, the deploy driver will need to
48+
- # set node.driver_internal_info['service_steps'] and
49+
- # node.service_step and then make an RPC call to
50+
- # continue_node_service to start service operations.
51+
- task.process_event('wait')
52+
- return
53+
try:
54+
conductor_steps.set_node_service_steps(
55+
task, disable_ramdisk=disable_ramdisk)
56+
+ except exception.InvalidParameterValue:
57+
+ if disable_ramdisk:
58+
+ # NOTE(janders) raising log severity since this will result
59+
+ # in servicing failure
60+
+ LOG.error('Failed to compose list of service steps for node '
61+
+ '%s', node.uuid)
62+
+ raise
63+
+ LOG.debug('Unable to compose list of service steps for node %(node)s '
64+
+ 'will retry once ramdisk is booted.', {'node': node.uuid})
65+
+ ramdisk_needed = True
66+
except Exception as e:
67+
- # Catch all exceptions and follow the error handling
68+
- # path so things are cleaned up properly.
69+
msg = (_('Cannot service node %(node)s: %(msg)s')
70+
% {'node': node.uuid, 'msg': e})
71+
return utils.servicing_error_handler(task, msg)
72+
73+
steps = node.driver_internal_info.get('service_steps', [])
74+
step_index = 0 if steps else None
75+
+ for step in steps:
76+
+ step_requires_ramdisk = step.get('requires_ramdisk')
77+
+ if step_requires_ramdisk:
78+
+ LOG.debug('Found service step %s requiring ramdisk '
79+
+ 'for node %s', step, task.node.uuid)
80+
+ ramdisk_needed = True
81+
+
82+
+ if ramdisk_needed and not disable_ramdisk:
83+
+ try:
84+
+ prepare_result = task.driver.deploy.prepare_service(task)
85+
+ except Exception as e:
86+
+ msg = (_('Failed to prepare node %(node)s for service: %(e)s')
87+
+ % {'node': node.uuid, 'e': e})
88+
+ return utils.servicing_error_handler(task, msg, traceback=True)
89+
+ if prepare_result == states.SERVICEWAIT:
90+
+ # Prepare is asynchronous, the deploy driver will need to
91+
+ # set node.driver_internal_info['service_steps'] and
92+
+ # node.service_step and then make an RPC call to
93+
+ # continue_node_service to start service operations.
94+
+ task.process_event('wait')
95+
+ return
96+
+ else:
97+
+ LOG.debug('Will proceed with servicing node %(node)s '
98+
+ 'without booting the ramdisk.', {'node': node.uuid})
99+
+
100+
do_next_service_step(task, step_index, disable_ramdisk=disable_ramdisk)
101+
102+
103+
diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py
104+
index 39f75fbe..9112e6d 100644
105+
--- a/ironic/conductor/steps.py
106+
+++ b/ironic/conductor/steps.py
107+
@@ -496,9 +496,11 @@
108+
are accepted.
109+
:raises: InvalidParameterValue if there is a problem with the user's
110+
clean steps.
111+
- :raises: NodeCleaningFailure if there was a problem getting the
112+
+ :raises: NodeServicingFailure if there was a problem getting the
113+
service steps.
114+
"""
115+
+ # FIXME(janders) it seems NodeServicingFailure is never actually raised,
116+
+ # perhaps we should remove it?
117+
node = task.node
118+
steps = _validate_user_service_steps(
119+
task, node.driver_internal_info.get('service_steps', []),
120+
diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py
121+
index 8afb909..f501b33 100644
122+
--- a/ironic/drivers/modules/deploy_utils.py
123+
+++ b/ironic/drivers/modules/deploy_utils.py
124+
@@ -1750,7 +1750,7 @@
125+
task.driver.boot.prepare_ramdisk(task, deploy_opts)
126+
127+
128+
-def reboot_to_finish_step(task, timeout=None):
129+
+def reboot_to_finish_step(task, timeout=None, disable_ramdisk=None):
130+
"""Reboot the node into IPA to finish a deploy/clean step.
131+
132+
:param task: a TaskManager instance.
133+
@@ -1759,8 +1759,14 @@
134+
:returns: states.CLEANWAIT if cleaning operation in progress
135+
or states.DEPLOYWAIT if deploy operation in progress.
136+
"""
137+
- disable_ramdisk = task.node.driver_internal_info.get(
138+
- 'cleaning_disable_ramdisk')
139+
+ if disable_ramdisk is None:
140+
+ if task.node.provision_state in [states.CLEANING, states.CLEANWAIT]:
141+
+ disable_ramdisk = task.node.driver_internal_info.get(
142+
+ 'cleaning_disable_ramdisk')
143+
+ elif task.node.provision_state in [states.SERVICING,
144+
+ states.SERVICEWAIT]:
145+
+ disable_ramdisk = task.node.driver_internal_info.get(
146+
+ 'service_disable_ramdisk')
147+
if not disable_ramdisk:
148+
if (manager_utils.is_fast_track(task)
149+
and not task.node.disable_power_off):
150+
diff --git a/ironic/drivers/modules/redfish/firmware.py b/ironic/drivers/modules/redfish/firmware.py
151+
index 2500cd5..555a921 100644
152+
--- a/ironic/drivers/modules/redfish/firmware.py
153+
+++ b/ironic/drivers/modules/redfish/firmware.py
154+
@@ -146,7 +146,7 @@
155+
requires_ramdisk=True)
156+
@base.service_step(priority=0, abortable=False,
157+
argsinfo=_FW_SETTINGS_ARGSINFO,
158+
- requires_ramdisk=True)
159+
+ requires_ramdisk=False)
160+
@base.cache_firmware_components
161+
def update(self, task, settings):
162+
"""Update the Firmware on the node using the settings for components.
163+
@@ -181,7 +181,8 @@
164+
polling=True
165+
)
166+
167+
- return deploy_utils.reboot_to_finish_step(task, timeout=wait_interval)
168+
+ return deploy_utils.reboot_to_finish_step(task, timeout=wait_interval,
169+
+ disable_ramdisk=True)
170+
171+
def _execute_firmware_update(self, node, update_service, settings):
172+
"""Executes the next firmware update to the node
173+
diff --git a/ironic/tests/unit/conductor/test_servicing.py b/ironic/tests/unit/conductor/test_servicing.py
174+
index 22c2612..b46394d 100644
175+
--- a/ironic/tests/unit/conductor/test_servicing.py
176+
+++ b/ironic/tests/unit/conductor/test_servicing.py
177+
@@ -109,14 +109,21 @@
178+
mock_validate.side_effect = exception.NetworkError()
179+
self.__do_node_service_validate_fail(mock_validate)
180+
181+
+ @mock.patch.object(conductor_steps, 'set_node_service_steps',
182+
+ autospec=True)
183+
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
184+
autospec=True)
185+
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
186+
autospec=True)
187+
def test__do_node_service_prepare_service_fail(self, mock_prep,
188+
mock_validate,
189+
- service_steps=None):
190+
- # Exception from task.driver.deploy.prepare_cleaning should cause node
191+
+ mock_steps,
192+
+ service_steps=[]):
193+
+ # NOTE(janders) after removing unconditional initial reboot into
194+
+ # ramdisk, set_node_service_steps needs to InvalidParameterValue
195+
+ # to force boot into ramdisk
196+
+ mock_steps.side_effect = exception.InvalidParameterValue('error')
197+
+ # Exception from task.driver.deploy.prepare_service should cause node
198+
# to go to SERVICEFAIL
199+
mock_prep.side_effect = exception.InvalidParameterValue('error')
200+
tgt_prov_state = states.ACTIVE
201+
@@ -135,17 +142,76 @@
202+
self.assertFalse(node.maintenance)
203+
self.assertIsNone(node.fault)
204+
205+
+ @mock.patch.object(conductor_steps, 'set_node_service_steps',
206+
+ autospec=True)
207+
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
208+
autospec=True)
209+
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
210+
autospec=True)
211+
def test__do_node_service_prepare_service_wait(self, mock_prep,
212+
- mock_validate):
213+
+ mock_validate,
214+
+ mock_steps):
215+
service_steps = [
216+
{'step': 'trigger_servicewait', 'priority': 10,
217+
'interface': 'vendor'}
218+
]
219+
+ mock_steps.side_effect = exception.InvalidParameterValue('error')
220+
+ mock_prep.return_value = states.SERVICEWAIT
221+
+ tgt_prov_state = states.ACTIVE
222+
+ node = obj_utils.create_test_node(
223+
+ self.context, driver='fake-hardware',
224+
+ provision_state=states.SERVICING,
225+
+ target_provision_state=tgt_prov_state,
226+
+ vendor_interface='fake')
227+
+ with task_manager.acquire(
228+
+ self.context, node.uuid, shared=False) as task:
229+
+ servicing.do_node_service(task, service_steps=service_steps)
230+
+ node.refresh()
231+
+ self.assertEqual(states.SERVICEWAIT, node.provision_state)
232+
+ self.assertEqual(tgt_prov_state, node.target_provision_state)
233+
+ mock_prep.assert_called_once_with(mock.ANY, mock.ANY)
234+
+ mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
235+
236+
+ @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
237+
+ autospec=True)
238+
+ @mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
239+
+ autospec=True)
240+
+ def test__do_node_service_out_of_band(self, mock_prep,
241+
+ mock_validate):
242+
+ # NOTE(janders) this test ensures ramdisk isn't prepared if
243+
+ # steps do not require it
244+
+ service_steps = [
245+
+ {'step': 'trigger_servicewait', 'priority': 10,
246+
+ 'interface': 'vendor'}
247+
+ ]
248+
+ mock_prep.return_value = states.SERVICEWAIT
249+
+ tgt_prov_state = states.ACTIVE
250+
+ node = obj_utils.create_test_node(
251+
+ self.context, driver='fake-hardware',
252+
+ provision_state=states.SERVICING,
253+
+ target_provision_state=tgt_prov_state,
254+
+ vendor_interface='fake')
255+
+ with task_manager.acquire(
256+
+ self.context, node.uuid, shared=False) as task:
257+
+ servicing.do_node_service(task, service_steps=service_steps)
258+
+ node.refresh()
259+
+ self.assertEqual(states.SERVICEWAIT, node.provision_state)
260+
+ self.assertEqual(tgt_prov_state, node.target_provision_state)
261+
+ mock_prep.assert_not_called()
262+
+ mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
263+
+
264+
+ @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
265+
+ autospec=True)
266+
+ @mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
267+
+ autospec=True)
268+
+ def test__do_node_service_requires_ramdisk_fallback(self, mock_prep,
269+
+ mock_validate):
270+
+ # NOTE(janders): here we set 'requires_ramdisk': 'True'
271+
+ # to force ramdisk_needed to be set and trigger prepare ramdisk
272+
+ service_steps = [
273+
+ {'step': 'trigger_servicewait', 'priority': 10,
274+
+ 'interface': 'vendor', 'requires_ramdisk': 'True'}
275+
+ ]
276+
mock_prep.return_value = states.SERVICEWAIT
277+
tgt_prov_state = states.ACTIVE
278+
node = obj_utils.create_test_node(
279+
@@ -194,11 +260,8 @@
280+
@mock.patch.object(conductor_steps, 'set_node_service_steps',
281+
autospec=True)
282+
def __do_node_service_steps_fail(self, mock_steps, mock_validate,
283+
- service_steps=None, invalid_exc=True):
284+
- if invalid_exc:
285+
- mock_steps.side_effect = exception.InvalidParameterValue('invalid')
286+
- else:
287+
- mock_steps.side_effect = exception.NodeCleaningFailure('failure')
288+
+ service_steps=None):
289+
+ mock_steps.side_effect = exception.NodeCleaningFailure('failure')
290+
tgt_prov_state = states.ACTIVE
291+
node = obj_utils.create_test_node(
292+
self.context, driver='fake-hardware',
293+
@@ -224,12 +287,8 @@
294+
def test_do_node_service_steps_fail_poweroff(self, mock_steps,
295+
mock_validate,
296+
mock_power,
297+
- service_steps=None,
298+
- invalid_exc=True):
299+
- if invalid_exc:
300+
- mock_steps.side_effect = exception.InvalidParameterValue('invalid')
301+
- else:
302+
- mock_steps.side_effect = exception.NodeCleaningFailure('failure')
303+
+ service_steps=None):
304+
+ mock_steps.side_effect = exception.NodeCleaningFailure('failure')
305+
tgt_prov_state = states.ACTIVE
306+
self.config(poweroff_in_cleanfail=True, group='conductor')
307+
node = obj_utils.create_test_node(
308+
@@ -249,9 +308,7 @@
309+
self.assertFalse(mock_power.called)
310+
311+
def test__do_node_service_steps_fail(self):
312+
- for invalid in (True, False):
313+
- self.__do_node_service_steps_fail(service_steps=[self.deploy_raid],
314+
- invalid_exc=invalid)
315+
+ self.__do_node_service_steps_fail(service_steps=[self.deploy_raid])
316+
317+
@mock.patch.object(conductor_steps, 'set_node_service_steps',
318+
autospec=True)
319+
diff --git a/releasenotes/notes/servicing-remove-initial-reboot-into-ramdisk-c1840524832435c2.yaml b/releasenotes/notes/servicing-remove-initial-reboot-into-ramdisk-c1840524832435c2.yaml
320+
new file mode 100644
321+
index 0000000..296779a
322+
--- /dev/null
323+
+++ b/releasenotes/notes/servicing-remove-initial-reboot-into-ramdisk-c1840524832435c2.yaml
324+
@@ -0,0 +1,7 @@
325+
+---
326+
+fixes:
327+
+ - |
328+
+ Removes initial, unconditional reboot into ramdisk during servicing when
329+
+ not required by specified service steps. This reduces the total number of
330+
+ reboots needed when performing servicing, speeding up the process.
331+
+

containers/ironic/patches/series

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
0001_grub_config.patch
2+
0002_skip_reboot_firmware_update.patch

0 commit comments

Comments
 (0)