Skip to content

Commit 70f1a89

Browse files
committed
Add badfish support for iDRAC cleanup operations
This commit adds support for using the badfish container to perform iDRAC cleanup operations on Dell hardware. Badfish is used to clear the iDRAC job queue and reset the iDRAC service to improve stability during boot operations. It does not replace redfish_command or URI modules, which continue to be used for standard Redfish operations. Changes: - Created new 'badfish' Ansible role with install.yml and call.yml tasks - Added 'reset_idrac' parameter to control badfish-based iDRAC cleanup operations - Integrated badfish container installation into bastion bootstrap process - Updated boot-iso/dell.yml to use badfish for: - Clearing iDRAC job queue (when reset_idrac is enabled) - Resetting iDRAC service (when reset_idrac is enabled) - Waiting for iDRAC to be available after reset - Replaced fixed pause with wait_for module to verify host power down - All badfish operations use quay.io/quads/badfish container image The badfish role provides a reusable call.yml task file that accepts badfish_host, badfish_user, badfish_password, and badfish_args parameters, making it easy to call badfish commands from other roles. The 'reset_idrac' parameter pulls and uses the badfish container to perform iDRAC cleanup operations, which helps resolve issues with stuck job queues and improves iDRAC stability during virtual media boot operations. AI Model: Claude Sonnet 4.5
1 parent d90d48a commit 70f1a89

File tree

6 files changed

+191
-23
lines changed

6 files changed

+191
-23
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
---
2+
# badfish call tasks
3+
# Reusable task file to run badfish container with various options
4+
#
5+
# Required variables:
6+
# badfish_host: BMC address or hostname
7+
# badfish_user: BMC username
8+
# badfish_password: BMC password
9+
# badfish_args: List of badfish command and its arguments (e.g., ['--power-on'] or ['--mount-virtual-media', 'http://example.com/image.iso'])
10+
#
11+
# Optional variables:
12+
# badfish_dns: DNS server IP (for VPN environments)
13+
# badfish_no_log: Set to true to keep the command (which carries the BMC password) and its output out of the logs (default: false, i.e. logged)
14+
# delay: Delay between retries in seconds (default: omit)
15+
# retries: Number of retries (default: omit)
16+
17+
# Guard clause: stop unless each required badfish_* input is both defined and
# non-empty, so the tasks that follow can use them without further checks.
- name: Validate required badfish parameters
  ansible.builtin.assert:
    quiet: true
    fail_msg: >-
      Missing or empty required badfish parameters.
      Required: badfish_host, badfish_user, badfish_password, badfish_args
    that:
      - badfish_host is defined
      - badfish_host | length > 0
      - badfish_user is defined
      - badfish_user | length > 0
      - badfish_password is defined
      - badfish_password | length > 0
      - badfish_args is defined
      - badfish_args | length > 0
30+
31+
# Assemble the argument vector for running the badfish image under podman and
# publish it as the badfish_command_list fact. The underscore-prefixed vars are
# scratch values scoped to this task only.
- name: Build badfish command
  ansible.builtin.set_fact:
    badfish_command_list: "{{ _badfish_argv }}"
  vars:
    # podman invocation; --dns is appended only when badfish_dns is defined
    _badfish_runtime: "{{ ['podman', 'run', '--rm'] + (['--dns', badfish_dns] if badfish_dns is defined else []) }}"
    # badfish CLI: BMC host and credentials first, then the caller's arguments
    _badfish_cli: "{{ ['-H', badfish_host, '-u', badfish_user, '-p', badfish_password] + (badfish_args | default([])) }}"
    # runtime + image reference + CLI arguments
    _badfish_argv: >-
      {{ _badfish_runtime + ['quay.io/quads/badfish'] + _badfish_cli }}
46+
47+
# Show the command that is about to run, for troubleshooting.
# badfish_command_list carries the BMC password (the value following `-p`), so
# any argument that is exactly the password is replaced with a fixed mask
# before the list is joined; the credential must never reach the play log.
- name: Display badfish command
  vars:
    # Anchored, escaped pattern: matches only an argument equal to the password.
    _badfish_password_pattern: "^{{ badfish_password | string | regex_escape }}$"
  ansible.builtin.debug:
    msg: >-
      Running badfish command:
      {{ badfish_command_list | map('string')
      | map('regex_replace', _badfish_password_pattern, '********')
      | join(' ') }}
50+
51+
# Execute the assembled podman/badfish argument vector and keep the outcome in
# badfish_result for the tasks that follow.
- name: Run badfish container
  ansible.builtin.command:
    argv: "{{ badfish_command_list }}"
  register: badfish_result
  # Only the exit status decides whether an attempt counts as failed.
  failed_when: badfish_result.rc != 0
  # Repeat until an attempt is not marked failed; pacing comes from the
  # optional caller-supplied `retries` and `delay` variables.
  until: not badfish_result.failed
  retries: "{{ retries | default(omit) }}"
  delay: "{{ delay | default(omit) }}"
  # Callers may set badfish_no_log to censor this task's logged result.
  no_log: "{{ badfish_no_log | default(false) }}"
60+
61+
# Echo the captured standard output of the badfish run, when one was recorded.
- name: Display badfish stdout
  when: badfish_result.stdout is defined
  ansible.builtin.debug:
    msg: "badfish stdout: {{ badfish_result.stdout }}"

# Echo the captured standard error of the badfish run, when one was recorded.
- name: Display badfish stderr
  when: badfish_result.stderr is defined
  ansible.builtin.debug:
    msg: "badfish stderr: {{ badfish_result.stderr }}"
70+
71+
# Treat an "ERROR" marker on stderr as a failure, independently of the exit
# status check made when the container ran.
#
# Opt-out: set badfish_ignore_errors to true to skip this check.
# `ignore_errors` is an Ansible task keyword, not a variable, so a caller that
# sets the keyword on its include does not define anything readable here. The
# legacy variable name is still honoured as a fallback for callers that pass
# it explicitly as a variable.
- name: Fail if badfish reports an error in stderr
  ansible.builtin.fail:
    msg: "badfish reported an error: {{ badfish_result.stderr }}"
  when:
    - not (badfish_ignore_errors | default(ignore_errors | default(false)) | bool)
    - badfish_result.stderr is defined and 'ERROR' in badfish_result.stderr
77+
78+
# Copy the registered result into badfish_command_result so that it can be
# read after this task file has finished.
- name: Set fact with badfish result for external access
  ansible.builtin.set_fact:
    badfish_command_result: '{{ badfish_result }}'
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
# badfish install tasks
3+
# Pulls the badfish container image from quay.io
4+
5+
# podman must be present before the badfish image can be pulled or run.
- name: Ensure podman package is installed
  ansible.builtin.package:
    state: present
    name: podman
9+
10+
# Fetch the badfish image from quay.io ahead of time.
- name: Pull badfish container image
  ansible.builtin.command:
    argv:
      - podman
      - pull
      - quay.io/quads/badfish

ansible/roles/bastion-install/defaults/main.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@ rh_crucible_url: https://github.com/perftool-incubator/crucible
1414

1515
# Since the use of tc on the bastion machine is rare, we disable rebooting the bastion machine by default
1616
bastion_install_tc_reboot: false
17+
18+
# Reset iDRAC service using badfish container (pulls and uses badfish container
19+
# to clear job queue and reset iDRAC service)
20+
reset_idrac: false

ansible/roles/bastion-install/tasks/main.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@
4848
disable_gpg_check: yes
4949
when: ansible_facts['distribution_major_version'] is version('9', '>=')
5050

51+
# Run the badfish role's install tasks, but only when iDRAC reset support has
# been switched on through reset_idrac.
- name: Install badfish container image
  when: reset_idrac | bool
  include_role:
    tasks_from: install
    name: badfish
56+
5157
- name: Install python
5258
pip:
5359
name: python-hpilo

ansible/roles/boot-iso/tasks/dell.yml

Lines changed: 85 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,41 +2,79 @@
22
# Dell tasks for booting an iso
33

44
- name: Set Virtual Media ISO
5-
set_fact:
5+
ansible.builtin.set_fact:
66
_virtual_media_iso: "{{ virtual_media_iso | default(hostvars[item]['boot_iso']) }}"
77

8+
# Best-effort cleanup: clear the iDRAC job queue through the badfish role's
# call tasks, using the BMC address and credentials of the current item.
#
# include_role is a dynamic include, so a keyword placed on this task covers
# only the include statement itself. `apply` is what hands ignore_errors to
# the tasks inside the role; without it a badfish failure would still stop
# the play for this host despite the task-level ignore_errors below.
- name: "Dell - Clear iDrac job queue for {{ item }} (badfish)"
  ansible.builtin.include_role:
    name: badfish
    tasks_from: call
    apply:
      ignore_errors: true
  vars:
    badfish_host: "{{ hostvars[item]['bmc_address'] }}"
    badfish_user: "{{ hostvars[item]['bmc_user'] }}"
    badfish_password: "{{ hostvars[item]['bmc_password'] }}"
    badfish_args:
      - "--clear-jobs"
      - "--force"
  # Still needed for errors raised by the include statement itself.
  ignore_errors: true
  when: reset_idrac | bool
21+
822
- name: "Dell - Power down machine prior to booting iso for {{ item }}"
9-
shell: |
10-
ipmitool -I lanplus -H {{ hostvars[item]['bmc_address'] }} -U {{ hostvars[item]['bmc_user'] }} -P {{ hostvars[item]['bmc_password'] }} chassis power off
23+
ansible.builtin.command:
24+
cmd: >-
25+
ipmitool -I lanplus -H "{{ hostvars[item]['bmc_address'] }}"
26+
-U "{{ hostvars[item]['bmc_user'] }}"
27+
-P "{{ hostvars[item]['bmc_password'] }}" chassis power off
1128
ignore_errors: true
1229
register: ipmi_poweroff
1330

14-
- name: "Dell - Pause for power down for {{ item }}"
15-
pause:
16-
seconds: 10
31+
# Best-effort iDRAC reset (`--racreset`) through the badfish role's call
# tasks, using the BMC address and credentials of the current item.
#
# include_role is a dynamic include, so a keyword placed on this task covers
# only the include statement itself. `apply` is what hands ignore_errors to
# the tasks inside the role; without it a badfish failure would still stop
# the play for this host despite the task-level ignore_errors below.
- name: "Dell - Reset iDRAC for {{ item }} (badfish)"
  ansible.builtin.include_role:
    name: badfish
    tasks_from: call
    apply:
      ignore_errors: true
  vars:
    badfish_host: "{{ hostvars[item]['bmc_address'] }}"
    badfish_user: "{{ hostvars[item]['bmc_user'] }}"
    badfish_password: "{{ hostvars[item]['bmc_password'] }}"
    badfish_args:
      - "--racreset"
  # Still needed for errors raised by the include statement itself.
  ignore_errors: true
  when: reset_idrac | bool
43+
44+
# When the power-off request did not fail, wait (up to 60 seconds, starting
# after a 2 second delay) for port 22 on the node to stop accepting
# connections. The node is addressed by ansible_host, falling back to its
# inventory hostname.
- name: "Dell - Wait for power down for {{ item }}"
  when: not ipmi_poweroff.failed
  ansible.builtin.wait_for:
    host: "{{ hostvars[item]['ansible_host'] | default(hostvars[item]['inventory_hostname']) }}"
    port: 22
    state: stopped
    delay: 2
    timeout: 60
1852

19-
- name: Dell - Set OneTimeBoot VirtualCD
20-
uri:
21-
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/Actions/Oem/EID_674_Manager.ImportSystemConfiguration"
53+
# Fixed 30 second wait, taken only when an iDRAC reset was requested, before
# the iDRAC is polled again.
- name: "Ensure iDRAC reset order is passed for {{ item }}"
  ansible.builtin.pause:
    seconds: 30
  when: reset_idrac | bool
57+
58+
- name: "Dell - Wait for iDRAC to be available for {{ item }}"
59+
ansible.builtin.uri:
60+
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1"
2261
user: "{{ hostvars[item]['bmc_user'] }}"
2362
password: "{{ hostvars[item]['bmc_password'] }}"
24-
method: POST
63+
method: GET
2564
headers:
2665
content-type: application/json
2766
Accept: application/json
28-
body:
29-
{
30-
"ShareParameters": { "Target": "ALL" },
31-
"ImportBuffer": '<SystemConfiguration><Component FQDD="iDRAC.Embedded.1"><Attribute Name="ServerBoot.1#BootOnce">Enabled</Attribute><Attribute Name="ServerBoot.1#FirstBootDevice">VCD-DVD</Attribute></Component></SystemConfiguration>',
32-
}
33-
body_format: json
34-
validate_certs: no
35-
status_code: 202
36-
return_content: yes
67+
validate_certs: false
68+
status_code: [200, 201, 301, 302]
69+
register: racreset_result
70+
until: racreset_result.status in [200, 201, 301, 302]
71+
retries: 60
72+
delay: 5
73+
failed_when: false
74+
when: reset_idrac | bool
3775

3876
- name: "Dell - Check for Virtual Media for {{ item }}"
39-
uri:
77+
ansible.builtin.uri:
4078
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD"
4179
user: "{{ hostvars[item]['bmc_user'] }}"
4280
password: "{{ hostvars[item]['bmc_password'] }}"
@@ -50,12 +88,15 @@
5088
status_code: 200
5189
return_content: yes
5290
register: check_virtual_media
91+
retries: 10
92+
delay: 10
93+
until: check_virtual_media.status == 200
5394

5495
- name: Block to rescue incase of stuck virtual media
5596
when: check_virtual_media.json.Image
5697
block:
5798
- name: "Dell - Eject any CD Virtual Media for {{ item }}"
58-
uri:
99+
ansible.builtin.uri:
59100
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD/Actions/VirtualMedia.EjectMedia"
60101
user: "{{ hostvars[item]['bmc_user'] }}"
61102
password: "{{ hostvars[item]['bmc_password'] }}"
@@ -72,15 +113,17 @@
72113
rescue:
73114
# Use racadm to address the failed redfish unmount of old virtual media
74115
- name: "Force mount of a existing image for {{ item }}"
75-
raw: racadm remoteimage -c -u "" -p "" -l http://{{ http_store_host }}:{{ http_store_port }}/{{ _virtual_media_iso }}
116+
ansible.builtin.raw: >-
117+
racadm remoteimage -c -u "" -p "" -l http://{{ http_store_host }}:{{ http_store_port }}/{{ _virtual_media_iso }}
76118
delegate_to: "{{ hostvars[item]['bmc_address'] }}"
77119
vars:
78120
ansible_user: "{{ hostvars[item]['bmc_user'] }}"
79121
ansible_password: "{{ hostvars[item]['bmc_password'] }}"
80122
ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'
81123

82124
- name: "Force unmount of the existing image for {{ item }}"
83-
raw: racadm remoteimage -d
125+
ansible.builtin.raw: >-
126+
racadm remoteimage -d
84127
delegate_to: "{{ hostvars[item]['bmc_address'] }}"
85128
vars:
86129
ansible_user: "{{ hostvars[item]['bmc_user'] }}"
@@ -106,6 +149,25 @@
106149
retries: 10
107150
delay: 30
108151

152+
# POST an ImportSystemConfiguration request to the iDRAC that enables
# ServerBoot BootOnce and sets FirstBootDevice to VCD-DVD. The request is
# considered successful only on HTTP 202.
- name: Dell - Set OneTimeBoot VirtualCD
  ansible.builtin.uri:
    method: POST
    url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/Actions/Oem/EID_674_Manager.ImportSystemConfiguration"
    user: "{{ hostvars[item]['bmc_user'] }}"
    password: "{{ hostvars[item]['bmc_password'] }}"
    validate_certs: false
    headers:
      content-type: application/json
      Accept: application/json
    body_format: json
    body:
      ShareParameters:
        Target: "ALL"
      ImportBuffer: '<SystemConfiguration><Component FQDD="iDRAC.Embedded.1"><Attribute Name="ServerBoot.1#BootOnce">Enabled</Attribute><Attribute Name="ServerBoot.1#FirstBootDevice">VCD-DVD</Attribute></Component></SystemConfiguration>'
    status_code: 202
    return_content: true
170+
109171
- name: "DELL - Power ON for {{ item }}"
110172
community.general.redfish_command:
111173
category: Systems

ansible/vars/all.sample.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ setup_bastion_registry: false
7171
# Use in conjunction with ipv6 based clusters
7272
use_bastion_registry: false
7373

74+
# Reset iDRAC service using badfish container (pulls and uses badfish container
75+
# to clear job queue and reset iDRAC service)
76+
reset_idrac: false
77+
7478
################################################################################
7579
# OCP node vars
7680
################################################################################

0 commit comments

Comments
 (0)