Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions ansible/roles/badfish/tasks/call.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
---
# badfish call tasks
# Reusable task file to run badfish container with various options
#
# Required variables:
# badfish_host: BMC address or hostname
# badfish_user: BMC username
# badfish_password: BMC password
# badfish_args: List of badfish command and its arguments (e.g., ['--power-on'] or ['--mount-virtual-media', 'http://example.com/image.iso'])
#
# Optional variables:
# badfish_dns: DNS server IP (for VPN environments)
# badfish_no_log: Set to true to hide the badfish command and its result from Ansible output (default: false)
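# delay: Delay between retries in seconds (default: omitted, so Ansible's own default applies)
# retries: Number of retries (default: omitted, so Ansible's own default applies)
# ignore_errors: Set to true to skip the "ERROR in stderr" failure check (default: false)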

# Fail fast, before any container is started, when a required input is
# missing, empty, or of the wrong shape.
# badfish_args must be a list: later in this file it is concatenated onto the
# argv list, so a bare string or a mapping would pass a plain length check but
# break that concatenation with an obscure templating error.
- name: Validate required badfish parameters
  ansible.builtin.assert:
    that:
      - badfish_host is defined
      - badfish_host | length > 0
      - badfish_user is defined
      - badfish_user | length > 0
      - badfish_password is defined
      - badfish_password | length > 0
      - badfish_args is defined
      - badfish_args is not string
      - badfish_args is not mapping
      - badfish_args | length > 0
    fail_msg: "Missing or empty required badfish parameters. Required: badfish_host, badfish_user, badfish_password, badfish_args (non-empty list)"
    # quiet keeps the success output short.
    quiet: true

# Assemble the argv list for the container run:
#   podman run --rm [--dns <badfish_dns>] quay.io/quads/badfish
#     -H <badfish_host> -u <badfish_user> -p <badfish_password> <badfish_args...>
# The list is stored in the badfish_command_list fact. Note that it contains
# the BMC password in clear text.
- name: Build badfish command
  vars:
    # --dns is added only when badfish_dns holds a non-empty value, so an
    # unset, null, or empty badfish_dns does not yield a broken "--dns ''".
    _badfish_podman_cmd: >-
      {{
        ['podman', 'run', '--rm'] +
        (['--dns', badfish_dns]
         if (badfish_dns | default('', true) | string | length > 0)
         else [])
      }}
    # Connection options first, then the caller-supplied badfish arguments.
    _badfish_badfish_cmd: >-
      {{
        ['-H', badfish_host, '-u', badfish_user, '-p', badfish_password] +
        (badfish_args | default([]))
      }}
    _badfish_full_cmd: "{{ _badfish_podman_cmd + ['quay.io/quads/badfish'] + _badfish_badfish_cmd }}"
  ansible.builtin.set_fact:
    badfish_command_list: "{{ _badfish_full_cmd }}"

# Show the command that is about to run, with the BMC password masked so the
# credential does not end up in play output or log files.
# Every argv element that is exactly equal to badfish_password is replaced by
# a fixed placeholder; all other elements are printed unchanged.
- name: Display badfish command
  vars:
    # Anchored, escaped pattern so only a whole-element match is masked and
    # regex metacharacters in the password are treated literally.
    _badfish_password_pattern: "^{{ badfish_password | string | regex_escape }}$"
  ansible.builtin.debug:
    msg: >-
      Running badfish command:
      {{ badfish_command_list | map('string') | map('regex_replace', _badfish_password_pattern, '********') | join(' ') }}

# Execute the assembled podman/badfish argv and keep the outcome in
# badfish_result.
# An attempt counts as failed when the exit code is non-zero, and the task is
# repeated until an attempt is not failed. The optional caller variables
# `retries` and `delay` tune that loop; when they are unset the keywords are
# omitted.
# NOTE(review): the argv contains the BMC password and badfish_no_log defaults
# to false, so the password may be visible in Ansible output for this task —
# confirm whether callers should set badfish_no_log.
- name: Run badfish container
  ansible.builtin.command:
    argv: "{{ badfish_command_list }}"
  register: badfish_result
  failed_when: badfish_result.rc != 0
  until: not badfish_result.failed
  retries: "{{ retries | default(omit) }}"
  delay: "{{ delay | default(omit) }}"
  no_log: "{{ badfish_no_log | default(false) }}"

# Print whatever badfish wrote to standard output; skipped when the registered
# result carries no stdout field.
- name: Display badfish stdout
  when: badfish_result.stdout is defined
  ansible.builtin.debug:
    msg: "badfish stdout: {{ badfish_result.stdout }}"

# Print whatever badfish wrote to standard error; skipped when the registered
# result carries no stderr field.
- name: Display badfish stderr
  when: badfish_result.stderr is defined
  ansible.builtin.debug:
    msg: "badfish stderr: {{ badfish_result.stderr }}"

# Treat the literal text ERROR in stderr as a failure, independently of the
# exit-code check on the run itself.
# The check is skipped when the caller sets the `ignore_errors` variable to a
# truthy value (read here as a variable, defaulting to false).
# The conditions are listed separately; Ansible requires all of them to hold.
- name: Fail if badfish reports an error in stderr
  when:
    - not ignore_errors | default(false) | bool
    - badfish_result.stderr is defined
    - "'ERROR' in badfish_result.stderr"
  ansible.builtin.fail:
    msg: "badfish reported an error: {{ badfish_result.stderr }}"

# Copy the registered result into the badfish_command_result fact so that
# tasks outside this file can inspect it.
- name: Set fact with badfish result for external access
  ansible.builtin.set_fact:
    badfish_command_result: "{{ badfish_result }}"
12 changes: 12 additions & 0 deletions ansible/roles/badfish/tasks/install.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# badfish install tasks
# Pulls the badfish container image from quay.io

# podman is needed to pull and run the badfish container; install it through
# the generic package module.
- name: Ensure podman package is installed
  ansible.builtin.package:
    state: present
    name: podman

# Pull the badfish container image from quay.io.
- name: Pull badfish container image
  ansible.builtin.command:
    # argv form: no shell-style splitting of the command line.
    argv:
      - podman
      - pull
      - quay.io/quads/badfish
  # The command module reports "changed" on every run unless told otherwise,
  # and the exit status of the pull gives no signal about whether anything new
  # was downloaded, so do not flag a change each time the role runs.
  changed_when: false
4 changes: 4 additions & 0 deletions ansible/roles/bastion-install/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@ rh_crucible_url: https://github.com/perftool-incubator/crucible

# Since the use of tc on the bastion machine is rare, we disable rebooting the bastion machine by default
bastion_install_tc_reboot: false

# Reset iDRAC service using badfish container (pulls and uses badfish container
# to clear job queue and reset iDRAC service)
reset_idrac: false
6 changes: 6 additions & 0 deletions ansible/roles/bastion-install/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@
disable_gpg_check: yes
when: ansible_facts['distribution_major_version'] is version('9', '>=')

# Run the badfish role's install task file, but only when iDRAC reset support
# has been requested through reset_idrac.
- name: Install badfish container image
  when: reset_idrac | bool
  include_role:
    tasks_from: install
    name: badfish

- name: Install python
pip:
name: python-hpilo
Expand Down
4 changes: 4 additions & 0 deletions ansible/roles/boot-iso/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ http_store_port: 8081
# For a scale out it indicates how many worker nodes are already deployed
# and should not be included in the scale out from the inventory.
offset: 0

# Reset iDRAC service using badfish container (pulls and uses badfish container
# to clear job queue and reset iDRAC service)
reset_idrac: false
108 changes: 85 additions & 23 deletions ansible/roles/boot-iso/tasks/dell.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,79 @@
# Dell tasks for booting an iso

- name: Set Virtual Media ISO
set_fact:
ansible.builtin.set_fact:
_virtual_media_iso: "{{ virtual_media_iso | default(hostvars[item]['boot_iso']) }}"

# Optional step, executed only when reset_idrac is truthy: invokes the badfish
# role's "call" task file against this host's BMC with --clear-jobs --force.
# NOTE(review): badfish's call.yml reads a *variable* named ignore_errors to
# skip its stderr check; a task-level ignore_errors keyword on include_role is
# a different mechanism and presumably covers only the include itself —
# confirm which of the two the ignore_errors line below is meant to be.
- name: "Dell - Clear iDrac job queue for {{ item }} (badfish)"
ansible.builtin.include_role:
name: badfish
tasks_from: call
vars:
badfish_host: "{{ hostvars[item]['bmc_address'] }}"
badfish_user: "{{ hostvars[item]['bmc_user'] }}"
badfish_password: "{{ hostvars[item]['bmc_password'] }}"
badfish_args:
- "--clear-jobs"
- "--force"
ignore_errors: true
when: reset_idrac | bool

- name: "Dell - Power down machine prior to booting iso for {{ item }}"
shell: |
ipmitool -I lanplus -H {{ hostvars[item]['bmc_address'] }} -U {{ hostvars[item]['bmc_user'] }} -P {{ hostvars[item]['bmc_password'] }} chassis power off
ansible.builtin.command:
cmd: >-
ipmitool -I lanplus -H "{{ hostvars[item]['bmc_address'] }}"
-U "{{ hostvars[item]['bmc_user'] }}"
-P "{{ hostvars[item]['bmc_password'] }}" chassis power off
ignore_errors: true
register: ipmi_poweroff

- name: "Dell - Pause for power down for {{ item }}"
pause:
seconds: 10
# Optional step, executed only when reset_idrac is truthy: invokes the badfish
# role's "call" task file against this host's BMC with --racreset.
# NOTE(review): badfish's call.yml reads a *variable* named ignore_errors; a
# task-level ignore_errors keyword on include_role is a different mechanism —
# confirm which of the two the ignore_errors line below is meant to be.
- name: "Dell - Reset iDRAC for {{ item }} (badfish)"
ansible.builtin.include_role:
name: badfish
tasks_from: call
vars:
badfish_host: "{{ hostvars[item]['bmc_address'] }}"
badfish_user: "{{ hostvars[item]['bmc_user'] }}"
badfish_password: "{{ hostvars[item]['bmc_password'] }}"
badfish_args:
- "--racreset"
ignore_errors: true
when: reset_idrac | bool

# After a successful IPMI power-off, wait until TCP port 22 on the host stops
# accepting connections. Polling starts after 2 seconds and gives up after 60.
# The step is skipped when the preceding power-off command failed.
- name: "Dell - Wait for power down for {{ item }}"
  when: not ipmi_poweroff.failed
  ansible.builtin.wait_for:
    host: "{{ hostvars[item]['ansible_host'] | default(hostvars[item]['inventory_hostname']) }}"
    port: 22
    state: stopped
    delay: 2
    timeout: 60

- name: Dell - Set OneTimeBoot VirtualCD
uri:
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/Actions/Oem/EID_674_Manager.ImportSystemConfiguration"
# Fixed 30-second pause, only when reset_idrac is truthy.
# NOTE(review): presumably this gives the iDRAC time to begin its reset before
# the availability polling starts — confirm.
- name: "Ensure iDRAC reset order is passed for {{ item }}"
  ansible.builtin.pause:
    seconds: 30
  when: reset_idrac | bool

- name: "Dell - Wait for iDRAC to be available for {{ item }}"
ansible.builtin.uri:
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1"
user: "{{ hostvars[item]['bmc_user'] }}"
password: "{{ hostvars[item]['bmc_password'] }}"
method: POST
method: GET
headers:
content-type: application/json
Accept: application/json
body:
{
"ShareParameters": { "Target": "ALL" },
"ImportBuffer": '<SystemConfiguration><Component FQDD="iDRAC.Embedded.1"><Attribute Name="ServerBoot.1#BootOnce">Enabled</Attribute><Attribute Name="ServerBoot.1#FirstBootDevice">VCD-DVD</Attribute></Component></SystemConfiguration>',
}
body_format: json
validate_certs: no
status_code: 202
return_content: yes
validate_certs: false
status_code: [200, 201, 301, 302]
register: racreset_result
until: racreset_result.status in [200, 201, 301, 302]
retries: 60
delay: 5
failed_when: false
when: reset_idrac | bool

- name: "Dell - Check for Virtual Media for {{ item }}"
uri:
ansible.builtin.uri:
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD"
user: "{{ hostvars[item]['bmc_user'] }}"
password: "{{ hostvars[item]['bmc_password'] }}"
Expand All @@ -50,12 +88,15 @@
status_code: 200
return_content: yes
register: check_virtual_media
retries: 10
delay: 10
until: check_virtual_media.status == 200

- name: Block to rescue incase of stuck virtual media
when: check_virtual_media.json.Image
block:
- name: "Dell - Eject any CD Virtual Media for {{ item }}"
uri:
ansible.builtin.uri:
url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/VirtualMedia/CD/Actions/VirtualMedia.EjectMedia"
user: "{{ hostvars[item]['bmc_user'] }}"
password: "{{ hostvars[item]['bmc_password'] }}"
Expand All @@ -72,15 +113,17 @@
rescue:
# Use racadm to address the failed redfish unmount of old virtual media
- name: "Force mount of a existing image for {{ item }}"
raw: racadm remoteimage -c -u "" -p "" -l http://{{ http_store_host }}:{{ http_store_port }}/{{ _virtual_media_iso }}
ansible.builtin.raw: >-
racadm remoteimage -c -u "" -p "" -l http://{{ http_store_host }}:{{ http_store_port }}/{{ _virtual_media_iso }}
delegate_to: "{{ hostvars[item]['bmc_address'] }}"
vars:
ansible_user: "{{ hostvars[item]['bmc_user'] }}"
ansible_password: "{{ hostvars[item]['bmc_password'] }}"
ansible_ssh_common_args: '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null'

- name: "Force unmount of the existing image for {{ item }}"
raw: racadm remoteimage -d
ansible.builtin.raw: >-
racadm remoteimage -d
delegate_to: "{{ hostvars[item]['bmc_address'] }}"
vars:
ansible_user: "{{ hostvars[item]['bmc_user'] }}"
Expand All @@ -106,6 +149,25 @@
retries: 10
delay: 30

# POST a system-configuration snippet to the iDRAC OEM ImportSystemConfiguration
# Redfish action. The snippet enables ServerBoot BootOnce and sets the first
# boot device to VCD-DVD. The BMC is expected to answer with HTTP 202.
- name: Dell - Set OneTimeBoot VirtualCD
  ansible.builtin.uri:
    url: "https://{{ hostvars[item]['bmc_address'] }}/redfish/v1/Managers/iDRAC.Embedded.1/Actions/Oem/EID_674_Manager.ImportSystemConfiguration"
    method: POST
    user: "{{ hostvars[item]['bmc_user'] }}"
    password: "{{ hostvars[item]['bmc_password'] }}"
    # Certificate validation is turned off for this BMC request.
    validate_certs: false
    headers:
      content-type: application/json
      Accept: application/json
    body_format: json
    body:
      ShareParameters:
        Target: ALL
      ImportBuffer: '<SystemConfiguration><Component FQDD="iDRAC.Embedded.1"><Attribute Name="ServerBoot.1#BootOnce">Enabled</Attribute><Attribute Name="ServerBoot.1#FirstBootDevice">VCD-DVD</Attribute></Component></SystemConfiguration>'
    status_code: 202
    return_content: true

- name: "DELL - Power ON for {{ item }}"
community.general.redfish_command:
category: Systems
Expand Down
4 changes: 4 additions & 0 deletions ansible/vars/all.sample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ setup_bastion_registry: false
# Use in conjunction with ipv6 based clusters
use_bastion_registry: false

# Reset iDRAC service using badfish container (pulls and uses badfish container
# to clear job queue and reset iDRAC service)
# reset_idrac: false

################################################################################
# OCP node vars
################################################################################
Expand Down