Skip to content

Commit f15974e

Browse files
authored
Merge pull request #50 from openhpc/2026-03-27
ci: apply test cluster fixes and improvements
2 parents dcdd654 + 1431235 commit f15974e

File tree

4 files changed, with 30 additions and 19 deletions.

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@ shellcheck-lint:
1212
@echo "Running 'shellcheck' on all shell scripts"
1313
shellcheck \
1414
-o quote-safe-variables,deprecate-which,avoid-nullary-conditions \
15-
$$(find . -name *sh) \
15+
$$(find . -name *\.sh) \
1616
ansible/roles/test/files/*.bats
1717

1818
shfmt-lint:
1919
@echo "Running 'shfmt' on all shell scripts"
2020
shfmt -w -d \
21-
$$(find . -name *sh) \
21+
$$(find . -name *\.sh) \
2222
ansible/roles/test/files/*.bats
2323

2424
ansible-lint:

ansible/roles/test/files/run-ci.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,8 @@ cleanup() {
389389
echo "--> Last job ID: ${LAST_JOB}"
390390
echo -n "--> CI run time: "
391391
date -d@"${DURATION}" -u +%H:%M:%S
392+
echo -n "--> CI logs: "
393+
echo "${DEST_DIR}/${DEST_NAME}"
392394
echo -n "--> CI run result: "
393395
if [ "${RESULT}" == "PASS" ]; then
394396
echo "PASS"
@@ -497,9 +499,12 @@ if [[ "${PROVISIONER}" == "confluent" ]]; then
497499
echo "export dns_domain=local"
498500
} >>"${VARS}"
499501

500-
if [[ "${DISTRIBUTION}" == "rocky"* ]]; then
502+
if [[ "${DISTRIBUTION}" == "rocky9" ]]; then
501503
echo "export iso_path=/root/Rocky-9-latest-x86_64-dvd.iso" >>"${VARS}"
502504
fi
505+
if [[ "${DISTRIBUTION}" == "rocky10" ]]; then
506+
echo "export iso_path=/root/Rocky-10.1-x86_64-dvd1.iso" >>"${VARS}"
507+
fi
503508
if [[ "${DISTRIBUTION}" == "almalinux"* ]]; then
504509
echo "export iso_path=/root/AlmaLinux-9-latest-x86_64-dvd.iso" >>"${VARS}"
505510
fi

ansible/roles/test/files/support_functions.sh

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -326,18 +326,19 @@ install_openHPC_cluster() {
326326
echo "CI Customization: PXE boot selection is not persistent"
327327
sed -e 's,ipmitool,ipmitool -E -I lanplus -H ${c_bmc[$i]} -U ${bmc_username} -P ${bmc_password} chassis bootdev pxe options=efiboot; ipmitool,g' -i "${recipeFile}"
328328
if [ "${PKG_MANAGER}" == "dnf" ]; then
329+
echo "CI Customization: Switch to curl as dnf user agent"
329330
# shellcheck disable=SC2016
330-
sed -e 's,/etc/yum.repos.d$,/etc/yum.repos.d; echo -e "[main]\nuser_agent=curl" > $CHROOT/etc/dnf/dnf.conf,g' -i "${recipeFile}"
331+
sed '/ohpc_proxy:head/s echo -e "[main]\nuser_agent=curl" >> $CHROOT/etc/dnf/dnf.conf,g' -i "${recipeFile}"
331332
fi
332333
if [ "${Provisioner}" == "confluent" ]; then
333334
echo "CI Customization: Switch to http in repository definition"
334-
sed '/excludedocs/a nodersync /etc/yum.repos.d/ compute:/etc/yum.repos.d/' -i "${recipeFile}"
335-
sed '/excludedocs/a nodersync /etc/dnf/dnf.conf compute:/etc/dnf/dnf.conf' -i "${recipeFile}"
336-
sed '/excludedocs/a nodersync /etc/profile.d/proxy.sh compute:/etc/profile.d/proxy.sh' -i "${recipeFile}"
335+
sed '/Add additional packages to compute image/a nodersync /etc/yum.repos.d/ compute:/etc/yum.repos.d/' -i "${recipeFile}"
336+
sed '/ohpc_proxy:compute/a nodersync /etc/dnf/dnf.conf compute:/etc/dnf/dnf.conf' -i "${recipeFile}"
337+
sed '/ohpc_proxy:compute/a nodersync /etc/profile.d/proxy.sh compute:/etc/profile.d/proxy.sh' -i "${recipeFile}"
337338
echo "CI Customization: Switch to text mode installer (nouveau crashes otherwise)"
338339
local PROFILE
339340
PROFILE=$(grep "nodedeploy -n compute" "${recipeFile}" | cut -d\ -f 5)
340-
sed "/nodesetboot compute network/a sed -e 's,\\\\(initrd=distribution\\\\),\\\\1 modprobe.blacklist=nouveau,g' -i /var/lib/confluent/public/os/${PROFILE}/boot.ipxe" -i "${recipeFile}"
341+
sed "/nodesetboot compute network/a sed -e 's;\\\\(initrd=distribution\\\\);\\\\1 modprobe.blacklist=nouveau,mlx5_ib,mlx5_core,mlx5_fwctl,mlxfw;g' -i /var/lib/confluent/public/os/${PROFILE}/boot.ipxe" -i "${recipeFile}"
341342
fi
342343
if [ "${Provisioner}" == "openchami" ]; then
343344
echo "CI Customization: Switch to http in repository definition"
@@ -437,7 +438,9 @@ post_install_cmds() {
437438
local_sleep 1
438439
/opt/xcat/bin/updatenode compute -F
439440
elif [ "${Provisioner}" == "openchami" ]; then
441+
sed -e "/UserKnownHostsFile/d" -i /root/.ssh/config
440442
pdcp -w "${compute_prefix}"[1-"${num_computes}"] /etc/passwd /etc/passwd
443+
pdsh -w "${compute_prefix}[1-${num_computes}]" "sudo sed -i '/^account[[:space:]]\+required[[:space:]]\+pam_unix.so/ { /broken_shadow/! s/$/ broken_shadow/ }' /etc/pam.d/password-auth"
441444
elif [ "${Provisioner}" == "confluent" ]; then
442445
local_sleep 1
443446
/opt/confluent/bin/nodeapply -F compute
@@ -631,7 +634,7 @@ install_doc_rpm() {
631634
DOCS_TMPDIR=$(mktemp -d)
632635
loop_command dnf download -y \
633636
--repofrompath="ohpc-el10,${EL10_REPO_URL}" \
634-
--repo=ohpc-el10 --disablerepo='*' \
637+
--disablerepo='*' \
635638
--downloaddir="${DOCS_TMPDIR}" \
636639
--setopt="ohpc-el10.gpgcheck=0" \
637640
docs-ohpc
@@ -680,6 +683,7 @@ wait_for_computes() {
680683

681684
for i in $(seq 90 -1 1); do
682685
echo "Waiting for compute nodes to get ready ($i)"
686+
koomie_cf -x "${compute_prefix}\\d+" cat /proc/uptime
683687
if ! "${CHECK_COMMAND[@]}" | grep -E '(down|password|refused|booting|route|closed|disconnect|authenticity)'; then
684688
echo "All compute nodes are ready"
685689
not_ready=0
@@ -711,7 +715,7 @@ wait_for_computes() {
711715
pdsh -w "${compute_prefix}"[1-"${num_computes}"] systemctl disable --now firewalld
712716
if [ "${enable_ib}" -eq 0 ]; then
713717
# Disable IB
714-
pdsh -w "${compute_prefix}"[1-"${num_computes}"] rmmod mlx5_ib mlx5_core
718+
pdsh -w "${compute_prefix}"[1-"${num_computes}"] rmmod mlx5_ib mlx5_core mlx5_fwctl mlxfw
715719
fi
716720
fi
717721

@@ -720,6 +724,16 @@ wait_for_computes() {
720724
local_sleep 10
721725
# Mount all NFS. That sometimes fails.
722726
pdsh -w "${compute_prefix}"[1-"${num_computes}"] mount -t nfs -a
727+
elif [ "${Provisioner}" == "warewulf" ]; then
728+
wwsh file sync
729+
local_sleep 10
730+
elif [ "${Provisioner}" == "xcat" ]; then
731+
/opt/xcat/bin/updatenode compute -F
732+
elif [ "${Provisioner}" == "confluent" ]; then
733+
/opt/confluent/bin/nodeshell compute setenforce 0
734+
elif [ "${Provisioner}" == "openchami" ]; then
735+
sed -e "/UserKnownHostsFile/d" -i /root/.ssh/config
736+
pdsh -w "${compute_prefix}[1-${num_computes}]" "sudo sed -i '/^account[[:space:]]\+required[[:space:]]\+pam_unix.so/ { /broken_shadow/! s/$/ broken_shadow/ }' /etc/pam.d/password-auth"
723737
fi
724738

725739
if [ "${RMS}" == "slurm" ]; then
@@ -728,14 +742,6 @@ wait_for_computes() {
728742
scontrol update nodename="${compute_prefix}"[1-"${num_computes}"] state=idle
729743
fi
730744

731-
if [ "${Provisioner}" == "warewulf" ]; then
732-
wwsh file sync
733-
local_sleep 10
734-
fi
735-
736-
if [ "${Provisioner}" == "xcat" ]; then
737-
/opt/xcat/bin/updatenode compute -F
738-
fi
739745
set +x
740746
}
741747

ansible/roles/test/templates/el-kickstart

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
{%- endif -%}
3131
{%- set base = 'openEuler-'~release~'-LTS-'~SP~'-everything/' -%}
3232
{%- if inventory_hostname_short.startswith('ohpc-lenovo-repo') -%}
33-
{%- set mirror1_base = 'http://ftp.agdsn.de/openeuler/openEuler-'~release~'-LTS-'~SP~'/' -%}
33+
{%- set mirror1_base = 'http://mirrors.dotsrc.org/openeuler/openEuler-'~release~'-LTS-'~SP~'/' -%}
3434
{%- elif inventory_hostname_short.startswith('ohpc-huawei-repo') -%}
3535
{%- set mirror1_base = 'http://mirrors.nju.edu.cn/openeuler/openEuler-'~release~'-LTS-'~SP~'/' -%}
3636
{%- endif -%}

0 commit comments

Comments (0)