---
name: Upgrade Bootstrap + Run Upgrade Tests
run-name: "Upgrade: ${{ inputs.BASE_SBCLI_BRANCH }} → ${{ inputs.TARGET_SBCLI_BRANCH }} | ${{ github.ref_name }} | ${{ inputs.MNODES }}"
# Bootstraps a cluster using BASE_SBCLI_BRANCH + BASE_SPDK_IMAGE, then runs
# upgrade_e2e.py (TestMajorUpgrade) to upgrade to TARGET_SBCLI_BRANCH + TARGET_SPDK_IMAGE.

on:
  # Reusable entry point: inputs are typed; defaults match the lab environment.
  workflow_call:
    inputs:
      BASE_SBCLI_BRANCH:
        type: string
        required: true
        description: "sbcli branch / version tag to bootstrap the cluster with (e.g. R25.10-Hotfix)"
      CUSTOM_IMAGES:
        type: string
        default: 'base_spdk="" target_spdk="" base_docker="" target_docker=""'
        description: "Image overrides: set base_spdk, target_spdk, base_docker and/or target_docker values, leave as \"\" to skip."
      TARGET_SBCLI_BRANCH:
        type: string
        required: true
        description: "sbcli branch / version tag to upgrade to (e.g. main)"
      STORAGE_PRIVATE_IPS:
        type: string
        default: "192.168.10.205 192.168.10.206 192.168.10.207 192.168.10.208"
      API_INVOKE_URL:
        type: string
        default: "http://192.168.10.211/"
      BASTION_IP:
        type: string
        default: "192.168.10.211"
      MNODES:
        type: string
        default: "192.168.10.211"
      NR_HUGEPAGES:
        type: string
        default: "2048"
      GRAFANA_ENDPOINT:
        type: string
        default: "http://192.168.10.211/grafana"
      SBCLI_CMD:
        type: string
        default: "sbctl"
      SSH_USER:
        type: string
        default: "root"
      KEY_PATH:
        type: string
        default: "/home/ec2-user/.ssh/simplyblock-us-east-2.pem"
      CLIENTNODES:
        type: string
        default: "192.168.10.165 192.168.10.166"
      NFS_MOUNTPOINT:
        type: string
        default: "/mnt/nfs_share"
      BOOTSTRAP_MAX_LVOL:
        type: string
        default: "300"
      BOOTSTRAP_DATA_CHUNKS:
        type: string
        default: "2"
      BOOTSTRAP_PARITY_CHUNKS:
        type: string
        default: "2"
      BOOTSTRAP_JOURNAL_PARTITION:
        type: string
        default: "1"
      BOOTSTRAP_HA_JM_COUNT:
        type: string
        default: "3"
      BOOTSTRAP_HA_TYPE:
        type: string
        default: "ha"
      BOOTSTRAP_DATA_NIC:
        type: string
        default: "eth1"
      BOOTSTRAP_IS_SINGLE_NODE:
        type: boolean
        default: false
      BOOTSTRAP_ENABLE_NODE_AFFINITY:
        type: boolean
        default: false
      TEST_CLASS:
        type: string
        default: "major_upgrade"
      RUN_LABEL:
        type: string
        default: ""
        description: "Optional label appended to artifact names to avoid collisions (e.g. 'run1')"
  # Manual entry point: same inputs, duplicated because workflow_dispatch
  # cannot share the workflow_call input definitions.
  workflow_dispatch:
    inputs:
      BASE_SBCLI_BRANCH:
        description: "sbcli branch / version tag to bootstrap the cluster with (e.g. R25.10-Hotfix)"
        required: true
      CUSTOM_IMAGES:
        description: "Image overrides: set base_spdk, target_spdk, base_docker and/or target_docker values, leave as \"\" to skip."
        required: false
        default: 'base_spdk="" target_spdk="" base_docker="" target_docker=""'
      TARGET_SBCLI_BRANCH:
        description: "sbcli branch / version tag to upgrade to (e.g. main)"
        required: true
      STORAGE_PRIVATE_IPS:
        description: "Space-separated storage node IPs (also used for cleanup)"
        required: true
        default: "192.168.10.205 192.168.10.206 192.168.10.207 192.168.10.208"
      API_INVOKE_URL:
        description: "API invoke URL"
        required: true
        default: "http://192.168.10.211/"
      BASTION_IP:
        description: "Bastion IP"
        required: true
        default: "192.168.10.211"
      MNODES:
        description: "Management node(s) IPs"
        required: true
        default: "192.168.10.211"
      NR_HUGEPAGES:
        description: "Hugepages"
        required: true
        default: "2048"
      GRAFANA_ENDPOINT:
        description: "Grafana endpoint"
        required: true
        default: "http://192.168.10.211/grafana"
      SBCLI_CMD:
        description: "sbcli command name"
        required: true
        default: "sbctl"
      SSH_USER:
        description: "SSH user"
        required: true
        default: "root"
      KEY_PATH:
        description: "SSH private key path on runner"
        required: true
        default: "/home/ec2-user/.ssh/simplyblock-us-east-2.pem"
      CLIENTNODES:
        description: "Space-separated client node IPs"
        required: true
        default: "192.168.10.165 192.168.10.166"
      NFS_MOUNTPOINT:
        description: "NFS mountpoint to unmount everywhere"
        required: true
        default: "/mnt/nfs_share"
      BOOTSTRAP_MAX_LVOL:
        description: "bootstrap: --max-lvol"
        required: true
        default: "300"
      BOOTSTRAP_DATA_CHUNKS:
        description: "bootstrap: --data-chunks-per-stripe"
        required: true
        default: "2"
      BOOTSTRAP_PARITY_CHUNKS:
        description: "bootstrap: --parity-chunks-per-stripe"
        required: true
        default: "2"
      BOOTSTRAP_JOURNAL_PARTITION:
        description: "bootstrap: --journal-partition"
        required: true
        default: "1"
      BOOTSTRAP_HA_JM_COUNT:
        description: "bootstrap: --ha-jm-count"
        required: true
        default: "3"
      BOOTSTRAP_HA_TYPE:
        description: "bootstrap: --ha-type"
        required: true
        default: "ha"
      BOOTSTRAP_DATA_NIC:
        description: "bootstrap: --data-nics"
        required: true
        default: "eth1"
      BOOTSTRAP_IS_SINGLE_NODE:
        description: "Bootstrap: deploy as single-node"
        type: boolean
        required: false
        default: false
      BOOTSTRAP_ENABLE_NODE_AFFINITY:
        description: "Bootstrap: enable node affinity"
        type: boolean
        required: false
        default: false
      TEST_CLASS:
        description: "Upgrade test class name (--testname); leave empty to run all upgrade tests"
        required: false
        type: string
        default: "major_upgrade"

# Only one lab upgrade run at a time; queued runs wait instead of cancelling.
concurrency:
  group: simplyblock-lab-upgrade
  cancel-in-progress: false
jobs:
  bootstrap-and-upgrade:
    name: Pre-clean -> Bootstrap (${{ inputs.BASE_SBCLI_BRANCH }}) -> Upgrade (${{ inputs.TARGET_SBCLI_BRANCH }})
    runs-on: [self-hosted]
    timeout-minutes: 300
    env:
      # Upgrade-specific
      BASE_SBCLI_BRANCH: ${{ inputs.BASE_SBCLI_BRANCH }}
      TARGET_SBCLI_BRANCH: ${{ inputs.TARGET_SBCLI_BRANCH }}
      CUSTOM_IMAGES: ${{ inputs.CUSTOM_IMAGES || 'base_spdk="" target_spdk="" base_docker="" target_docker=""' }}
      # Cluster/lab env (|| fallbacks cover callers that omit optional inputs)
      STORAGE_PRIVATE_IPS: ${{ inputs.STORAGE_PRIVATE_IPS || '192.168.10.205 192.168.10.206 192.168.10.207 192.168.10.208' }}
      API_INVOKE_URL: ${{ inputs.API_INVOKE_URL || 'http://192.168.10.211/' }}
      API_BASE_URL: ${{ inputs.API_INVOKE_URL || 'http://192.168.10.211/' }}
      BASTION_IP: ${{ inputs.BASTION_IP || '192.168.10.211' }}
      BASTION_SERVER: ${{ inputs.BASTION_IP || '192.168.10.211' }}
      MNODES: ${{ inputs.MNODES || '192.168.10.211' }}
      NR_HUGEPAGES: ${{ inputs.NR_HUGEPAGES || '2048' }}
      GRAFANA_ENDPOINT: ${{ inputs.GRAFANA_ENDPOINT || 'http://192.168.10.211/grafana' }}
      SBCLI_CMD: ${{ inputs.SBCLI_CMD || 'sbctl' }}
      # SSH/client env
      SSH_USER: ${{ inputs.SSH_USER || 'root' }}
      KEY_PATH: ${{ inputs.KEY_PATH || '/home/ec2-user/.ssh/simplyblock-us-east-2.pem' }}
      CLIENTNODES: ${{ inputs.CLIENTNODES || '192.168.10.165 192.168.10.166' }}
      CLIENT_IP: ${{ inputs.CLIENTNODES || '192.168.10.165 192.168.10.166' }}
      # Cleanup
      NFS_MOUNTPOINT: ${{ inputs.NFS_MOUNTPOINT || '/mnt/nfs_share' }}
      # Bootstrap params
      BOOTSTRAP_MAX_LVOL: ${{ inputs.BOOTSTRAP_MAX_LVOL || '300' }}
      BOOTSTRAP_DATA_CHUNKS: ${{ inputs.BOOTSTRAP_DATA_CHUNKS || '2' }}
      BOOTSTRAP_PARITY_CHUNKS: ${{ inputs.BOOTSTRAP_PARITY_CHUNKS || '2' }}
      BOOTSTRAP_JOURNAL_PARTITION: ${{ inputs.BOOTSTRAP_JOURNAL_PARTITION || '1' }}
      BOOTSTRAP_HA_JM_COUNT: ${{ inputs.BOOTSTRAP_HA_JM_COUNT || '3' }}
      BOOTSTRAP_HA_TYPE: ${{ inputs.BOOTSTRAP_HA_TYPE || 'ha' }}
      BOOTSTRAP_DATA_NIC: ${{ inputs.BOOTSTRAP_DATA_NIC || 'eth1' }}
      TEST_CLASS: ${{ inputs.TEST_CLASS || 'major_upgrade' }}
      # Secrets
      SSH_PASSWORD: ${{ secrets.SSH_PASSWORD }}
      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
      MINIO_ACCESS_KEY: ${{ secrets.MINIO_ACCESS_KEY }}
      MINIO_SECRET_KEY: ${{ secrets.MINIO_SECRET_KEY }}
      SUPABASE_ANON_KEY: ${{ secrets.SUPABASE_ANON_KEY }}
      # Filled after bootstrap parsing (via $GITHUB_ENV)
      CLUSTER_ID: ""
      CLUSTER_SECRET: ""
    steps:
- name: Runner diagnostics
shell: bash
run: |
set -euxo pipefail
uname -a
whoami
pwd
python3 --version || true
git --version
- name: Clear stale test artifacts from previous run
shell: bash
run: |
rm -f sbcli/e2e/output.log || true
- name: Install prereqs (sshpass)
shell: bash
run: |
set -euxo pipefail
if command -v sshpass >/dev/null 2>&1; then
exit 0
fi
if command -v apt-get >/dev/null 2>&1; then
sudo apt-get update -y
sudo apt-get install -y sshpass
elif command -v yum >/dev/null 2>&1; then
sudo yum install -y epel-release || true
sudo yum install -y sshpass
elif command -v dnf >/dev/null 2>&1; then
sudo dnf install -y sshpass
else
echo "ERROR: Cannot install sshpass (unknown package manager)."
exit 1
fi
- name: Parse CUSTOM_IMAGES overrides
shell: bash
run: |
set -euxo pipefail
custom="${CUSTOM_IMAGES}"
for item in $custom; do
key="${item%%=*}"
value="${item#*=}"
value="${value//\"/}"
if [[ -z "$value" ]]; then
echo "Skipping $key (empty)"
continue
fi
case "$key" in
base_spdk) echo "BASE_SPDK_IMAGE=$value" >> "$GITHUB_ENV"
echo "SIMPLY_BLOCK_SPDK_ULTRA_IMAGE=$value" >> "$GITHUB_ENV"
echo "Base SPDK image set: $value" ;;
target_spdk) echo "TARGET_SPDK_IMAGE=$value" >> "$GITHUB_ENV"
echo "Target SPDK image set: $value" ;;
base_docker) echo "BASE_DOCKER_IMAGE=$value" >> "$GITHUB_ENV"
echo "SIMPLY_BLOCK_DOCKER_IMAGE=$value" >> "$GITHUB_ENV"
echo "Base Docker image set: $value" ;;
target_docker) echo "TARGET_DOCKER_IMAGE=$value" >> "$GITHUB_ENV"
echo "Target Docker image set: $value" ;;
*) echo "Unknown image key: $key (ignored)" ;;
esac
done
- name: Resolve KEY_PATH and validate key exists
shell: bash
run: |
set -euxo pipefail
kp="${KEY_PATH}"
kp="${kp%\"}"; kp="${kp#\"}"
kp="${kp%\'}"; kp="${kp#\'}"
if [[ "$kp" == .ssh/* ]]; then kp="${HOME}/${kp}"; fi
if [[ "$kp" == ~/* ]]; then kp="${HOME}/${kp#~/}"; fi
if [[ "$kp" == "~.ssh/"* ]]; then kp="${HOME}/.${kp#~.}"; fi
echo "Resolved KEY_PATH=$kp"
echo "KEY_PATH=$kp" >> "$GITHUB_ENV"
test -f "$kp" || (echo "ERROR: SSH key not found at $kp" && exit 1)
chmod 600 "$kp" || true
- name: Export KEY_NAME from KEY_PATH
shell: bash
run: |
set -euxo pipefail
key_name="$(basename "${KEY_PATH}")"
echo "KEY_NAME=${key_name}" >> "$GITHUB_ENV"
echo "Exported KEY_NAME=${key_name}"
- name: Validate required secrets exist
shell: bash
run: |
set -euxo pipefail
[[ -n "${SSH_PASSWORD}" ]] || (echo "ERROR: secrets.SSH_PASSWORD required" && exit 1)
# ============================================================
# PRE-BOOTSTRAP CLEANUP
# ============================================================
- name: Pre-clean kill fio/tmux and unmount NFS on MNODES + storage + clients
shell: bash
run: |
set -euxo pipefail
run_remote() {
local ip="$1"
local script="$2"
sshpass -p "${SSH_PASSWORD}" ssh \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
"${SSH_USER}@${ip}" "bash -s" <<< "$script"
}
targets="$MNODES $STORAGE_PRIVATE_IPS $CLIENTNODES"
uniq_targets="$(echo "$targets" | tr ' ' '\n' | sed '/^$/d' | sort -u | tr '\n' ' ')"
for ip in $uniq_targets; do
echo "---- $ip: kill fio/tmux + umount ${NFS_MOUNTPOINT} ----"
run_remote "$ip" "set -euxo pipefail;
pkill -9 fio || true;
pkill -9 tmux || true;
mp='${NFS_MOUNTPOINT}';
if mountpoint -q \"\$mp\"; then umount -f \"\$mp\" || umount \"\$mp\"; else
if mount | grep -q \" \$mp \"; then umount -f \"\$mp\" || umount \"\$mp\" || true; fi
fi"
done
- name: Destroy/clean storage nodes (deploy-cleaner, docker prune, uninstall sbcli, k3s)
shell: bash
run: |
set -euxo pipefail
run_remote() {
local ip="$1"
local script="$2"
sshpass -p "${SSH_PASSWORD}" ssh \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
"${SSH_USER}@${ip}" "bash -s" <<< "$script"
}
targets="$MNODES $STORAGE_PRIVATE_IPS"
uniq_targets="$(echo "$targets" | tr ' ' '\n' | sed '/^$/d' | sort -u | tr '\n' ' ')"
for ip in $uniq_targets; do
echo "---- storage destroy/clean: $ip ----"
run_remote "$ip" "set -euxo pipefail;
systemctl stop firewalld || true;
systemctl stop ufw || true;
sysctl -w net.ipv6.conf.all.disable_ipv6=1 || true;
'${SBCLI_CMD}' sn deploy-cleaner || echo 'WARN: deploy-cleaner failed';
docker stop \$(docker ps -aq) || true;
docker rm -f \$(docker ps -aq) || true;
docker builder prune --all -f || true;
docker system prune -af || true;
docker volume prune -f || true;
docker rmi -f \$(docker images -aq) || true;
pip uninstall -y '${SBCLI_CMD}' || echo 'WARN: uninstall sbcli failed';
pip uninstall -y sbctl || echo 'WARN: uninstall sbctl failed';
rm -rf /usr/local/bin/sbc* || true;
k3s-agent-uninstall.sh || true"
sleep 10
done
- name: Client cleanup disconnect lvols; unmount all /mnt; remove /mnt dirs
shell: bash
run: |
set -euxo pipefail
run_remote() {
local ip="$1"
local script="$2"
sshpass -p "${SSH_PASSWORD}" ssh \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
"${SSH_USER}@${ip}" "bash -s" <<< "$script"
}
for ip in $CLIENTNODES; do
echo "---- client disconnect lvols: $ip ----"
run_remote "$ip" "set -euxo pipefail;
subsystems=\$(nvme list-subsys | grep -i lvol | awk '{print \$3}' | cut -d '=' -f 2 || true);
for s in \$subsystems; do nvme disconnect -n \"\$s\" || true; done"
done
targets="$MNODES $STORAGE_PRIVATE_IPS $CLIENTNODES"
uniq_targets="$(echo "$targets" | tr ' ' '\n' | sed '/^$/d' | sort -u | tr '\n' ' ')"
still=0
for ip in $uniq_targets; do
if sshpass -p "${SSH_PASSWORD}" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${SSH_USER}@${ip}" \
"mount | grep -q \" ${NFS_MOUNTPOINT} \""; then
echo "ERROR: ${NFS_MOUNTPOINT} still mounted on $ip"
still=1
fi
done
[[ "$still" -eq 0 ]] || exit 1
for ip in $CLIENTNODES; do
echo "---- client unmount all /mnt and remove dirs: $ip ----"
run_remote "$ip" "set -euxo pipefail;
mps=\$(mount | grep ' /mnt' | awk '{print \$3}' || true);
for mp in \$mps; do umount -f \"\$mp\" || umount \"\$mp\" || true; done;
dirs=\$(find /mnt -mindepth 1 -type d 2>/dev/null || true);
for d in \$dirs; do rm -rf \"\$d\" || true; done"
done
- name: Remove /etc/simplyblock; reboot storage; disk reset + PCI rebind + mklabel
shell: bash
run: |
set -euxo pipefail
run_remote() {
local ip="$1"
local script="$2"
sshpass -p "${SSH_PASSWORD}" ssh \
-o StrictHostKeyChecking=no \
-o UserKnownHostsFile=/dev/null \
"${SSH_USER}@${ip}" "bash -s" <<< "$script"
}
targets="$MNODES $STORAGE_PRIVATE_IPS"
uniq_targets="$(echo "$targets" | tr ' ' '\n' | sed '/^$/d' | sort -u | tr '\n' ' ')"
for ip in $uniq_targets; do
run_remote "$ip" "rm -rf /etc/simplyblock || true"
done
for ip in $STORAGE_PRIVATE_IPS; do
echo "---- reboot storage: $ip ----"
sshpass -p "${SSH_PASSWORD}" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${SSH_USER}@${ip}" \
"nohup reboot >/dev/null 2>&1 &" || true
done
for ip in $STORAGE_PRIVATE_IPS; do
echo "Waiting for $ip..."
for i in {1..60}; do
if sshpass -p "${SSH_PASSWORD}" ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 "${SSH_USER}@${ip}" \
"echo online" >/dev/null 2>&1; then
echo "$ip online"
break
fi
sleep 10
[[ "$i" -lt 60 ]] || (echo "ERROR: $ip did not come online" && exit 1)
done
done
for ip in $STORAGE_PRIVATE_IPS; do
echo "---- disk reset on $ip ----"
run_remote "$ip" "set -euxo pipefail;
for dev in /dev/nvme0n1 /dev/nvme1n1 /dev/nvme2n1 /dev/nvme3n1; do
if [[ -b \$dev ]]; then
parted \$dev --script rm 1 || true;
parted \$dev --script rm 2 || true;
parted \$dev --script rm 3 || true;
fi
done;
for pci in \$(lspci -D | grep 'QEMU NVM Express' | awk '{print \$1}' || true); do
echo \$pci > /sys/bus/pci/drivers/nvme/unbind || true;
done;
for i in 2 3 4 5; do
echo 0000:00:0\${i}.0 > /sys/bus/pci/drivers/uio_pci_generic/unbind || true;
done;
for i in 2 3 4 5; do
echo 0000:00:0\${i}.0 > /sys/bus/pci/drivers/vfio-pci/unbind || true;
done;
for i in 2 3 4 5; do
echo nvme > /sys/bus/pci/devices/0000:00:0\${i}.0/driver_override || true;
done;
for i in 2 3 4 5; do
echo 0000:00:0\${i}.0 > /sys/bus/pci/drivers/nvme/bind || true;
done;
for i in 0 1 2 3; do
dev=/dev/nvme\${i}n1;
if [[ -b \$dev ]]; then
parted -fs \$dev mklabel gpt || true;
fi
done"
done
# ============================================================
# BOOTSTRAP (using BASE_SBCLI_BRANCH + BASE_SPDK_IMAGE)
# ============================================================
- name: Clone simplyBlockDeploy (bootstrap repo)
shell: bash
run: |
set -euxo pipefail
rm -rf simplyBlockDeploy
git clone https://github.com/simplyblock-io/simplyBlockDeploy.git simplyBlockDeploy
test -f simplyBlockDeploy/bare-metal/bootstrap-cluster.sh
- name: Bootstrap cluster (BASE_SBCLI_BRANCH=${{ inputs.BASE_SBCLI_BRANCH }})
shell: bash
run: |
set -euxo pipefail
cd simplyBlockDeploy/bare-metal
chmod +x ./bootstrap-cluster.sh
extra_args=()
# Images exported by the "Parse CUSTOM_IMAGES overrides" step
if [[ -n "${SIMPLY_BLOCK_SPDK_ULTRA_IMAGE-}" ]]; then
extra_args+=( --spdk-image "${SIMPLY_BLOCK_SPDK_ULTRA_IMAGE}" )
fi
if [[ "${{ inputs.BOOTSTRAP_IS_SINGLE_NODE }}" == "true" ]]; then
extra_args+=( --is-single-node true )
fi
if [[ "${{ inputs.BOOTSTRAP_ENABLE_NODE_AFFINITY }}" == "true" ]]; then
extra_args+=( --enable-node-affinity true )
fi
set +e
./bootstrap-cluster.sh \
--sbcli-cmd "${SBCLI_CMD}" \
--max-lvol "${BOOTSTRAP_MAX_LVOL}" \
--data-chunks-per-stripe "${BOOTSTRAP_DATA_CHUNKS}" \
--parity-chunks-per-stripe "${BOOTSTRAP_PARITY_CHUNKS}" \
--journal-partition "${BOOTSTRAP_JOURNAL_PARTITION}" \
--ha-jm-count "${BOOTSTRAP_HA_JM_COUNT}" \
--ha-type "${BOOTSTRAP_HA_TYPE}" \
--data-nics "${BOOTSTRAP_DATA_NIC}" \
"${extra_args[@]}" | tee bootstrap.log
rc=${PIPESTATUS[0]}
set -e
[[ "$rc" -eq 0 ]] || (echo "ERROR: bootstrap failed (rc=$rc)" && exit "$rc")
- name: Fetch CLUSTER_ID and CLUSTER_SECRET from MNODES
shell: bash
run: |
set -euxo pipefail
mgmt_ip="$(echo "${MNODES}" | awk '{print $1}')"
ssh_common=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i "${KEY_PATH}")
cluster_id="$(ssh "${ssh_common[@]}" "${SSH_USER}@${mgmt_ip}" \
"${SBCLI_CMD} cluster list" | grep -Eo '[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}' | head -n 1)"
if [[ -z "${cluster_id}" ]]; then
echo "ERROR: Could not extract cluster_id from '${SBCLI_CMD} cluster list' on ${mgmt_ip}"
ssh "${ssh_common[@]}" "${SSH_USER}@${mgmt_ip}" "${SBCLI_CMD} cluster list" || true
exit 1
fi
cluster_secret="$(ssh "${ssh_common[@]}" "${SSH_USER}@${mgmt_ip}" \
"${SBCLI_CMD} cluster get-secret ${cluster_id}" | tr -d '\r' | tail -n 1 | xargs)"
if [[ -z "${cluster_secret}" ]]; then
echo "ERROR: Could not get cluster_secret from '${SBCLI_CMD} cluster get-secret ${cluster_id}' on ${mgmt_ip}"
exit 1
fi
echo "CLUSTER_ID=${cluster_id}" >> "$GITHUB_ENV"
echo "CLUSTER_SECRET=${cluster_secret}" >> "$GITHUB_ENV"
echo "Fetched CLUSTER_ID=${cluster_id}"
echo "Fetched CLUSTER_SECRET=***set***"
# ============================================================
# UPGRADE E2E TESTS (clone from TARGET_SBCLI_BRANCH)
# ============================================================
- name: Clone sbcli repo (prefer workflow branch; fallback to TARGET_SBCLI_BRANCH)
shell: bash
run: |
set -euxo pipefail
rm -rf sbcli
wf_branch="${{ github.ref_name }}"
fallback_branch="${TARGET_SBCLI_BRANCH}"
echo "Workflow branch: $wf_branch"
echo "Fallback sbcli branch (TARGET_SBCLI_BRANCH): $fallback_branch"
if git ls-remote --heads https://github.com/simplyblock-io/sbcli.git "$wf_branch" | grep -q "$wf_branch"; then
echo "Cloning sbcli on workflow branch: $wf_branch"
git clone --branch "$wf_branch" --single-branch https://github.com/simplyblock-io/sbcli.git sbcli
else
echo "Branch '$wf_branch' not found in sbcli; cloning fallback branch: $fallback_branch"
git clone --branch "$fallback_branch" --single-branch https://github.com/simplyblock-io/sbcli.git sbcli
fi
test -f sbcli/e2e/upgrade_e2e.py
test -f sbcli/e2e/e2e_tests/upgrade_tests/major_upgrade.py
test -f sbcli/e2e/logs/cleanup.py
- name: Install Python deps (best-effort)
shell: bash
run: |
set -euxo pipefail
python3 -m pip install --upgrade pip
if [[ -f "sbcli/e2e/requirements.txt" ]]; then
pip install -r sbcli/e2e/requirements.txt
fi
- name: Cleanup logs before upgrade e2e
shell: bash
working-directory: sbcli/e2e
run: |
set -euxo pipefail
python3 logs/cleanup.py
- name: Set RUN_BASE_DIR
shell: bash
run: |
set -euxo pipefail
RUN_TIMESTAMP="$(date +%Y%m%d-%H%M%S)"
RUN_BASE_DIR="${NFS_MOUNTPOINT}/upgrade-run-${RUN_TIMESTAMP}-${GITHUB_RUN_ID}"
echo "RUN_BASE_DIR=${RUN_BASE_DIR}" >> "$GITHUB_ENV"
mkdir -p "${RUN_BASE_DIR}"
- name: Record test start time
shell: bash
run: |
set -euxo pipefail
echo "TEST_START_EPOCH=$(date +%s)" >> "$GITHUB_ENV"
echo "TEST_START_HUMAN=$(date -u +'%Y-%m-%d %H:%M:%S UTC')" >> "$GITHUB_ENV"
- name: Run upgrade e2e tests (major_upgrade)
shell: bash
working-directory: sbcli/e2e
run: |
set -euxo pipefail
TESTNAME_ARGS=()
if [[ -n "${TEST_CLASS:-}" ]]; then
TESTNAME_ARGS=(--testname "${TEST_CLASS}")
fi
python3 -u upgrade_e2e.py \
--base_version "${BASE_SBCLI_BRANCH}" \
--target_version "${TARGET_SBCLI_BRANCH}" \
--base_spdk_image "${BASE_SPDK_IMAGE:-}" \
--target_spdk_image "${TARGET_SPDK_IMAGE:-}" \
--target_docker_image "${TARGET_DOCKER_IMAGE:-}" \
"${TESTNAME_ARGS[@]}" \
2>&1 | tee output.log
- name: Mark test end time (always)
if: always()
shell: bash
run: |
set -euxo pipefail
echo "TEST_END_EPOCH=$(date +%s)" >> "$GITHUB_ENV"
echo "TEST_END_HUMAN=$(date -u '+%Y-%m-%d %H:%M:%S UTC')" >> "$GITHUB_ENV"
- name: Collect mgmt snapshots into RUN_BASE_DIR (always)
if: always()
shell: bash
run: |
set -euxo pipefail
python3 - <<'PY'
import os, subprocess, json
mgmt_ip = os.environ["MNODES"].split()[0]
key = os.environ["KEY_PATH"]
user = os.environ["SSH_USER"]
sbcli = os.environ["SBCLI_CMD"]
cluster_id = os.environ["CLUSTER_ID"]
run_base = os.environ["RUN_BASE_DIR"].rstrip("/")
outdir = f"{run_base}/{mgmt_ip}/mgmt_details"
os.makedirs(f"{outdir}/mgmt", exist_ok=True)
os.makedirs(f"{outdir}/subtasks", exist_ok=True)
os.makedirs(f"{outdir}/storage_nodes", exist_ok=True)
ssh_base = [
"ssh", "-i", key,
"-o", "StrictHostKeyChecking=no",
"-o", "UserKnownHostsFile=/dev/null",
"-o", "ConnectTimeout=10",
f"{user}@{mgmt_ip}",
]
def run_cmd(cmd, out_file):
print(f" {cmd} -> {out_file}", flush=True)
try:
with open(out_file, "w") as f:
subprocess.run(ssh_base + [cmd], stdout=f, stderr=subprocess.STDOUT, check=False, timeout=60)
except Exception as e:
print(f" WARN: failed: {e}", flush=True)
run_cmd(f"{sbcli} cluster list", f"{outdir}/mgmt/cluster_list.txt")
run_cmd(f"{sbcli} cluster status {cluster_id}", f"{outdir}/mgmt/cluster_status.txt")
run_cmd(f"{sbcli} cluster show {cluster_id}", f"{outdir}/mgmt/cluster_show.txt")
run_cmd(f"{sbcli} cluster get-capacity {cluster_id}", f"{outdir}/mgmt/cluster_capacity.txt")
run_cmd(f"{sbcli} cluster get-logs {cluster_id} --limit 0", f"{outdir}/mgmt/cluster_get_logs.txt")
run_cmd(f"{sbcli} pool list", f"{outdir}/mgmt/pool_list.txt")
run_cmd(f"{sbcli} lvol list", f"{outdir}/mgmt/lvol_list.txt")
run_cmd(f"{sbcli} snapshot list", f"{outdir}/mgmt/snapshot_list.txt")
run_cmd(f"{sbcli} sn list", f"{outdir}/mgmt/sn_list.txt")
run_cmd(f"{sbcli} sn list --json", f"{outdir}/mgmt/sn_list.json")
sn_uuids = []
try:
with open(f"{outdir}/mgmt/sn_list.json") as f:
data = json.load(f)
for item in (data if isinstance(data, list) else []):
uid = item.get("UUID") or item.get("uuid") or item.get("Id") or item.get("id")
if uid:
sn_uuids.append(uid)
except Exception:
pass
for idx, uuid in enumerate(sn_uuids, 1):
run_cmd(f"{sbcli} sn list-devices {uuid}", f"{outdir}/storage_nodes/node{idx}_list_devices.txt")
run_cmd(f"{sbcli} sn check {uuid}", f"{outdir}/storage_nodes/node{idx}_check.txt")
run_cmd(f"{sbcli} sn get {uuid}", f"{outdir}/storage_nodes/node{idx}_get.txt")
run_cmd(f"{sbcli} cluster list-tasks {cluster_id} --limit 0", f"{outdir}/mgmt/cluster_list_tasks.txt")
bal_ids = []
try:
with open(f"{outdir}/mgmt/cluster_list_tasks.txt") as f:
for line in f:
if line.startswith("+") or "Task ID" in line or "|" not in line:
continue
cols = [c.strip() for c in line.split("|")]
if len(cols) >= 5 and cols[3] == "balancing_on_restart" and cols[1]:
bal_ids.append(cols[1])
except Exception:
pass
for tid in bal_ids:
run_cmd(f"{sbcli} cluster get-subtasks {tid}", f"{outdir}/subtasks/{tid}_subtasks.txt")
PY
- name: Collect docker logs into RUN_BASE_DIR (always)
if: always()
shell: bash
run: |
set -euxo pipefail
TAG="containers-final-$(date +%Y%m%d_%H%M%S)"
SSH_OPTS=(-i "${KEY_PATH}" -o BatchMode=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10 -C)
NODES="$(echo "${MNODES} ${STORAGE_PRIVATE_IPS}" | tr ' ' '\n' | sed '/^$/d' | sort -u | tr '\n' ' ')"
for NODE in ${NODES}; do
echo ">>> Node: ${NODE}"
LOCAL_NODE_DIR="${RUN_BASE_DIR}/${NODE}/${TAG}"
mkdir -p "${LOCAL_NODE_DIR}"
ssh "${SSH_OPTS[@]}" "${SSH_USER}@${NODE}" "docker ps -a 2>&1 || true" \
> "${LOCAL_NODE_DIR}/docker_ps_a_${NODE}.txt" || true
CONTAINERS="$(ssh "${SSH_OPTS[@]}" "${SSH_USER}@${NODE}" \
"docker ps -a --format '{{.Names}}' 2>/dev/null || true" 2>/dev/null || true)"
if [[ -z "${CONTAINERS}" ]]; then
echo "No containers found on ${NODE}" > "${LOCAL_NODE_DIR}/_NO_CONTAINERS_${NODE}.txt"
continue
fi
while IFS= read -r C; do
[[ -z "${C}" ]] && continue
echo " dumping: ${C}"
ssh "${SSH_OPTS[@]}" "${SSH_USER}@${NODE}" \
"docker logs --timestamps --details '${C}' 2>&1 || true" \
> "${LOCAL_NODE_DIR}/${C}.txt" || true
ssh "${SSH_OPTS[@]}" "${SSH_USER}@${NODE}" \
"docker inspect '${C}' 2>&1 || true" \
> "${LOCAL_NODE_DIR}/${C}_inspect.json" || true
done <<< "${CONTAINERS}"
done
- name: Collect distrib debug dumps into RUN_BASE_DIR (always)
if: always()
timeout-minutes: 35
shell: bash
run: |
set -euxo pipefail
python3 - <<'PY'
import os, subprocess, sys
ssh_user = os.environ["SSH_USER"]
key = os.environ["KEY_PATH"]
run_base = os.environ["RUN_BASE_DIR"].rstrip("/")
tag = "finaldistrib_bdev_logs"
storage_ips = os.environ["STORAGE_PRIVATE_IPS"].split()
ssh_base = [
"ssh",
"-i", key,
"-o", "StrictHostKeyChecking=no",
"-o", "UserKnownHostsFile=/dev/null",
"-o", "ServerAliveInterval=15",
"-o", "ServerAliveCountMax=4",
"-o", "ConnectTimeout=10",
"-C",
]
scp_base = [
"scp",
"-i", key,
"-o", "StrictHostKeyChecking=no",
"-o", "UserKnownHostsFile=/dev/null",
"-o", "ConnectTimeout=10",
"-C",
]
remote_script = """\
set -euo pipefail
TS="$(date +%Y%m%d_%H%M%S)"
HOST="$(hostname -s 2>/dev/null || hostname)"
STAGING="/tmp/distrib_host_collect_${TS}"
mkdir -p "$STAGING"
CN="$(sudo docker ps --format '{{.Names}}' | grep -E '^spdk_[0-9]+$' | head -n1 || true)"
if [[ -z "$CN" ]]; then echo "NO_SPDK_CONTAINER"; exit 0; fi
SOCK="/mnt/ramdisk/${CN}/spdk.sock"
BDEV_JSON="$(sudo docker exec "$CN" bash -lc "python spdk/scripts/rpc.py -s '$SOCK' bdev_get_bdevs" 2>/dev/null || true)"
if [[ -z "$BDEV_JSON" ]]; then echo "BDEV_EMPTY"; exit 0; fi
if command -v jq >/dev/null 2>&1; then
mapfile -t DISTRIBS < <(printf '%s' "$BDEV_JSON" | jq -r '.[] | select(.name|startswith("distrib_")) | .name' | sort -u)
else
mapfile -t DISTRIBS < <(printf '%s\\n' "$BDEV_JSON" | grep -oE '"name"\\s*:\\s*"distrib_[^"]+"' | sed -E 's/.*"name"\\s*:\\s*"([^"]+)".*/\\1/' | sort -u)
fi
if [[ ${#DISTRIBS[@]} -eq 0 ]]; then echo "NO_DISTRIBS"; exit 0; fi
for d in "${DISTRIBS[@]}"; do
JF="/tmp/stack_${d}.json"
python3 - "$d" "$JF" <<'PYIN'
import json, sys
d = sys.argv[1]
jf = sys.argv[2]
obj = {"subsystems":[{"subsystem":"distr","config":[{"method":"distr_debug_placement_map_dump","params":{"name":d}}]}]}
with open(jf, "w") as f:
f.write(json.dumps(obj))
PYIN
sudo docker cp "$JF" "$CN:$JF" || true
sudo docker exec "$CN" bash -lc "python scripts/rpc_sock.py '$JF' '$SOCK' > /tmp/rpc_${d}.log 2>&1 || true" || true
sudo docker cp "$CN:/tmp/rpc_${d}.log" "$STAGING/rpc_${d}.log" 2>/dev/null || true
for f in $(sudo docker exec "$CN" bash -lc "ls /tmp 2>/dev/null | grep -F \\\"$d\\\" || true"); do
sudo docker cp "$CN:/tmp/$f" "$STAGING/${CN}__$f" 2>/dev/null || true
done
sudo docker exec "$CN" bash -lc "rm -f '$JF' '/tmp/rpc_${d}.log'" || true
rm -f "$JF" || true
done
cat /proc/meminfo | grep -i huge > "$STAGING/hugepage_meminfo.txt" 2>/dev/null || true
TAR="/tmp/${HOST}_distrib_dumps_${TS}.tgz"
tar -C "$STAGING" -czf "$TAR" . 2>/dev/null || true
echo "$TAR"
"""
for ip in storage_ips:
print(f"=== {ip} ===", flush=True)
cmd = ssh_base + [f"{ssh_user}@{ip}", "bash", "-s"]
p = subprocess.run(cmd, input=remote_script.encode(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False)
out = p.stdout.decode(errors="replace").strip().splitlines()
last = out[-1].strip() if out else ""
last = last.replace("\r", "")
if last in ("NO_SPDK_CONTAINER", "BDEV_EMPTY", "NO_DISTRIBS") or not last.startswith("/tmp/"):
print(f"[{ip}] WARN: distrib collection skipped/failed: {last or '(no output)'}", flush=True)
continue
dest_dir = f"{run_base}/{ip}/{tag}"
os.makedirs(dest_dir, exist_ok=True)
scp_cmd = scp_base + [f"{ssh_user}@{ip}:{last}", dest_dir + "/"]
subprocess.run(scp_cmd, check=False)
print(f"[{ip}] Saved -> {dest_dir}/{os.path.basename(last)}", flush=True)
PY
# =========================
# SUMMARY (always)
# =========================
- name: Write Job Summary
if: always()
shell: bash
run: |
set -euxo pipefail
mgmt_ip="$(echo "${MNODES}" | awk '{print $1}')"
out_log="sbcli/e2e/output.log"
# --- Timing ---
start="${TEST_START_EPOCH:-0}"
end="${TEST_END_EPOCH:-0}"
dur_sec=0
if [[ "$start" =~ ^[0-9]+$ && "$end" =~ ^[0-9]+$ && "$end" -ge "$start" ]]; then
dur_sec=$((end-start))
fi
dur_h=$((dur_sec/3600)); dur_m=$(((dur_sec%3600)/60)); dur_s=$((dur_sec%60))
dur_fmt="${dur_h}h ${dur_m}m ${dur_s}s"
# --- Parse test counts from output.log (upgrade_e2e.py format) ---
total_cases=0; passed_cases=0; failed_cases=0; skipped_cases=0
if [[ -f "${out_log}" ]]; then
v="$(grep -m1 'Total Cases:' "${out_log}" | grep -oE '[0-9]+$' 2>/dev/null || true)"; [[ "${v}" =~ ^[0-9]+$ ]] && total_cases="${v}"
v="$(grep -m1 'Passed:' "${out_log}" | grep -oE '[0-9]+$' 2>/dev/null || true)"; [[ "${v}" =~ ^[0-9]+$ ]] && passed_cases="${v}"
v="$(grep -m1 'Failed:' "${out_log}" | grep -oE '[0-9]+$' 2>/dev/null || true)"; [[ "${v}" =~ ^[0-9]+$ ]] && failed_cases="${v}"
v="$(grep -m1 'Skipped:' "${out_log}" | grep -oE '[0-9]+$' 2>/dev/null || true)"; [[ "${v}" =~ ^[0-9]+$ ]] && skipped_cases="${v}"
fi
pass_pct=0; fail_pct=0; skip_pct=0
if [[ "${total_cases}" -gt 0 ]]; then
pass_pct=$(( (passed_cases * 100) / total_cases ))
fail_pct=$(( (failed_cases * 100) / total_cases ))
skip_pct=$(( (skipped_cases * 100) / total_cases ))
fi
# --- Parse per-test status ---
test_details_table=""
if [[ -f "${out_log}" ]]; then
while IFS= read -r line; do
clean="$(printf '%s' "${line}" | sed 's/\x1b\[[0-9;]*m//g')"
test_name="$(printf '%s' "${clean}" | grep -oE 'Test[A-Za-z0-9]+' | head -n1 || true)"
[[ -z "${test_name}" ]] && continue
if printf '%s' "${clean}" | grep -qi 'PASSED'; then icon="✅"; status="PASSED"
elif printf '%s' "${clean}" | grep -qi 'FAILED'; then icon="❌"; status="FAILED"
elif printf '%s' "${clean}" | grep -qi 'SKIPPED'; then icon="⏭"; status="SKIPPED"
else continue
fi
test_details_table+="| \`${test_name}\` | ${icon} ${status} |"$'\n'
done < <(grep -iE 'PASSED|FAILED|SKIPPED' "${out_log}" 2>/dev/null || true)
fi
# --- Failure reason ---
failure_reason=""
if [[ -f "${out_log}" ]]; then
multi="$(grep 'MultipleExceptions:' "${out_log}" | sed 's/\x1b\[[0-9;]*m//g' || true)"
if [[ -n "${multi}" ]]; then
failure_reason="${multi}"
elif grep -Eqi 'Traceback \(most recent call last\)|Exception:|AssertionError|Input/output error' "${out_log}"; then
failure_reason="$(grep -Ei 'Traceback \(most recent call last\)|Exception:|AssertionError|Input/output error' "${out_log}" | tail -n 3 | sed 's/\x1b\[[0-9;]*m//g' || true)"
fi
fi
# --- Mgmt artifacts list ---
mgmt_dir="${RUN_BASE_DIR:-}/${mgmt_ip}/mgmt_details/mgmt"
mgmt_files="(not found)"
if [[ -n "${RUN_BASE_DIR:-}" && -d "${mgmt_dir}" ]]; then
mgmt_files="$(find "${mgmt_dir}" -maxdepth 1 -type f -printf '%f (%s bytes)\n' 2>/dev/null | sort || true)"
[[ -n "${mgmt_files}" ]] || mgmt_files="(empty)"
fi
# --- Overall result ---
conclusion="✅ SUCCESS"
if [[ "${{ job.status }}" != "success" ]]; then
conclusion="❌ FAILED"
fi
{
echo "## SimplyBlock Upgrade E2E Run Summary"
echo ""
echo "**Result:** ${conclusion} &nbsp;|&nbsp; **Duration:** ${dur_fmt}"
echo ""
echo "### Upgrade"
echo "| | Branch | SPDK Image |"
echo "|---|---|---|"
echo "| **Base** | \`${BASE_SBCLI_BRANCH}\` | \`${BASE_SPDK_IMAGE:-default}\` |"
echo "| **Target** | \`${TARGET_SBCLI_BRANCH}\` | \`${TARGET_SPDK_IMAGE:-default}\` |"
echo ""
echo "### Test Results"
echo "| | Count | % |"
echo "|---|---|---|"
echo "| ✅ Passed | ${passed_cases} | ${pass_pct}% |"
echo "| ❌ Failed | ${failed_cases} | ${fail_pct}% |"
echo "| ⏭ Skipped | ${skipped_cases} | ${skip_pct}% |"
echo "| **Total** | **${total_cases}** | |"
echo ""
if [[ -n "${test_details_table}" ]]; then
echo "### Test Case Details"
echo "| Test | Result |"
echo "|---|---|"
printf '%s' "${test_details_table}"
echo ""
fi
echo "### Run Info"
echo "- **Test class:** \`${TEST_CLASS:-all}\`"
echo "- **Cluster ID:** \`${CLUSTER_ID}\`"
echo "- **Mgmt node:** \`${mgmt_ip}\`"
echo "- **Start (UTC):** ${TEST_START_HUMAN:-unknown}"
echo "- **End (UTC):** ${TEST_END_HUMAN:-unknown}"
echo ""
if [[ -n "${failure_reason}" ]]; then
echo "### Failure Reason"
echo '```'
printf '%s\n' "${failure_reason}"
echo '```'
echo ""
fi
if [[ -n "${RUN_BASE_DIR:-}" ]]; then
echo "<details><summary>Run Artifacts (NFS)</summary>"
echo ""
echo "- **Run dir:** \`${RUN_BASE_DIR}/\`"
echo "- Mgmt details: \`${RUN_BASE_DIR}/${mgmt_ip}/mgmt_details/\`"
echo "- Docker logs: \`${RUN_BASE_DIR}/<node_ip>/containers-final-*/\`"
echo "- Distrib dumps: \`${RUN_BASE_DIR}/<storage_ip>/finaldistrib_bdev_logs/\`"
echo ""
echo "</details>"
echo ""
fi
echo "<details><summary>Mgmt Artifacts (cluster state at end of run)</summary>"
echo ""
echo "Path: \`${mgmt_dir}\`"
echo ""
echo '```'
printf '%s\n' "${mgmt_files}"
echo '```'
echo ""
echo "</details>"
} >> "$GITHUB_STEP_SUMMARY"
# Parse sbcli/e2e/output.log, build a run summary and post it to Slack.
# Runs even when the job failed; a missing webhook or HTTP error never
# fails the job (it only logs a warning).
- name: Send Slack Notification
  if: always()
  shell: bash
  env:
    SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
    JOB_STATUS: ${{ job.status }}
    SLACK_RUN_URL: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
    GITHUB_REF_NAME: ${{ github.ref_name }}
    SLACK_WF_NAME: "E2E (Upgrade Bootstrap)"
  run: |
    python3 - <<'PYEOF'
    import json, os, re, sys, urllib.request, urllib.error

    webhook = os.environ.get("SLACK_WEBHOOK_URL", "")
    if not webhook:
        print("No SLACK_WEBHOOK_URL set, skipping.")
        sys.exit(0)

    # Read the log defensively: the file may be absent, and test output can
    # contain non-UTF-8 bytes — neither must crash the notifier. The context
    # manager also guarantees the file handle is closed.
    out_log = "sbcli/e2e/output.log"
    content = ""
    if os.path.isfile(out_log):
        with open(out_log, encoding="utf-8", errors="replace") as fh:
            content = fh.read()

    # --- Counts (upgrade_e2e.py format) ---
    def px(pat):
        """Return the first integer captured by `pat` in the log, or 0."""
        m = re.search(pat, content)
        return int(m.group(1)) if m else 0

    total = px(r'Total Cases:\s*(\d+)')
    passed = px(r'Passed:\s*(\d+)')
    failed = px(r'Failed:\s*(\d+)')
    skipped = px(r'Skipped:\s*(\d+)')
    pass_pct = (passed * 100 // total) if total > 0 else 0

    # --- Per-test results (strip ANSI colour codes once, up front) ---
    ansi = re.compile(r'\x1b\[[0-9;]*m')
    clean_lines = [ansi.sub('', l) for l in content.splitlines()]
    test_results = []
    for clean in clean_lines:
        m = re.search(r'Test[A-Za-z0-9_]+', clean)
        if not m:
            continue
        name = m.group(0)
        if 'PASSED' in clean: test_results.append(('PASSED', name))
        elif 'FAILED' in clean: test_results.append(('FAILED', name))
        elif 'SKIPPED' in clean: test_results.append(('SKIPPED', name))

    # --- Failure reason: prefer a MultipleExceptions summary line, otherwise
    # --- the last few generic exception lines, capped at 2000 chars for Slack.
    failure_reason = ""
    multi = [l for l in clean_lines if 'MultipleExceptions:' in l]
    if multi:
        failure_reason = multi[0][:2000]
    elif content:
        exc_lines = [l for l in clean_lines
                     if re.search(r'(Exception:|AssertionError|Input/output error)', l)]
        if exc_lines:
            failure_reason = '\n'.join(exc_lines[-5:])[:2000]

    # --- Env ---
    def epoch(var):
        """Epoch seconds from env var; 0 when unset, empty or malformed."""
        try:
            return int(os.environ.get(var, "0") or "0")
        except ValueError:
            return 0

    s = epoch("TEST_START_EPOCH")
    e = epoch("TEST_END_EPOCH")
    # Duration is only meaningful when both epochs are set and ordered.
    secs = max(0, e - s) if e >= s > 0 else 0
    dur = f"{secs//3600}h {(secs%3600)//60}m {secs%60}s"
    run_url = os.environ.get("SLACK_RUN_URL", "")
    log_dir = os.environ.get("RUN_BASE_DIR", "N/A")
    base_branch = os.environ.get("BASE_SBCLI_BRANCH", "?")
    base_spdk = os.environ.get("BASE_SPDK_IMAGE", "") or "default"
    target_branch = os.environ.get("TARGET_SBCLI_BRANCH", "?")
    target_spdk = os.environ.get("TARGET_SPDK_IMAGE", "") or "default"
    test_cls = os.environ.get("TEST_CLASS", "") or "all"
    branch = os.environ.get("GITHUB_REF_NAME", "?")
    wf_name = os.environ.get("SLACK_WF_NAME", "Run")
    ok = os.environ.get("JOB_STATUS", "") == "success"
    icon = ":white_check_mark:" if ok else ":x:"
    status = "SUCCESS" if ok else "FAILURE"
    # Ping the channel only on failure.
    mention = "" if ok else " <!channel>"

    lines = [
        f"{icon} *SimplyBlock {wf_name}*{mention}",
        f"*Status:* {status} | *Duration:* {dur}",
        f"*Branch:* `{branch}` | *Test class:* `{test_cls}`",
        f"*Upgrade:* `{base_branch}` → `{target_branch}`",
        f"*Base SPDK:* `{base_spdk}` | *Target SPDK:* `{target_spdk}`",
        "",
    ]
    if total > 0:
        lines += [
            f":white_check_mark: *Passed:* {passed}/{total} ({pass_pct}%)",
            f":x: *Failed:* {failed}",
            f":fast_forward: *Skipped:* {skipped}",
        ]
    else:
        lines.append("_(test counts not found in log)_")
    if test_results:
        lines.append("")
        lines.append("*Test Results:*")
        icons = {'PASSED': ':white_check_mark:', 'FAILED': ':x:', 'SKIPPED': ':fast_forward:'}
        for st, nm in test_results:
            lines.append(f"{icons.get(st, ':grey_question:')} `{nm}`")
    if failure_reason:
        lines += ["", "*Failure:*", f"```{failure_reason}```"]
    lines += [
        "",
        f":link: *Run:* <{run_url}|View on GitHub>",
        f":file_folder: *Final Logs:* `{log_dir}`",
    ]

    payload = {"text": "\n".join(lines)}
    req = urllib.request.Request(
        webhook,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        # Context manager closes the HTTP response; failure is non-fatal.
        with urllib.request.urlopen(req, timeout=15):
            pass
        print("Slack notification sent.")
    except Exception as exc:
        print(f"WARN: Slack notification failed: {exc}", file=sys.stderr)
    PYEOF
# Archive bootstrap and e2e logs as a GitHub artifact, even on failure.
- name: Upload logs (always)
  if: always()
  uses: actions/upload-artifact@v4
  with:
    # RUN_LABEL (when provided) suffixes the artifact name so parallel
    # invocations of this reusable workflow do not collide.
    name: simplyblock-upgrade-logs-${{ github.run_id }}${{ inputs.RUN_LABEL != '' && format('-{0}', inputs.RUN_LABEL) || '' }}
    path: |
      simplyBlockDeploy/bare-metal/bootstrap.log
      sbcli/e2e/output.log
      sbcli/e2e/logs/**
    # Missing logs (e.g. bootstrap failed early) only warn, never fail.
    if-no-files-found: warn
# Publish the first management-node address for use by later steps.
- name: Export MGMT_IP (first MNODES)
  if: always()
  shell: bash
  run: |
    # MNODES is a whitespace-separated list; the first entry is the
    # primary management node. Exported via GITHUB_ENV for later steps.
    mgmt_ip="$(awk '{print $1}' <<< "${MNODES}")"
    echo "MGMT_IP=${mgmt_ip}" >> "$GITHUB_ENV"
# Upload a compact artifact (test log + mgmt-node text dumps) for quick triage.
- name: Upload small artifacts (always)
  # Requires RUN_BASE_DIR and MGMT_IP to have been exported by earlier steps.
  if: always() && env.RUN_BASE_DIR != '' && env.MGMT_IP != ''
  uses: actions/upload-artifact@v4
  with:
    # Same RUN_LABEL suffixing scheme as the full log artifact above.
    name: simplyblock-upgrade-small-logs-${{ github.run_id }}${{ inputs.RUN_LABEL != '' && format('-{0}', inputs.RUN_LABEL) || '' }}
    path: |
      sbcli/e2e/output.log
      ${{ env.RUN_BASE_DIR }}/${{ env.MGMT_IP }}/mgmt_details/mgmt/*.txt
      ${{ env.RUN_BASE_DIR }}/${{ env.MGMT_IP }}/mgmt_details/subtasks/*.txt
      ${{ env.RUN_BASE_DIR }}/${{ env.MGMT_IP }}/mgmt_details/storage_nodes/*.txt
    # Missing dumps only warn, never fail the job.
    if-no-files-found: warn