Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
4383313
DAOS-17737 dtx: handle race between DTX refresh and DTX abort - b26 (…
Nasf-Fan Jul 22, 2025
78bf39e
SRE-3194 build: Remove redhat-lsb-core from Dockerfile.mockbuild (#16…
ryon-jensen Jul 23, 2025
81f7182
DAOS-17738 client: reset DTX base UUID after fork - b26 (#16540)
Nasf-Fan Jul 24, 2025
f4fe123
DAOS-17748 control: Add verification to raft-db Add/Remove wrappers (…
tanabarr Jul 24, 2025
15b8a86
DAOS-17780 bio: Fix use-after-free in JSON parsing (#16592) (#16593)
tanabarr Jul 25, 2025
6691c74
DAOS-17772 rebuild: fix a race condition between fetch and aggregatio…
liuxuezhao Jul 28, 2025
88e15e0
DAOS-17547 rebuild: error on stopped ds_pool_child (#16382) (#16600)
NiuYawei Jul 28, 2025
672e086
DAOS-17492 control: Ensure updated members can become voters (#16392)…
kjacque Jul 28, 2025
6f028cd
SRE-3236 LEAP-15 LUA-LMOD hack fix for Leap 15.6 (#16676)
JohnMalmberg Jul 30, 2025
4b81428
DAOS-17835 doc: Document how to add/remove MS replica (#16651) (#16683)
kjacque Jul 30, 2025
eeddeea
DAOS-17534 dtx: not add cont to batched commit list if being stopped …
Nasf-Fan Jul 31, 2025
25cc8d7
DAOS-17872 build: Tag 2.6.4 rc2 (#16705)
daltonbohning Aug 5, 2025
0be3b9f
DAOS-17534 dtx: avoid repeatedly adding item into batched commit list…
Nasf-Fan Aug 12, 2025
6a68080
DAOS-17877 cq: give create_release.yml write permission (#16708) (#16…
daltonbohning Aug 12, 2025
e46f9a1
DAOS-17876 control: Expect lowercase hostname in unit test (#16710) (…
kjacque Aug 12, 2025
c4ddd51
DAOS-17828 vos: fix a pointer misuse (#16701)
janekmi Aug 13, 2025
5432212
DAOS-16557 test: Add debug to NvmeEnospace ftest (#15559) (#16728)
knard38 Aug 27, 2025
5daa188
DAOS-17783 test: Suppress NLT false positives in Go (#16615) (#16680)
kjacque Aug 27, 2025
e66baed
DAOS-17591 dtx: handle orphan DTX entries - b26 (#16483)
Nasf-Fan Sep 2, 2025
09ce3d7
Merge remote-tracking branch 'comm/release/2.6' into jeffolivier/goog…
jolivier23 Sep 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/actions/make_release/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash -l

set -e

# Only need to do any of this if the version has been updated
# NOTE: The diff-index with HEAD^ implies that the TAG
# must be updated in the last commit. But version update
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/create_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ jobs:
name: Create Release
if: github.repository == 'daos-stack/daos'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion TAG
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.4-rc1
2.6.4-rc2
79 changes: 64 additions & 15 deletions ci/provisioning/post_provision_config.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright 2020-2023 Intel Corporation.

Check failure on line 3 in ci/provisioning/post_provision_config.sh

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date
# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
Expand All @@ -25,10 +25,54 @@
# shellcheck disable=SC1091
source ci/junit.sh

# This script needs to be able to run outside of CI for testing.
# Before running the script, environment variables may be needed for
# the specific site.

: "${MLNX_VER_NUM:=24.04-0.6.6.0}"

: "${DISTRO:=EL_7}"
# This is tangled and needs a better fix: DISTRO is passed as EL_8 for
# EL_9, yet other places expect DISTRO to really be EL_8 and not EL_9.

# The caller still has to be fixed; until then, give DISTRO a default so the
# script can also be used outside of CI.
: "${DISTRO:=unknown}"

# Outside of CI this script is assumed to run on the target system itself,
# so an unknown DISTRO can be derived from /etc/os-release.
if [ "$DISTRO" = unknown ]; then
  # shellcheck disable=SC1091
  source /etc/os-release
  # Fall back to EL-like values for anything os-release did not provide.
  : "${ID_LIKE:=rhel}" "${ID:=unknown}" "${VERSION_ID:=8}"
  # EL and LEAP use only the major version; UBUNTU keeps the full VERSION_ID.
  prefix="EL"
  version="${VERSION_ID%%.*}"
  case "$ID_LIKE" in
    *suse*)
      prefix="LEAP"
      ;;
    *)
      case "$ID" in
        *ubuntu*)
          prefix="UBUNTU"
          version="$VERSION_ID"
          ;;
      esac
      ;;
  esac
  DISTRO="${prefix}_${version}"
fi

# Helper scripts should be distro family specific not distro version specific
FAMILY="${DISTRO%%_*}"

# NODELIST is all the nodes in a CI cluster comma separated - do not use here.
# NODESTRING is only the nodes in the requested CI cluster.
: "${NODESTRING:=localhost}"

: "${COMMIT_MESSAGE:=$(git log -1 --pretty=%B)}"
: "${ARTIFACTORY_URL:=}"
: "${REPO_FILE_URL:=}"
if [ -n "$ARTIFACTORY_URL" ] && [ -z "$REPO_FILE_URL" ]; then
REPO_FILE_URL="$ARTIFACTORY_URL/repo-files/"
fi

# CI user can be any user that is not expected to be on the test systems.
: "${CI_USER:=jenkins}"

retry_cmd 300 clush -B -S -l root -w "$NODESTRING" -c ci_key* --dest=/tmp/

Expand All @@ -54,10 +98,13 @@
return 0
}

if create_host_file "$NODESTRING" "./hosts" "/etc/hosts"; then
retry_cmd 300 clush -B -S -l root -w "$NODESTRING" -c ./hosts --dest=/etc/hosts
else
echo "ERROR: Failed to create host file"
if [ "$NODESTRING" != "localhost" ]; then
if create_host_file "$NODESTRING" "./hosts" "/etc/hosts"; then
retry_cmd 300 clush -B -S -l root -w "$NODESTRING" \
-c ./hosts --dest=/etc/hosts
else
echo "ERROR: Failed to create host file"
fi
fi


Expand All @@ -67,22 +114,23 @@
if ! retry_cmd 2400 clush -B -S -l root -w "$NODESTRING" \
"export PS4='$PS4'
MY_UID=$(id -u)
CI_USER=\"${CI_USER}\"
CONFIG_POWER_ONLY=${CONFIG_POWER_ONLY:-}
INST_REPOS=\"${INST_REPOS:-}\"
INST_RPMS=\"${INST_RPMS:-}\"
GPG_KEY_URLS=\"${GPG_KEY_URLS:-}\"
REPOSITORY_URL=\"${REPOSITORY_URL:-}\"
JENKINS_URL=\"${JENKINS_URL:-}\"
DISTRO=\"$DISTRO\"
DAOS_STACK_RETRY_DELAY_SECONDS=\"$DAOS_STACK_RETRY_DELAY_SECONDS\"
DAOS_STACK_RETRY_COUNT=\"$DAOS_STACK_RETRY_COUNT\"
DAOS_STACK_RETRY_DELAY_SECONDS=\"${DAOS_STACK_RETRY_DELAY_SECONDS:-}\"
DAOS_STACK_RETRY_COUNT=\"${DAOS_STACK_RETRY_COUNT:-}\"
MLNX_VER_NUM=\"$MLNX_VER_NUM\"
BUILD_URL=\"$BUILD_URL\"
STAGE_NAME=\"$STAGE_NAME\"
OPERATIONS_EMAIL=\"$OPERATIONS_EMAIL\"
BUILD_URL=\"${BUILD_URL:-}\"
STAGE_NAME=\"${STAGE_NAME:-}\"
OPERATIONS_EMAIL=\"${OPERATIONS_EMAIL:-}\"
COMMIT_MESSAGE=\"$sanitized_commit_message\"
REPO_FILE_URL=\"$REPO_FILE_URL\"
ARTIFACTORY_URL=\"${ARTIFACTORY_URL:-}\"
ARTIFACTORY_URL=\"${ARTIFACTORY_URL}\"
BRANCH_NAME=\"${BRANCH_NAME:-}\"
CHANGE_TARGET=\"${CHANGE_TARGET:-}\"
CI_RPM_TEST_VERSION=\"${CI_RPM_TEST_VERSION:-}\"
Expand All @@ -91,12 +139,13 @@
REPO_PATH=\"${REPO_PATH:-}\"
ARTIFACTS_URL=\"${ARTIFACTS_URL:-}\"
COVFN_DISABLED=\"${COVFN_DISABLED:-true}\"
DAOS_CI_INFO_DIR=\"${DAOS_CI_INFO_DIR:-wolf-2:/export/scratch}\"
DAOS_CI_INFO_DIR=\"${DAOS_CI_INFO_DIR:-}\"
CI_SCONS_ARGS=\"${CI_SCONS_ARGS:-}\"
$(cat ci/stacktrace.sh)
$(cat ci/junit.sh)
$(cat ci/provisioning/post_provision_config_common_functions.sh)
$(cat ci/provisioning/post_provision_config_common.sh)
$(cat ci/provisioning/post_provision_config_nodes_"$DISTRO".sh)
$(cat ci/provisioning/post_provision_config_nodes_"$FAMILY".sh)
$(cat ci/provisioning/post_provision_config_nodes.sh)"; then
report_junit post_provision_config.sh results.xml "$NODESTRING"
exit 1
Expand All @@ -105,7 +154,7 @@
git log --format=%B -n 1 HEAD | sed -ne '1s/^\([A-Z][A-Z]*-[0-9][0-9]*\) .*/\1/p' \
-e '/^Fixes:/{s/^Fixes: *//;s/ /\
/g;p}' | \
retry_cmd 60 ssh -i ci_key -l jenkins "${NODELIST%%,*}" \
retry_cmd 60 ssh -i ci_key -l "$CI_USER" "${NODESTRING%%,*}" \
"cat >/tmp/commit_fixes"
git log --pretty=format:%h --abbrev-commit --abbrev=7 |
retry_cmd 60 ssh -i ci_key -l jenkins "${NODELIST%%,*}" "cat >/tmp/commit_list"
retry_cmd 60 ssh -i ci_key -l "$CI_USER" "${NODESTRING%%,*}" "cat >/tmp/commit_list"
77 changes: 31 additions & 46 deletions ci/provisioning/post_provision_config_common_functions.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright 2022-2023 Intel Corporation.

Check failure on line 3 in ci/provisioning/post_provision_config_common_functions.sh

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date
# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
Expand Down Expand Up @@ -94,6 +94,13 @@
send_mail "Command retry successful in $STAGE_NAME after $((attempt + 1)) attempts using ${repo_servers[0]} as initial repo server " \
"Command: ${args[*]}\nAttempts: $attempt\nStatus: $rc"
fi
if [ -n "$ARTIFACTORY_URL" ]; then
dnfx="dnf"
if command -v dnf4; then
dnfx="dnf4"
fi
"$dnfx" config-manager --disable 'epel*' || true
fi
return 0
fi
# Command failed, retry
Expand Down Expand Up @@ -229,21 +236,6 @@
return "$rc"
}

# Download the repo file for one repo server into $REPOS_DIR.
# Globals:   REPOS_DIR (read), REPO_FILE_URL (read); sourcing
#            /etc/os-release sets ID, VERSION_ID, etc. in the current shell.
# Arguments: $1 - repo server name, used as the suffix of the repo file name
# Outputs:   a failure message on stdout when the download fails
# Returns:   0 on success, 1 when curl fails
fetch_repo_config() {
    local repo_server="$1"

    # shellcheck disable=SC1091
    . /etc/os-release
    # File name is built from the distro ID, its major version and the server.
    local cfg_name="daos_ci-${ID}${VERSION_ID%%.*}-$repo_server"
    local src_url="${REPO_FILE_URL}${cfg_name}.repo"

    if curl -f -o "${REPOS_DIR}/${cfg_name}" "$src_url"; then
        return 0
    fi

    echo "Failed to fetch $src_url"
    return 1
}

pr_repos() {
if [ -n "$CI_PR_REPOS" ]; then
echo "$CI_PR_REPOS"
Expand Down Expand Up @@ -274,9 +266,6 @@
# shellcheck disable=SC1091
. /etc/os-release

rm -f "$REPOS_DIR/daos_ci-${ID}${VERSION_ID%%.*}".repo
ln "$REPOS_DIR/daos_ci-${ID}${VERSION_ID%%.*}"{-"$repo_server",.repo}

if [ "$repo_server" = "artifactory" ]; then
if { [[ \ $(pr_repos) = *\ daos@PR-* ]] || [ -z "$(rpm_test_version)" ]; } &&
[[ ! ${CHANGE_TARGET:-$BRANCH_NAME} =~ ^[-.0-9A-Za-z]+-testing ]]; then
Expand All @@ -298,30 +287,6 @@
update_repos() {
local DISTRO_NAME="$1"

# Update the repo files
local repo_server
for repo_server in "${repo_servers[@]}"; do
if ! fetch_repo_config "$repo_server"; then
# leave the existing on-image repo config alone if the repo fetch fails
send_mail "Fetch repo file for repo server \"$repo_server\" failed. Continuing on with in-image repos."
echo "Fetch repo file for repo server \"$repo_server\" failed. Continuing on with in-image repos."
return 1
fi
done

# we're not actually using the set_local_repos.sh script
# setting a repo server is as easy as renaming a file
#if ! curl -o /usr/local/sbin/set_local_repos.sh-tmp "${REPO_FILE_URL}set_local_repos.sh"; then
# send_mail "Fetch set_local_repos.sh failed. Continuing on with in-image copy."
#else
# cat /usr/local/sbin/set_local_repos.sh-tmp > /usr/local/sbin/set_local_repos.sh
# chmod +x /usr/local/sbin/set_local_repos.sh
# rm -f /usr/local/sbin/set_local_repos.sh-tmp
#fi

# successfully grabbed them all, so replace the entire $REPOS_DIR
# content with them

# This is not working right on a second run.
# using a quick hack to stop deleting a critical repo
local file
Expand Down Expand Up @@ -353,19 +318,39 @@
dnf -y erase fuse3\*
fi

if $CONFIG_POWER_ONLY; then
if [ -n "$CONFIG_POWER_ONLY" ]; then
rm -f "$REPOS_DIR"/*_job_daos-stack_job_*_job_*.repo
time dnf -y erase fio fuse ior-hpc mpich-autoload \
ompi argobots cart daos daos-client dpdk \
fuse-libs libisa-l libpmemobj mercury mpich \
argobots cart daos daos-client dpdk \
libisa-l libpmemobj mercury mpich \
pmix protobuf-c spdk libfabric libpmem \
munge-libs munge slurm \
slurm-example-configs slurmctld slurm-slurmmd
fi

cat /etc/os-release

if lspci | grep "ConnectX-6" && ! grep MOFED_VERSION /etc/do-release; then
# ConnectX must be 5 or later to support MOFED/DOCA drivers
# RoCE tests with Mellanox adapters may use MOFED/DOCA drivers.

# Helper: decide whether any listed adapter is ConnectX generation 5 or newer.
# Arguments: none; reads `lspci -mm` lines on stdin
# Returns:   0 if a ConnectX-<N> with N >= 5 is found, 1 otherwise
_connectx_needs_mofed() {
  local line pci_bus gen last_pci_bus=''
  while IFS= read -r line; do
    # Cut at the FIRST dot so only the leading slot (without the function
    # number) is compared; later fields of the line may contain dots too.
    pci_bus="${line%%.*}"
    if [ "$pci_bus" == "$last_pci_bus" ]; then
      # We only use one interface on a dual interface HBA
      # Fortunately lspci appears to group them together
      continue
    fi
    last_pci_bus="$pci_bus"
    case "$line" in
      *ConnectX-*) ;;
      *) continue ;;
    esac
    gen="${line##*ConnectX-}"
    # Keep only the leading digits: names such as "ConnectX-6 Dx" or
    # "ConnectX-5 Ex" carry a suffix that is not part of the generation.
    gen="${gen%%[!0-9]*}"
    if [ -n "$gen" ] && [ "$gen" -ge 5 ]; then
      return 0
    fi
  done
  return 1
}

mellanox_drivers=false
if _connectx_needs_mofed < <(lspci -mm | grep "ConnectX"); then
  mellanox_drivers=true
fi

if "$mellanox_drivers"; then
# Remove OPA and install MOFED
install_mofed
fi
Expand Down
43 changes: 32 additions & 11 deletions ci/provisioning/post_provision_config_nodes.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright 2020-2023 Intel Corporation.

Check failure on line 3 in ci/provisioning/post_provision_config_nodes.sh

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date
# Copyright 2025 Hewlett Packard Enterprise Development LP
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
Expand All @@ -16,14 +16,29 @@
bootstrap_dnf
fi

if ! grep ":$MY_UID:" /etc/group; then
groupadd -g "$MY_UID" jenkins
fi
# In CI, use a made-up "jenkins" user with the UID that the build agent is
# currently using. It is not clear that the UID still matters, so that
# parameter can probably be removed in the future.
# Nothing cares what the account name is, as long as it does not conflict
# with an existing name and it is used consistently.
CI_USER="jenkins"

mkdir -p /localhome
if ! grep ":$MY_UID:$MY_UID:" /etc/passwd; then
useradd -b /localhome -g "$MY_UID" -u "$MY_UID" -s /bin/bash jenkins
if ! getent passwd "$CI_USER"; then
# If that UID already exists, then this is not being run in CI.
if ! getent passwd "$MY_UID"; then
if ! getent group "$MY_UID"; then
groupadd -g "$MY_UID" "$CI_USER"
fi
useradd -b /localhome -g "$MY_UID" -u "$MY_UID" -s /bin/bash "$CI_USER"
else
# Still need a "$CI_USER" account, so just make one up.
useradd -b /localhome -s /bin/bash "$CI_USER"
fi
fi
jenkins_ssh=/localhome/jenkins/.ssh
ci_uid="$(id -u $CI_USER)"
ci_gid="$(id -g $CI_USER)"
jenkins_ssh=/localhome/"$CI_USER"/.ssh
mkdir -p "${jenkins_ssh}"
if ! grep -q -s -f /tmp/ci_key.pub "${jenkins_ssh}/authorized_keys"; then
cat /tmp/ci_key.pub >> "${jenkins_ssh}/authorized_keys"
Expand All @@ -37,12 +52,18 @@
cp /tmp/ci_key_ssh_config "${jenkins_ssh}/config"
chmod 700 "${jenkins_ssh}"
chmod 600 "${jenkins_ssh}"/{authorized_keys,id_rsa*,config}
chown -R jenkins.jenkins /localhome/jenkins/
echo "jenkins ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/jenkins
chown -R "${ci_uid}.${ci_gid}" "/localhome/${CI_USER}/"
echo "$CI_USER ALL=(ALL) NOPASSWD: ALL" > "/etc/sudoers.d/$CI_USER"

# /scratch is needed on test nodes
mkdir -p /scratch
retry_cmd 2400 mount "${DAOS_CI_INFO_DIR}" /scratch
# For now, /scratch is still needed on test nodes as the CI info location.
# DAOS tests need to be changed to use /CIShare instead.
if [ -n "$DAOS_CI_INFO_DIR" ]; then
mkdir -p /CIShare
retry_cmd 2400 mount "${DAOS_CI_INFO_DIR}" /CIShare
# This part only until DAOS is migrated to use /CIShare
rm -f /scratch
ln -sfn /CIShare /scratch
fi

# defined in ci/functional/post_provision_config_nodes_<distro>.sh
# and catted to the remote node along with this script
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,27 +66,26 @@ install_mofed() {
gversion="${gversion%.*}"
fi

# Add a repo to install Mellanox_OFED RPMS
: "${ARTIFACTORY_URL:=https://artifactory.dc.hpdd.intel.com/artifactory/}"
# Temporary fix
if [[ ${ARTIFACTORY_URL} != *"/artifactory" ]]; then
ARTIFACTORY_URL="${ARTIFACTORY_URL}artifactory"
: "${ARTIFACTORY_URL:=}"
if [ -z "$ARTIFACTORY_URL" ]; then
return
fi
mellanox_proxy="${ARTIFACTORY_URL}/mellanox-proxy/mlnx_ofed/"
mellanox_key_url="${ARTIFACTORY_URL}/mlnx_ofed/RPM-GPG-KEY-Mellanox"
rpm --import "$mellanox_key_url"
repo_url="$mellanox_proxy$MLNX_VER_NUM/rhel$gversion/x86_64/"
dnf -y config-manager --add-repo="$repo_url"
dnf -y config-manager --save --setopt="$(url_to_repo "$repo_url")".gpgcheck=1
dnf repolist || true

time dnf -y install mlnx-ofed-basic ucx-cma ucx-ib ucx-knem ucx-rdmacm ucx-xpmem
# Install Mellanox OFED or DOCA RPMS
install_mellanox="install_mellanox.sh"
script_url="${ARTIFACTORY_URL}/raw-internal/sre_tools/$install_mellanox"
install_target="/usr/local/sbin/$install_mellanox"

# now, upgrade firmware
time dnf -y install mlnx-fw-updater
if [ ! -e "$install_target" ]; then
if ! curl --silent --show-error --fail \
-o "/usr/local/sbin/$install_mellanox" "$script_url"; then
echo "Failed to fetch $script_url"
return 1
fi
chmod 0755 "$install_target"
fi

# Make sure that tools are present.
#ls /usr/bin/ib_* /usr/bin/ibv_*
MELLANOX_VERSION="$MLNX_VER_NUM" "$install_mellanox"

dnf list --showduplicates perftest
if [ "$gversion" == "8.5" ]; then
Expand All @@ -96,5 +95,4 @@ install_mofed() {
dnf list --showduplicates ucx-knem
dnf remove -y ucx-knem || true
fi

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Point $REPOS_DIR at the zypp repo directory and reinstall lua-lmod.
# Globals: REPOS_DIR (read) - path that is removed and recreated as a symlink
bootstrap_dnf() {
# Replace whatever is at $REPOS_DIR with a symlink to ../zypp/repos.d.
rm -rf "$REPOS_DIR"
ln -s ../zypp/repos.d "$REPOS_DIR"
# Remove the existing packages before reinstalling.
# NOTE(review): lua-lmod is named in both remove commands; the first one
# looks redundant - confirm.
dnf -y remove lua-lmod
dnf -y remove lua54 lua-lmod
# Install lua-lmod with GPG checking disabled, using only repos whose names
# match *lua* or *network-cluster*.
dnf -y --nogpgcheck install lua-lmod '--repo=*lua*' --repo '*network-cluster*'
}

Expand Down
File renamed without changes.
6 changes: 6 additions & 0 deletions debian/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
daos (2.6.4-2) unstable; urgency=medium
[ Dalton Bohning ]
* Second release candidate for 2.6.4

-- Dalton Bohning <[email protected]> Tue, 05 Aug 2025 08:30:00 -0800

daos (2.6.4-1) unstable; urgency=medium
[ Phillip Henderson ]
* First release candidate for 2.6.4
Expand Down
Loading
Loading