Skip to content

Commit 48c25a2

Browse files
QuantumMisaka and Kai Luo
authored and committed
[develop][Toolchain] fix elpa-gpu installation problem in toolchain (deepmodeling#6632)
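* fix(toolchain): fix elpa-gpu installation in legacy way
* chore(toolchain): add -e for gnu/intel build
* fix(openblas): 修正openblas包名大小写问题 (fix the letter case of the OpenBLAS package name)
* fix(toolchain): 修复wget下载时错误输出被重定向的问题 (fix wget error output being redirected away during download)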
1 parent 49b5005 commit 48c25a2

File tree

6 files changed

+19
-6
lines changed

6 files changed

+19
-6
lines changed

toolchain/build_abacus_gnu.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/bin/bash -e
22
#SBATCH -J build_abacus_gnu
33
#SBATCH -N 1
44
#SBATCH -n 16

toolchain/build_abacus_intel.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#!/bin/bash
1+
#!/bin/bash -e
22
#SBATCH -J build_abacus_intel
33
#SBATCH -N 1
44
#SBATCH -n 16

toolchain/scripts/lib/config_manager.sh

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -481,6 +481,11 @@ config_validate() {
481481
CONFIG_CACHE["ARCH_NUM"]="no"
482482
fi
483483

484+
# Backward compatibility: also export ARCH_NUM to environment when set
485+
if [[ -n "${CONFIG_CACHE[ARCH_NUM]}" ]]; then
486+
export ARCH_NUM="${CONFIG_CACHE[ARCH_NUM]}"
487+
fi
488+
484489
return 0
485490
}
486491

@@ -606,6 +611,10 @@ config_export_to_env() {
606611
for key in "${!CONFIG_CACHE[@]}"; do
607612
export "$key"="${CONFIG_CACHE[$key]}"
608613
done
614+
615+
# Backward compatibility for stage scripts expecting uppercase GPU flags
616+
# Installers (e.g., stage3/install_elpa.sh) read ENABLE_CUDA, not enable_cuda
617+
export ENABLE_CUDA="${CONFIG_CACHE[enable_cuda]}"
609618

610619
# Export package list variables
611620
export tool_list

toolchain/scripts/stage2/install_openblas.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ if [[ -z "$version_suffix" && -n "${ABACUS_TOOLCHAIN_VERSION_SUFFIX}" ]]; then
2828
fi
2929
# Load package variables with appropriate version
3030
load_package_vars "openblas" "$version_suffix"
31-
openblas_pkg="openblas-${openblas_ver}.tar.gz"
31+
openblas_pkg="OpenBLAS-${openblas_ver}.tar.gz"
3232

3333
source "${INSTALLDIR}"/toolchain.conf
3434
source "${INSTALLDIR}"/toolchain.env

toolchain/scripts/stage3/install_elpa.sh

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,9 +114,11 @@ case "$with_elpa" in
114114
fi
115115
fi
116116
for TARGET in "cpu" "nvidia"; do
117-
[ "$TARGET" = "nvidia" ] && [ "$ENABLE_CUDA" != "__TRUE__" ] && continue
117+
# Accept both uppercase and lowercase GPU enable flags for compatibility
118+
gpu_enabled="${ENABLE_CUDA:-${enable_cuda}}"
119+
[ "$TARGET" = "nvidia" ] && [ "$gpu_enabled" != "__TRUE__" ] && continue
118120
# disable cpu if cuda is enabled, only install one
119-
[ "$TARGET" != "nvidia" ] && [ "$ENABLE_CUDA" = "__TRUE__" ] && continue
121+
[ "$TARGET" != "nvidia" ] && [ "$gpu_enabled" = "__TRUE__" ] && continue
120122
# extend the pkg_install_dir by TARGET
121123
# this linking method is totally different from cp2k toolchain
122124
# for cp2k, ref https://github.com/cp2k/cp2k/commit/6fe2fc105b8cded84256248f68c74139dd8fc2e9
@@ -139,6 +141,7 @@ case "$with_elpa" in
139141
--with-cuda-path=${CUDA_PATH:-${CUDA_HOME:-/CUDA_HOME-notset}} \
140142
--enable-nvidia-gpu-kernels=$([ "$TARGET" = "nvidia" ] && echo "yes" || echo "no") \
141143
--with-NVIDIA-GPU-compute-capability=$([ "$TARGET" = "nvidia" ] && echo "sm_$ARCH_NUM" || echo "sm_70") \
144+
--enable-nvidia-cub --with-cusolver \
142145
OMPI_MCA_plm_rsh_agent=/bin/false \
143146
FC=${MPIFC} \
144147
CC=${MPICC} \
@@ -170,6 +173,7 @@ case "$with_elpa" in
170173
--enable-nvidia-gpu-kernels=$([ "$TARGET" = "nvidia" ] && echo "yes" || echo "no") \
171174
--with-cuda-path=${CUDA_PATH:-${CUDA_HOME:-/CUDA_HOME-notset}} \
172175
--with-NVIDIA-GPU-compute-capability=$([ "$TARGET" = "nvidia" ] && echo "sm_$ARCH_NUM" || echo "sm_70") \
176+
--enable-nvidia-cub --with-cusolver \
173177
FC=${MPIFC} \
174178
CC=${MPICC} \
175179
CXX=${MPICXX} \

toolchain/scripts/tool_kit.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -978,7 +978,7 @@ download_pkg_from_url() {
978978
"smart"|*)
979979
# Smart fallback: try with certificate validation first, then without
980980
echo "Attempting secure download: $__url"
981-
if wget ${DOWNLOADER_FLAGS} "$__url" -O "$__filename" 2>/dev/null; then
981+
if wget ${DOWNLOADER_FLAGS} "$__url" -O "$__filename"; then
982982
echo "Download successful with certificate validation"
983983
else
984984
echo "Certificate validation failed, retrying without certificate check..."

0 commit comments

Comments
 (0)