Merged

22 commits
7929675
Add optional LCAO base GPU versions supported by cusolvermp
tang070205 Mar 16, 2025
9b8ed70
Add optional LCAO base GPU versions supported by elpa
tang070205 Mar 16, 2025
0a6a097
Add optional LCAO base GPU versions supported by elpa
tang070205 Mar 16, 2025
471d6f3
Add L40S as GPUVER value for sm_89 architecture
tang070205 Mar 16, 2025
0bbf6f2
Delete a few lines of content to enable Nvidia to compile
tang070205 Mar 16, 2025
b3defd5
Add a specified Fortran mpi compiler for elpa to use
tang070205 Mar 16, 2025
dad1705
Add CUDA path for use by ELPA-GPU
tang070205 Mar 16, 2025
6807886
Add optional LCAO base GPU versions supported by elpa
tang070205 Mar 16, 2025
a145e53
Modify a small issue
tang070205 Mar 16, 2025
5318a96
Change to manually specifying the link libraries for CAL and cusolverMp
tang070205 Mar 16, 2025
7f8ddce
Merge branch 'develop' into develop
tang070205 Mar 18, 2025
25a2239
Add the use of 'cusolvermp' or 'elpa' methods to compile ABACUS GPU-LCAO
tang070205 Mar 18, 2025
9bd3ffd
Add the use of 'cusolvermp' or 'elpa' methods to compile ABACUS GPU-LCAO
tang070205 Mar 18, 2025
c18551c
Add the use of 'cusolvermp' or 'elpa' methods to compile ABACUS GPU-LCAO
tang070205 Mar 18, 2025
c1f832c
Add modification
QuantumMisaka Mar 19, 2025
2aea32b
minor adjustment
QuantumMisaka Mar 19, 2025
539f593
update README
QuantumMisaka Mar 19, 2025
eb0ab10
give back cmake default option
QuantumMisaka Mar 19, 2025
1865435
Merge pull request #1 from QuantumMisaka/lcao-gpu-modify
tang070205 Mar 19, 2025
13735bc
update README and cusolvermp
QuantumMisaka Mar 20, 2025
be89bc7
Merge pull request #2 from QuantumMisaka/lcao-gpu-modify
tang070205 Mar 20, 2025
654f8ba
Update README.md
tang070205 Mar 20, 2025
14 changes: 12 additions & 2 deletions CMakeLists.txt
@@ -352,9 +352,19 @@ if(USE_CUDA)
endif()
if (ENABLE_CUSOLVERMP)
add_compile_definitions(__CUSOLVERMP)
find_library(CAL_LIBRARY
NAMES cal
PATHS ${CAL_CUSOLVERMP_PATH}
NO_DEFAULT_PATH
)
find_library(CUSOLVERMP_LIBRARY
NAMES cusolverMp
PATHS ${CAL_CUSOLVERMP_PATH}
NO_DEFAULT_PATH
)
target_link_libraries(${ABACUS_BIN_NAME}
cal
cusolverMp
${CAL_LIBRARY}
${CUSOLVERMP_LIBRARY}
)
endif()
endif()
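Since the two libraries are now located via `find_library` with `NO_DEFAULT_PATH`, CMake must be pointed at the directory that contains them. A minimal configure sketch (the HPC SDK path below is a hypothetical example):

```shell
# a sketch, assuming libcal and cusolverMp live in an HPC SDK math_libs tree
cmake -B build \
  -DUSE_CUDA=ON \
  -DENABLE_CUSOLVERMP=ON \
  -DCAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib
```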
98 changes: 76 additions & 22 deletions toolchain/README.md
@@ -2,7 +2,7 @@

Version 2025.1

## Author
## Main Developer

[QuantumMisaka](https://github.com/QuantumMisaka)
(Zhaoqing Liu) @PKU @AISI
@@ -26,8 +26,9 @@ and give setup files that you can use to compile ABACUS.
- [x] Automatic installation of [CEREAL](https://github.com/USCiLab/cereal) and [LIBNPY](https://github.com/llohse/libnpy) (by github.com)
- [x] Support for [LibRI](https://github.com/abacusmodeling/LibRI) by submodule or automatic installation from github.com (but LibRI installed via `wget` seems to have some problems, so please be cautious)
- [x] A mirror hosted on the Bohrium database, from which CEREAL, LibNPY, LibRI and LibComm can be downloaded by `wget` within the China internet.
- [x] Support for GPU compilation, users can add `-DUSE_CUDA=1` in builder scripts.
- [x] Support for GPU-PW and GPU-LCAO compilation (elpa and cusolvermp support is under development); `-DUSE_CUDA=1` is needed in builder scripts.
- [x] Support for AMD compiler and math lib `AOCL` and `AOCC` (not fully complete due to flang and AOCC-ABACUS compilation errors)
- [ ] Support for more GPU devices beyond Nvidia.
- [ ] Change the download URL from the cp2k mirror to another mirror, or download directly from the official websites. (in progress)
- [ ] Support a JSON or YAML configuration file for toolchain, which can be easily modified by users.
- [ ] A better README and Details markdown file.
@@ -138,7 +139,9 @@ Dependencies below are optional and NOT installed by default:
- `LibComm` 0.1.1

Users can install them by specifying `--with-*=install` in toolchain*.sh, which defaults to `no`. Alternatively, users can specify the absolute path of a package via `--with-*=path/to/package` in toolchain*.sh to let the toolchain use that package, as sketched after the notices below.
> Notice: LibRI, LibComm and Libnpy is on actively development, you should check-out the package version when using this toolchain. Also, LibRI and LibComm can be installed by github submodule, that is also work for libnpy, which is more recommended.
> Notice: LibTorch always suffers from the GLIBC_VERSION problem; if you encounter it, please downgrade the LibTorch version to 1.12.1 in scripts/stage4/install_torch.sh
>
> Notice: LibRI, LibComm, RapidJSON and Libnpy are under active development; you should check the package versions when using this toolchain.
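For illustration, a sketch of such an invocation (the chosen packages and paths are assumptions; adapt them to your system):

```shell
# a sketch: install LibTorch via the toolchain, and reuse pre-existing LibRI/LibComm checkouts
./install_abacus_toolchain.sh \
  --with-libtorch=install \
  --with-libri=/path/to/LibRI \
  --with-libcomm=/path/to/LibComm
```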

Users can easily compile and install dependencies of ABACUS
by running these scripts after loading `gcc` or `intel-mkl-mpi`
@@ -187,6 +190,74 @@ or you can also do it in a more complete way:
> rm -rf install build/*/* build/OpenBLAS*/ build/setup_*
```

## GPU version of ABACUS

The toolchain supports compiling the GPU version of ABACUS with Nvidia GPUs and CUDA. To use it, add the following options in build*.sh:

```shell
# in build_abacus_gnu.sh
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=g++ \
-DMPI_CXX_COMPILER=mpicxx \
......
-DUSE_CUDA=ON \
# -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ # add if needed
......
# in build_abacus_intel.sh
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=icpc \
-DMPI_CXX_COMPILER=mpiicpc \
......
-DUSE_CUDA=ON \
# -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ # add if needed
......
```
which will enable the GPU version of ABACUS, and the `ks_solver cusolver` method can then be used directly for PW and LCAO calculations.

Notice: You CANNOT use the `icpx` compiler for the GPU version of ABACUS for now; see the discussions in [#2906](https://github.com/deepmodeling/abacus-develop/issues/2906) and [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976).

If you want to use ABACUS GPU-LCAO via `cusolvermp` or `elpa` for multi-GPU calculations, please compile as follows:

1. For the elpa method, add
```shell
export CUDA_PATH=/path/to/CUDA
# install_abacus_toolchain.sh part options
--enable-cuda \
--gpu-ver=(GPU-compatibility-number) \
```
to the `toolchain_*.sh`, and then follow the normal steps to install the dependencies using `./toolchain_*.sh`. To look up the GPU compatibility number (compute capability), refer to [CUDA GPUs](https://developer.nvidia.com/cuda-gpus).
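As a concrete sketch (the CUDA path and the compute-capability number below are assumptions for an A100 card):

```shell
# a minimal sketch of the toolchain_gnu.sh additions, assuming CUDA under /usr/local/cuda
# and an A100 GPU (compute capability 8.0); keep the other options already present
export CUDA_PATH=/usr/local/cuda
./install_abacus_toolchain.sh \
  --enable-cuda \
  --gpu-ver=80
```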

Afterwards, make sure these options are enabled in your `build_abacus_*.sh` script:
```shell
-DUSE_ELPA=ON \
-DUSE_CUDA=ON \
```
then build the ABACUS executable by compiling with `./build_abacus_*.sh`.

The ELPA method needs more parameter settings, but it does not seem to be affected by the CUDA toolkit version, and there is no need to install or package anything manually.

2. For the cusolvermp method, `toolchain_*.sh` does not need to be changed; install the dependencies directly using `./toolchain_*.sh`, and then add
```shell
-DUSE_CUDA=ON \
-DENABLE_CUSOLVERMP=ON \
-D CAL_CUSOLVERMP_PATH=/path/to/math_libs/1x.x/targets/x86_64-linux/lib \
```
to the `build_abacus_*.sh` file. Then add the following three items to the environment (assuming you are using the HPC SDK):
```shell
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/comm_libs/1x.x/hpcx/hpcx-x.xx/ucc/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/comm_libs/1x.x/hpcx/hpcx-x.xx/ucx/lib
export CPATH=$CPATH:/path/to/math_libs/1x.x/targets/x86_64-linux/include
```
Then build the ABACUS executable by compiling with `./build_abacus_*.sh`. A concrete sketch of the environment setup is given below.
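For instance, a sketch with concrete (hypothetical) version numbers for an HPC SDK installed under `/opt/nvidia/hpc_sdk`:

```shell
# a sketch; the SDK prefix, CUDA version and HPC-X version are assumptions for your installation
SDK=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SDK/comm_libs/12.3/hpcx/hpcx-2.17.1/ucc/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SDK/comm_libs/12.3/hpcx/hpcx-2.17.1/ucx/lib
export CPATH=$CPATH:$SDK/math_libs/12.3/targets/x86_64-linux/include
```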

You can refer to the linked video for help with compilation and installation: [Bilibili](https://www.bilibili.com/video/BV1eqr5YuETN/).

cusolverMp needs to be installed from sources such as apt or yum, which is suitable for containers or local machines.
The second choice is to use the [NVIDIA HPC_SDK](https://developer.nvidia.com/hpc-sdk-downloads), which is relatively simple, but the packages shipped with the NVIDIA HPC_SDK may not be suitable, especially for multi-GPU parallel runs. To make better use of cusolvermp and its dependencies (libcal, ucx, ucc) in multi-GPU runs, please contact your server administrator.

After compiling, you can specify `device gpu` in the INPUT file to use the GPU version of ABACUS, as sketched below.
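A minimal INPUT sketch for a GPU LCAO run (keywords other than `device` and `ks_solver` are illustrative):

```shell
# a sketch of a minimal INPUT file for a GPU LCAO run; adjust to your own calculation
cat > INPUT << 'EOF'
INPUT_PARAMETERS
calculation scf
basis_type lcao
device gpu
ks_solver cusolver
EOF
```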


## Common Problems and Solutions

### Intel-oneAPI problem
@@ -215,7 +286,7 @@ wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/0722521a-34b5-4

Related discussion here [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976)

#### link problem in early 2023 version oneAPI
#### linking problem in early 2023 version oneAPI

Sometimes Intel-oneAPI has problems linking `mpirun`,
which always shows up with the 2023.2.0 version of MPI in Intel-oneAPI.
@@ -253,23 +324,6 @@ git clone https://github.com/abacusmodeling/LibComm

OpenMPI version 5 contains huge updates, which lead to compatibility problems. If one wants to use OpenMPI version 4 (4.1.6), one can specify `--with-openmpi-4th=yes` in *toolchain_gnu.sh*.

### GPU version of ABACUS

For GPU version of ABACUS (do not GPU version installer of ELPA, which is still doing work), add following options in build*.sh:

```shell
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=icpx \
-DMPI_CXX_COMPILER=mpiicpc \
......
-DUSE_CUDA=1 \
-DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \
......
```

Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS for now, see discussion here [#2906](https://github.com/deepmodeling/abacus-develop/issues/2906) and [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976)

If you wants to use ABACUS GPU-LCAO by `cusolvermp` or `elpa`, please contact the coresponding developer, toolchain do not fully support them now.

### Shell problem

@@ -325,4 +379,4 @@ of each package, which may make the installation more flexible.

## More

More information can be read from `Details.md`.
More information can be read from `Details.md`.
2 changes: 1 addition & 1 deletion toolchain/build_abacus_gnu-aocl.sh
@@ -18,7 +18,7 @@ cd $ABACUS_DIR
ABACUS_DIR=$(pwd)
#AOCLhome=/opt/aocl # user can specify this parameter

BUILD_DIR=build_abacus_gnu
BUILD_DIR=build_abacus_aocl
rm -rf $BUILD_DIR

PREFIX=$ABACUS_DIR
6 changes: 5 additions & 1 deletion toolchain/build_abacus_gnu.sh
@@ -24,6 +24,7 @@ PREFIX=$ABACUS_DIR
LAPACK=$INSTALL_DIR/openblas-0.3.28/lib
SCALAPACK=$INSTALL_DIR/scalapack-2.2.1/lib
ELPA=$INSTALL_DIR/elpa-2025.01.001/cpu
# ELPA=$INSTALL_DIR/elpa-2025.01.001/nvidia # for gpu-lcao
FFTW3=$INSTALL_DIR/fftw-3.3.10
CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal
LIBXC=$INSTALL_DIR/libxc-7.0.0
@@ -49,13 +50,16 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DUSE_ELPA=ON \
-DENABLE_RAPIDJSON=ON \
-DRapidJSON_DIR=$RAPIDJSON \
# -DUSE_CUDA=ON \
# -DENABLE_DEEPKS=1 \
# -DTorch_DIR=$LIBTORCH \
# -Dlibnpy_INCLUDE_DIR=$LIBNPY \
# -DENABLE_LIBRI=ON \
# -DLIBRI_DIR=$LIBRI \
# -DLIBCOMM_DIR=$LIBCOMM \
# -DDeePMD_DIR=$DEEPMD \
# -DENABLE_CUSOLVERMP=ON \
# -DCAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib

# # add mkl env for libtorch to link
# if one wants to install libtorch, mkl should be loaded in the build process
@@ -81,4 +85,4 @@ Done!
To use the installed ABACUS version
You need to source ${TOOL}/abacus_env.sh first !
"""
EOF
EOF
6 changes: 4 additions & 2 deletions toolchain/build_abacus_intel.sh
@@ -23,6 +23,7 @@ rm -rf $BUILD_DIR

PREFIX=$ABACUS_DIR
ELPA=$INSTALL_DIR/elpa-2025.01.001/cpu
# ELPA=$INSTALL_DIR/elpa-2025.01.001/nvidia # for gpu-lcao
CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal
LIBXC=$INSTALL_DIR/libxc-7.0.0
RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/
@@ -32,7 +33,7 @@ RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/
# LIBCOMM=$INSTALL_DIR/LibComm-0.1.1
# DEEPMD=$HOME/apps/anaconda3/envs/deepmd # v3.0 might have problem

# if use deepks and deepmd
# Notice: if you are compiling for AMD CPUs or the GPU version of ABACUS, the `icpc` and `mpiicpc` compilers are recommended
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=icpx \
-DMPI_CXX_COMPILER=mpiicpx \
@@ -46,6 +47,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DUSE_ELPA=ON \
-DENABLE_RAPIDJSON=ON \
-DRapidJSON_DIR=$RAPIDJSON \
# -DUSE_CUDA=ON \
# -DENABLE_DEEPKS=1 \
# -DTorch_DIR=$LIBTORCH \
# -Dlibnpy_INCLUDE_DIR=$LIBNPY \
@@ -74,4 +76,4 @@ Done!
To use the installed ABACUS version
You need to source ${TOOL}/abacus_env.sh first !
"""
EOF
EOF
74 changes: 19 additions & 55 deletions toolchain/install_abacus_toolchain.sh
@@ -328,7 +328,7 @@ export intel_classic="no"
# and will lead to problem in force calculation
# but icx is recommended by intel compiler
# option: --with-intel-classic can change it to yes/no
# JamesMisaka by 2023.08
# QuantumMisaka by 2023.08
export intelmpi_classic="no"
export with_ifx="yes" # whether ifx is used in oneapi
export with_flang="no" # whether flang is used in aocc
@@ -397,7 +397,7 @@ while [ $# -ge 1 ]; do
eval with_${ii}="__INSTALL__"
fi
done
# I'd like to use OpenMPI as default -- zhaoqing liu in 2023.09.17
# I'd like to use OpenMPI as default -- QuantumMisaka in 2023.09.17
export MPI_MODE="openmpi"
;;
--mpi-mode=*)
@@ -448,16 +448,7 @@ while [ $# -ge 1 ]; do
;;
--gpu-ver=*)
user_input="${1#*=}"
case "${user_input}" in
K20X | K40 | K80 | P100 | V100 | A100 | Mi50 | Mi100 | Mi250 | no)
export GPUVER="${user_input}"
;;
*)
report_error ${LINENO} \
"--gpu-ver currently only supports K20X, K40, K80, P100, V100, A100, Mi50, Mi100, Mi250, and no as options"
exit 1
;;
esac
export GPUVER="${user_input}"
;;
--target-cpu=*)
user_input="${1#*=}"
@@ -684,7 +675,7 @@ else
esac
fi
# If MATH_MODE is mkl ,then openblas, scalapack and fftw is not needed
# zhaoqing in 2023-09-17
# QuantumMisaka in 2023-09-17
if [ "${MATH_MODE}" = "mkl" ]; then
if [ "${with_openblas}" != "__DONTUSE__" ]; then
echo "Using MKL, so openblas is disabled."
@@ -700,6 +691,17 @@ if [ "${MATH_MODE}" = "mkl" ]; then
fi
fi

# Select the correct compute number based on the GPU architecture
# QuantumMisaka in 2025-03-19
export ARCH_NUM="${GPUVER//.}"
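# e.g. --gpu-ver=8.0 and --gpu-ver=80 both yield ARCH_NUM=80 (sm_80); "no" leaves GPU support off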
if [[ "$ARCH_NUM" =~ ^[1-9][0-9]*$ ]] || [ $ARCH_NUM = "no" ]; then
echo "Notice: GPU compilation is enabled, and GPU compatibility is set via --gpu-ver to sm_${ARCH_NUM}."
else
report_error ${LINENO} \
"When GPU compilation is enabled, the --gpu-ver variable should be properly set regarding to GPU compatibility. For check your GPU compatibility, visit https://developer.nvidia.com/cuda-gpus. For example: A100 -> 8.0 (or 80), V100 -> 7.0 (or 70), 4090 -> 8.9 (or 89)"
exit 1
fi

# If CUDA or HIP are enabled, make sure the GPU version has been defined.
if [ "${ENABLE_CUDA}" = "__TRUE__" ] || [ "${ENABLE_HIP}" = "__TRUE__" ]; then
if [ "${GPUVER}" = "no" ]; then
@@ -708,9 +710,10 @@ if [ "${ENABLE_CUDA}" = "__TRUE__" ] || [ "${ENABLE_HIP}" = "__TRUE__" ]; then
fi
fi

# several packages require cmake.
if [ "${with_scalapack}" = "__INSTALL__" ]; then
[ "${with_cmake}" = "__DONTUSE__" ] && with_cmake="__INSTALL__"
# ABACUS itself and some dependencies require cmake.
if [ "${with_cmake}" = "__DONTUSE__" ]; then
report_error "CMake is required for ABACUS and some dependencies. Please enable it."
exit 1
fi


@@ -816,45 +819,6 @@ fi

echo "Compiling with $(get_nprocs) processes for target ${TARGET_CPU}."

# Select the correct compute number based on the GPU architecture
case ${GPUVER} in
K20X)
export ARCH_NUM="35"
;;
K40)
export ARCH_NUM="35"
;;
K80)
export ARCH_NUM="37"
;;
P100)
export ARCH_NUM="60"
;;
V100)
export ARCH_NUM="70"
;;
A100)
export ARCH_NUM="80"
;;
Mi50)
# TODO: export ARCH_NUM=
;;
Mi100)
# TODO: export ARCH_NUM=
;;
Mi250)
# TODO: export ARCH_NUM=
;;
no)
export ARCH_NUM="no"
;;
*)
report_error ${LINENO} \
"--gpu-ver currently only supports K20X, K40, K80, P100, V100, A100, Mi50, Mi100, Mi250, and no as options"
exit 1
;;
esac

write_toolchain_env ${INSTALLDIR}

# write toolchain config