Merged

22 commits
7929675
Add optional LCAO base GPU versions supported by cusolvermp
tang070205 Mar 16, 2025
9b8ed70
Add optional LCAO base GPU versions supported by elpa
tang070205 Mar 16, 2025
0a6a097
Add optional LCAO base GPU versions supported by elpa
tang070205 Mar 16, 2025
471d6f3
Add L40S as GPUVER value for sm_89 architecture
tang070205 Mar 16, 2025
0bbf6f2
Delete a few lines of content to enable Nvidia to compile
tang070205 Mar 16, 2025
b3defd5
Add a specified Fortran mpi compiler for elpa to use
tang070205 Mar 16, 2025
dad1705
Add CUDA path for use by ELPA-GPU
tang070205 Mar 16, 2025
6807886
Add optional LCAO base GPU versions supported by elpa
tang070205 Mar 16, 2025
a145e53
Modify a small issue
tang070205 Mar 16, 2025
5318a96
Change to manually specifying the link libraries for CAL and cusolverMp
tang070205 Mar 16, 2025
7f8ddce
Merge branch 'develop' into develop
tang070205 Mar 18, 2025
25a2239
Add the use of 'cusolvermp' or 'elpa' methods to compile ABACUS GPU-LCAO
tang070205 Mar 18, 2025
9bd3ffd
Add the use of 'cusolvermp' or 'elpa' methods to compile ABACUS GPU-LCAO
tang070205 Mar 18, 2025
c18551c
Add the use of 'cusolvermp' or 'elpa' methods to compile ABACUS GPU-LCAO
tang070205 Mar 18, 2025
c1f832c
Add modification
QuantumMisaka Mar 19, 2025
2aea32b
minor adjustment
QuantumMisaka Mar 19, 2025
539f593
update README
QuantumMisaka Mar 19, 2025
eb0ab10
give back cmake default option
QuantumMisaka Mar 19, 2025
1865435
Merge pull request #1 from QuantumMisaka/lcao-gpu-modify
tang070205 Mar 19, 2025
13735bc
update README and cusolvermp
QuantumMisaka Mar 20, 2025
be89bc7
Merge pull request #2 from QuantumMisaka/lcao-gpu-modify
tang070205 Mar 20, 2025
654f8ba
Update README.md
tang070205 Mar 20, 2025
14 changes: 12 additions & 2 deletions CMakeLists.txt
@@ -352,9 +352,19 @@ if(USE_CUDA)
endif()
if (ENABLE_CUSOLVERMP)
add_compile_definitions(__CUSOLVERMP)
find_library(CAL_LIBRARY
NAMES cal
PATHS ${CAL_CUSOLVERMP_PATH}
NO_DEFAULT_PATH
)
find_library(CUSOLVERMP_LIBRARY
NAMES cusolverMp
PATHS ${CAL_CUSOLVERMP_PATH}
NO_DEFAULT_PATH
)
target_link_libraries(${ABACUS_BIN_NAME}
cal
cusolverMp
${CAL_LIBRARY}
${CUSOLVERMP_LIBRARY}
)
endif()
endif()
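Since the two libraries are now located via `find_library` with `NO_DEFAULT_PATH`, CMake must be pointed at the directory that contains them. A minimal configure sketch (the HPC SDK path below is a hypothetical example):

```shell
# a sketch, assuming libcal and cusolverMp live in an HPC SDK math_libs tree
cmake -B build \
  -DUSE_CUDA=ON \
  -DENABLE_CUSOLVERMP=ON \
  -DCAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib
```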
98 changes: 76 additions & 22 deletions toolchain/README.md
@@ -2,7 +2,7 @@

Version 2025.1

## Author
## Main Developer

[QuantumMisaka](https://github.com/QuantumMisaka)
(Zhaoqing Liu) @PKU @AISI
@@ -26,8 +26,9 @@ and give setup files that you can use to compile ABACUS.
- [x] Automatic installation of [CEREAL](https://github.com/USCiLab/cereal) and [LIBNPY](https://github.com/llohse/libnpy) (by github.com)
- [x] Support for [LibRI](https://github.com/abacusmodeling/LibRI) by submodule or automatic installation from github.com (but LibRI installed via `wget` seems to have some problems, so please be cautious)
- [x] A mirror hosted on the Bohrium database, from which CEREAL, LibNPY, LibRI and LibComm can be downloaded by `wget` within the China internet.
- [x] Support for GPU compilation, users can add `-DUSE_CUDA=1` in builder scripts.
- [x] Support for GPU-PW and GPU-LCAO compilation (elpa and cusolvermp support is under development); `-DUSE_CUDA=1` is needed in builder scripts.
- [x] Support for AMD compiler and math lib `AOCL` and `AOCC` (not fully complete due to flang and AOCC-ABACUS compilation errors)
- [ ] Support for more GPU devices beyond Nvidia.
- [ ] Change the download URL from the cp2k mirror to another mirror, or download directly from the official websites. (in progress)
- [ ] Support a JSON or YAML configuration file for toolchain, which can be easily modified by users.
- [ ] A better README and Details markdown file.
@@ -138,7 +139,9 @@ Dependencies below are optional and NOT installed by default:
- `LibComm` 0.1.1

Users can install them by specifying `--with-*=install` in toolchain*.sh, which defaults to `no`. Alternatively, users can specify the absolute path of a package via `--with-*=path/to/package` in toolchain*.sh to let the toolchain use that package, as sketched after the notices below.
> Notice: LibRI, LibComm and Libnpy is on actively development, you should check-out the package version when using this toolchain. Also, LibRI and LibComm can be installed by github submodule, that is also work for libnpy, which is more recommended.
> Notice: LibTorch always suffers from the GLIBC_VERSION problem; if you encounter it, please downgrade the LibTorch version to 1.12.1 in scripts/stage4/install_torch.sh
>
> Notice: LibRI, LibComm, RapidJSON and Libnpy are under active development; you should check the package versions when using this toolchain.
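For illustration, a sketch of such an invocation (the chosen packages and paths are assumptions; adapt them to your system):

```shell
# a sketch: install LibTorch via the toolchain, and reuse pre-existing LibRI/LibComm checkouts
./install_abacus_toolchain.sh \
  --with-libtorch=install \
  --with-libri=/path/to/LibRI \
  --with-libcomm=/path/to/LibComm
```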

Users can easily compile and install dependencies of ABACUS
by running these scripts after loading `gcc` or `intel-mkl-mpi`
@@ -187,6 +190,74 @@ or you can also do it in a more complete way:
> rm -rf install build/*/* build/OpenBLAS*/ build/setup_*
```

## GPU version of ABACUS

The toolchain supports compiling the GPU version of ABACUS with Nvidia GPUs and CUDA. To use it, add the following options in build*.sh:

```shell
# in build_abacus_gnu.sh
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=g++ \
-DMPI_CXX_COMPILER=mpicxx \
......
-DUSE_CUDA=ON \
# -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ # add if needed
......
# in build_abacus_intel.sh
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=icpc \
-DMPI_CXX_COMPILER=mpiicpc \
......
-DUSE_CUDA=ON \
# -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ # add if needed
......
```
which will enable the GPU version of ABACUS, and the `ks_solver cusolver` method can then be used directly for PW and LCAO calculations.

Notice: You CANNOT use the `icpx` compiler for the GPU version of ABACUS for now; see the discussions in [#2906](https://github.com/deepmodeling/abacus-develop/issues/2906) and [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976).

If you want to use ABACUS GPU-LCAO via `cusolvermp` or `elpa` for multi-GPU calculations, please compile as follows:

1. For the elpa method, add
```shell
export CUDA_PATH=/path/to/CUDA
# install_abacus_toolchain.sh part options
--enable-cuda \
--gpu-ver=(GPU-compatibility-number) \
```
to the `toolchain_*.sh`, and then follow the normal steps to install the dependencies using `./toolchain_*.sh`. To look up the GPU compatibility number (compute capability), refer to [CUDA GPUs](https://developer.nvidia.com/cuda-gpus).
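As a concrete sketch (the CUDA path and the compute-capability number below are assumptions for an A100 card):

```shell
# a minimal sketch of the toolchain_gnu.sh additions, assuming CUDA under /usr/local/cuda
# and an A100 GPU (compute capability 8.0); keep the other options already present
export CUDA_PATH=/usr/local/cuda
./install_abacus_toolchain.sh \
  --enable-cuda \
  --gpu-ver=80
```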

Afterwards, make sure these options are enabled in your `build_abacus_*.sh` script:
```shell
-DUSE_ELPA=ON \
-DUSE_CUDA=ON \
```
then build the ABACUS executable by compiling with `./build_abacus_*.sh`.

The ELPA method needs more parameter settings, but it does not seem to be affected by the CUDA toolkit version, and there is no need to install or package anything manually.

2. For the cusolvermp method, `toolchain_*.sh` does not need to be changed; install the dependencies directly using `./toolchain_*.sh`, and then add
```shell
-DUSE_CUDA=ON \
-DENABLE_CUSOLVERMP=ON \
-D CAL_CUSOLVERMP_PATH=/path/to/math_libs/1x.x/targets/x86_64-linux/lib \
```
to the `build_abacus_*.sh` file. Then add the following three items to the environment (assuming you are using the HPC SDK):
```shell
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/comm_libs/1x.x/hpcx/hpcx-x.xx/ucc/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/comm_libs/1x.x/hpcx/hpcx-x.xx/ucx/lib
export CPATH=$CPATH:/path/to/math_libs/1x.x/targets/x86_64-linux/include
```
Then build the ABACUS executable by compiling with `./build_abacus_*.sh`. A concrete sketch of the environment setup is given below.
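For instance, a sketch with concrete (hypothetical) version numbers for an HPC SDK installed under `/opt/nvidia/hpc_sdk`:

```shell
# a sketch; the SDK prefix, CUDA version and HPC-X version are assumptions for your installation
SDK=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SDK/comm_libs/12.3/hpcx/hpcx-2.17.1/ucc/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SDK/comm_libs/12.3/hpcx/hpcx-2.17.1/ucx/lib
export CPATH=$CPATH:$SDK/math_libs/12.3/targets/x86_64-linux/include
```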

You can refer to the linked video for help with compilation and installation: [Bilibili](https://www.bilibili.com/video/BV1eqr5YuETN/).

cusolverMp needs to be installed from sources such as apt or yum, which is suitable for containers or local machines.
The second choice is to use the [NVIDIA HPC_SDK](https://developer.nvidia.com/hpc-sdk-downloads), which is relatively simple, but the packages shipped with the NVIDIA HPC_SDK may not be suitable, especially for multi-GPU parallel runs. To make better use of cusolvermp and its dependencies (libcal, ucx, ucc) in multi-GPU runs, please contact your server administrator.

After compiling, you can specify `device gpu` in the INPUT file to use the GPU version of ABACUS, as sketched below.
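A minimal INPUT sketch for a GPU LCAO run (keywords other than `device` and `ks_solver` are illustrative):

```shell
# a sketch of a minimal INPUT file for a GPU LCAO run; adjust to your own calculation
cat > INPUT << 'EOF'
INPUT_PARAMETERS
calculation scf
basis_type lcao
device gpu
ks_solver cusolver
EOF
```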


## Common Problems and Solutions

### Intel-oneAPI problem
@@ -215,7 +286,7 @@ wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/0722521a-34b5-4

Related discussion here [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976)

#### link problem in early 2023 version oneAPI
#### linking problem in early 2023 version oneAPI

Sometimes Intel-oneAPI has problems linking `mpirun`,
which always shows up with the 2023.2.0 version of MPI in Intel-oneAPI.
@@ -253,23 +324,6 @@ git clone https://github.com/abacusmodeling/LibComm

OpenMPI version 5 contains huge updates, which lead to compatibility problems. If one wants to use OpenMPI version 4 (4.1.6), one can specify `--with-openmpi-4th=yes` in *toolchain_gnu.sh*.

### GPU version of ABACUS

For GPU version of ABACUS (do not GPU version installer of ELPA, which is still doing work), add following options in build*.sh:

```shell
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=icpx \
-DMPI_CXX_COMPILER=mpiicpc \
......
-DUSE_CUDA=1 \
-DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \
......
```

Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS for now, see discussion here [#2906](https://github.com/deepmodeling/abacus-develop/issues/2906) and [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976)

If you wants to use ABACUS GPU-LCAO by `cusolvermp` or `elpa`, please contact the coresponding developer, toolchain do not fully support them now.

### Shell problem

@@ -325,4 +379,4 @@ of each package, which may make the installation more flexible.

## More

More information can be read from `Details.md`.
More information can be read from `Details.md`.
2 changes: 1 addition & 1 deletion toolchain/build_abacus_gnu-aocl.sh
@@ -18,7 +18,7 @@ cd $ABACUS_DIR
ABACUS_DIR=$(pwd)
#AOCLhome=/opt/aocl # user can specify this parameter

BUILD_DIR=build_abacus_gnu
BUILD_DIR=build_abacus_aocl
rm -rf $BUILD_DIR

PREFIX=$ABACUS_DIR
6 changes: 5 additions & 1 deletion toolchain/build_abacus_gnu.sh
@@ -24,6 +24,7 @@ PREFIX=$ABACUS_DIR
LAPACK=$INSTALL_DIR/openblas-0.3.28/lib
SCALAPACK=$INSTALL_DIR/scalapack-2.2.1/lib
ELPA=$INSTALL_DIR/elpa-2025.01.001/cpu
# ELPA=$INSTALL_DIR/elpa-2025.01.001/nvidia # for gpu-lcao
FFTW3=$INSTALL_DIR/fftw-3.3.10
CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal
LIBXC=$INSTALL_DIR/libxc-7.0.0
@@ -49,13 +50,16 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DUSE_ELPA=ON \
-DENABLE_RAPIDJSON=ON \
-DRapidJSON_DIR=$RAPIDJSON \
# -DUSE_CUDA=ON \
# -DENABLE_DEEPKS=1 \
# -DTorch_DIR=$LIBTORCH \
# -Dlibnpy_INCLUDE_DIR=$LIBNPY \
# -DENABLE_LIBRI=ON \
# -DLIBRI_DIR=$LIBRI \
# -DLIBCOMM_DIR=$LIBCOMM \
# -DDeePMD_DIR=$DEEPMD \
# -DENABLE_CUSOLVERMP=ON \
# -DCAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib

# # add mkl env for libtorch to link
# if one wants to install libtorch, mkl should be loaded in the build process
@@ -81,4 +85,4 @@ Done!
To use the installed ABACUS version
You need to source ${TOOL}/abacus_env.sh first !
"""
EOF
EOF
6 changes: 4 additions & 2 deletions toolchain/build_abacus_intel.sh
@@ -23,6 +23,7 @@ rm -rf $BUILD_DIR

PREFIX=$ABACUS_DIR
ELPA=$INSTALL_DIR/elpa-2025.01.001/cpu
# ELPA=$INSTALL_DIR/elpa-2025.01.001/nvidia # for gpu-lcao
CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal
LIBXC=$INSTALL_DIR/libxc-7.0.0
RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/
@@ -32,7 +33,7 @@ RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/
# LIBCOMM=$INSTALL_DIR/LibComm-0.1.1
# DEEPMD=$HOME/apps/anaconda3/envs/deepmd # v3.0 might have problem

# if use deepks and deepmd
# Notice: if you are compiling for AMD CPUs or the GPU version of ABACUS, the `icpc` and `mpiicpc` compilers are recommended
cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_CXX_COMPILER=icpx \
-DMPI_CXX_COMPILER=mpiicpx \
@@ -46,6 +47,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
-DUSE_ELPA=ON \
-DENABLE_RAPIDJSON=ON \
-DRapidJSON_DIR=$RAPIDJSON \
# -DUSE_CUDA=ON \
# -DENABLE_DEEPKS=1 \
# -DTorch_DIR=$LIBTORCH \
# -Dlibnpy_INCLUDE_DIR=$LIBNPY \
@@ -74,4 +76,4 @@ Done!
To use the installed ABACUS version
You need to source ${TOOL}/abacus_env.sh first !
"""
EOF
EOF
74 changes: 19 additions & 55 deletions toolchain/install_abacus_toolchain.sh
@@ -328,7 +328,7 @@ export intel_classic="no"
# and will lead to problem in force calculation
# but icx is recommended by intel compiler
# option: --with-intel-classic can change it to yes/no
# JamesMisaka by 2023.08
# QuantumMisaka by 2023.08
export intelmpi_classic="no"
export with_ifx="yes" # whether ifx is used in oneapi
export with_flang="no" # whether flang is used in aocc
@@ -397,7 +397,7 @@ while [ $# -ge 1 ]; do
eval with_${ii}="__INSTALL__"
fi
done
# I'd like to use OpenMPI as default -- zhaoqing liu in 2023.09.17
# I'd like to use OpenMPI as default -- QuantumMisaka in 2023.09.17
export MPI_MODE="openmpi"
;;
--mpi-mode=*)
@@ -448,16 +448,7 @@ while [ $# -ge 1 ]; do
;;
--gpu-ver=*)
user_input="${1#*=}"
case "${user_input}" in
K20X | K40 | K80 | P100 | V100 | A100 | Mi50 | Mi100 | Mi250 | no)
export GPUVER="${user_input}"
;;
*)
report_error ${LINENO} \
"--gpu-ver currently only supports K20X, K40, K80, P100, V100, A100, Mi50, Mi100, Mi250, and no as options"
exit 1
;;
esac
export GPUVER="${user_input}"
;;
--target-cpu=*)
user_input="${1#*=}"
@@ -684,7 +675,7 @@ else
esac
fi
# If MATH_MODE is mkl ,then openblas, scalapack and fftw is not needed
# zhaoqing in 2023-09-17
# QuantumMisaka in 2023-09-17
if [ "${MATH_MODE}" = "mkl" ]; then
if [ "${with_openblas}" != "__DONTUSE__" ]; then
echo "Using MKL, so openblas is disabled."
@@ -700,6 +691,17 @@ if [ "${MATH_MODE}" = "mkl" ]; then
fi
fi

# Select the correct compute number based on the GPU architecture
# QuantumMisaka in 2025-03-19
export ARCH_NUM="${GPUVER//.}"
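# e.g. --gpu-ver=8.0 and --gpu-ver=80 both yield ARCH_NUM=80 (sm_80); "no" leaves GPU support off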
if [[ "$ARCH_NUM" =~ ^[1-9][0-9]*$ ]] || [ $ARCH_NUM = "no" ]; then
echo "Notice: GPU compilation is enabled, and GPU compatibility is set via --gpu-ver to sm_${ARCH_NUM}."
else
report_error ${LINENO} \
"When GPU compilation is enabled, the --gpu-ver variable should be properly set regarding to GPU compatibility. For check your GPU compatibility, visit https://developer.nvidia.com/cuda-gpus. For example: A100 -> 8.0 (or 80), V100 -> 7.0 (or 70), 4090 -> 8.9 (or 89)"
exit 1
fi

# If CUDA or HIP are enabled, make sure the GPU version has been defined.
if [ "${ENABLE_CUDA}" = "__TRUE__" ] || [ "${ENABLE_HIP}" = "__TRUE__" ]; then
if [ "${GPUVER}" = "no" ]; then
@@ -708,9 +710,10 @@ if [ "${ENABLE_CUDA}" = "__TRUE__" ] || [ "${ENABLE_HIP}" = "__TRUE__" ]; then
fi
fi

# several packages require cmake.
if [ "${with_scalapack}" = "__INSTALL__" ]; then
[ "${with_cmake}" = "__DONTUSE__" ] && with_cmake="__INSTALL__"
# ABACUS itself and some dependencies require cmake.
if [ "${with_cmake}" = "__DONTUSE__" ]; then
report_error "CMake is required for ABACUS and some dependencies. Please enable it."
exit 1
fi


@@ -816,45 +819,6 @@ fi

echo "Compiling with $(get_nprocs) processes for target ${TARGET_CPU}."

# Select the correct compute number based on the GPU architecture
case ${GPUVER} in
K20X)
export ARCH_NUM="35"
;;
K40)
export ARCH_NUM="35"
;;
K80)
export ARCH_NUM="37"
;;
P100)
export ARCH_NUM="60"
;;
V100)
export ARCH_NUM="70"
;;
A100)
export ARCH_NUM="80"
;;
Mi50)
# TODO: export ARCH_NUM=
;;
Mi100)
# TODO: export ARCH_NUM=
;;
Mi250)
# TODO: export ARCH_NUM=
;;
no)
export ARCH_NUM="no"
;;
*)
report_error ${LINENO} \
"--gpu-ver currently only supports K20X, K40, K80, P100, V100, A100, Mi50, Mi100, Mi250, and no as options"
exit 1
;;
esac

write_toolchain_env ${INSTALLDIR}

# write toolchain config