diff --git a/CMakeLists.txt b/CMakeLists.txt
index 87fbef8f8e..82730031b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -352,9 +352,19 @@ if(USE_CUDA)
   endif()
   if (ENABLE_CUSOLVERMP)
     add_compile_definitions(__CUSOLVERMP)
+    find_library(CAL_LIBRARY
+      NAMES cal
+      PATHS ${CAL_CUSOLVERMP_PATH}
+      NO_DEFAULT_PATH
+    )
+    find_library(CUSOLVERMP_LIBRARY
+      NAMES cusolverMp
+      PATHS ${CAL_CUSOLVERMP_PATH}
+      NO_DEFAULT_PATH
+    )
     target_link_libraries(${ABACUS_BIN_NAME}
-      cal
-      cusolverMp
+      ${CAL_LIBRARY}
+      ${CUSOLVERMP_LIBRARY}
     )
   endif()
 endif()
diff --git a/toolchain/README.md b/toolchain/README.md
index 7c35a15d4a..d190ff1064 100644
--- a/toolchain/README.md
+++ b/toolchain/README.md
@@ -2,7 +2,7 @@ Version 2025.1
-## Author
+## Main Developer
 [QuantumMisaka](https://github.com/QuantumMisaka) (Zhaoqing Liu) @PKU @AISI
@@ -26,8 +26,9 @@ and give setup files that you can use to compile ABACUS.
 - [x] Automatic installation of [CEREAL](https://github.com/USCiLab/cereal) and [LIBNPY](https://github.com/llohse/libnpy) (by github.com)
 - [x] Support for [LibRI](https://github.com/abacusmodeling/LibRI) by submodule or automatic installation from github.com (but installed LibRI via `wget` seems to have some problem, please be cautious)
 - [x] A mirror station by Bohrium database, which can download CEREAL, LibNPY, LibRI and LibComm by `wget` in China Internet.
-- [x] Support for GPU compilation, users can add `-DUSE_CUDA=1` in builder scripts.
+- [x] Support for GPU-PW and GPU-LCAO compilation (ELPA and cusolverMp support is under development); `-DUSE_CUDA=1` is needed in the builder scripts.
 - [x] Support for AMD compiler and math lib `AOCL` and `AOCC` (not fully complete due to flang and AOCC-ABACUS compliation error)
+- [ ] Support for more GPU devices beyond Nvidia.
 - [ ] Change the downloading url from cp2k mirror to other mirror or directly downloading from official website. (doing)
 - [ ] Support a JSON or YAML configuration file for toolchain, which can be easily modified by users.
 - [ ] A better README and Detail markdown file.
@@ -138,7 +139,9 @@ Dependencies below are optional, which is NOT installed by default:
 - `LibComm` 0.1.1
 Users can install them by using `--with-*=install` in toolchain*.sh, which is `no` in default. Also, user can specify the absolute path of the package by `--with-*=path/to/package` in toolchain*.sh to allow toolchain to use the package.
-> Notice: LibRI, LibComm and Libnpy is on actively development, you should check-out the package version when using this toolchain. Also, LibRI and LibComm can be installed by github submodule, that is also work for libnpy, which is more recommended.
+> Notice: LibTorch often suffers from GLIBC_VERSION problems; if you encounter this, please downgrade the LibTorch version to 1.12.1 in scripts/stage4/install_torch.sh
+>
+> Notice: LibRI, LibComm, RapidJSON and Libnpy are under active development; you should check the package versions when using this toolchain.
 Users can easily compile and install dependencies of ABACUS by running these scripts after loading `gcc` or `intel-mkl-mpi`
@@ -187,6 +190,74 @@ or you can also do it in a more completely way:
 > rm -rf install build/*/* build/OpenBLAS*/ build/setup_*
 ```
+## GPU version of ABACUS
+
+The toolchain supports compiling the GPU version of ABACUS with an Nvidia GPU and CUDA. To use it, add the following options in build*.sh:
+
+```shell
+# in build_abacus_gnu.sh
+cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
+      -DCMAKE_CXX_COMPILER=g++ \
+      -DMPI_CXX_COMPILER=mpicxx \
+      ......
+      -DUSE_CUDA=ON \
+      # -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ # add if needed
+      ......
+# in build_abacus_intel.sh
+cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \
+      -DCMAKE_CXX_COMPILER=icpc \
+      -DMPI_CXX_COMPILER=mpiicpc \
+      ......
+      -DUSE_CUDA=ON \
+      # -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ # add if needed
+      ......
+```
+This enables the GPU version of ABACUS, and the `ks_solver cusolver` method can then be used directly for both PW and LCAO calculations.
+
+Notice: You CANNOT use the `icpx` compiler for the GPU version of ABACUS for now; see the discussions in [#2906](https://github.com/deepmodeling/abacus-develop/issues/2906) and [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976)
+
+If you want to use ABACUS GPU-LCAO via `cusolvermp` or `elpa` for multi-GPU calculations, please compile as follows:
+
+1. For the elpa method, add
+```shell
+export CUDA_PATH=/path/to/CUDA
+# install_abacus_toolchain.sh part options
+--enable-cuda \
+--gpu-ver=(GPU-compatibility-number) \
+```
+to `toolchain_*.sh`, and then follow the normal steps to install the dependencies using `./toolchain_*.sh`. To check the GPU compatibility number, refer to [CUDA compatibility](https://developer.nvidia.com/cuda-gpus).
+
+Afterwards, make sure these options are enabled in your `build_abacus_*.sh` script
+```shell
+-DUSE_ELPA=ON \
+-DUSE_CUDA=ON \
+```
+then just build the ABACUS executable by running `./build_abacus_*.sh`.
+
+The ELPA method needs more parameter settings, but it does not seem to be affected by the CUDA toolkit version, and there is no need to install or package anything manually.
+
+2. For the cusolvermp method, `toolchain_*.sh` does not need to be changed; just install the dependencies directly with `./toolchain_*.sh`, and then add
+```shell
+-DUSE_CUDA=ON \
+-DENABLE_CUSOLVERMP=ON \
+-D CAL_CUSOLVERMP_PATH=/path/to/math_libs/1x.x/targets/x86_64-linux/lib \
+```
+to the `build_abacus_*.sh` file. Add the following three items to the environment (assuming you are using the NVIDIA HPC SDK):
+```shell
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/comm_libs/1x.x/hpcx/hpcx-x.xx/ucc/lib
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/comm_libs/1x.x/hpcx/hpcx-x.xx/ucx/lib
+export CPATH=$CPATH:/path/to/math_libs/1x.x/targets/x86_64-linux/include
+```
+This is enough to build the ABACUS executable by running `./build_abacus_*.sh`.
+
+You can refer to the linked video for help with compilation and installation: [Bilibili](https://www.bilibili.com/video/BV1eqr5YuETN/).
+
+cusolverMp can be installed from package sources such as apt or yum, which is suitable for containers or local machines.
+The second choice is installing it via the [NVIDIA HPC_SDK](https://developer.nvidia.com/hpc-sdk-downloads), which is relatively simple, but the package from the NVIDIA HPC_SDK may not be suitable, especially for multi-GPU parallel runs. To make better use of cusolvermp and its dependencies (libcal, ucx, ucc) in multi-GPU runs, please contact your server administrator.
+
+After compiling, you can specify `device gpu` in the INPUT file to use the GPU version of ABACUS.
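+
+For a quick check after compiling, the minimal sketch below (the binary path and process count are placeholders, and one MPI process per GPU is assumed) appends the relevant keywords to INPUT and launches a run:
+
+```shell
+# illustrative only: adjust the paths and the process count to your own setup
+cat >> INPUT << EOF
+device gpu
+ks_solver cusolver
+EOF
+mpirun -np 2 /path/to/abacus | tee running.log
+```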
+ + ## Common Problems and Solutions ### Intel-oneAPI problem @@ -215,7 +286,7 @@ wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/0722521a-34b5-4 Related discussion here [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976) -#### link problem in early 2023 version oneAPI +#### linking problem in early 2023 version oneAPI Sometimes Intel-oneAPI have problem to link `mpirun`, which will always show in 2023.2.0 version of MPI in Intel-oneAPI. @@ -253,23 +324,6 @@ git clone https://github.com/abacusmodeling/LibComm OpenMPI in version 5 has huge update, lead to compatibility problem. If one wants to use the OpenMPI in version 4 (4.1.6), one can specify `--with-openmpi-4th=yes` in *toolchain_gnu.sh* -### GPU version of ABACUS - -For GPU version of ABACUS (do not GPU version installer of ELPA, which is still doing work), add following options in build*.sh: - -```shell -cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ - -DCMAKE_CXX_COMPILER=icpx \ - -DMPI_CXX_COMPILER=mpiicpc \ - ...... - -DUSE_CUDA=1 \ - -DCMAKE_CUDA_COMPILER=${path to cuda toolkit}/bin/nvcc \ - ...... -``` - -Notice: You CANNOT use `icpx` compiler for GPU version of ABACUS for now, see discussion here [#2906](https://github.com/deepmodeling/abacus-develop/issues/2906) and [#4976](https://github.com/deepmodeling/abacus-develop/issues/4976) - -If you wants to use ABACUS GPU-LCAO by `cusolvermp` or `elpa`, please contact the coresponding developer, toolchain do not fully support them now. ### Shell problem @@ -325,4 +379,4 @@ of each packages, which may let the installation more fiexible. ## More -More infomation can be read from `Details.md`. \ No newline at end of file +More infomation can be read from `Details.md`. diff --git a/toolchain/build_abacus_gnu-aocl.sh b/toolchain/build_abacus_gnu-aocl.sh index 3ab0ce97fd..ab283efb3b 100755 --- a/toolchain/build_abacus_gnu-aocl.sh +++ b/toolchain/build_abacus_gnu-aocl.sh @@ -18,7 +18,7 @@ cd $ABACUS_DIR ABACUS_DIR=$(pwd) #AOCLhome=/opt/aocl # user can specify this parameter -BUILD_DIR=build_abacus_gnu +BUILD_DIR=build_abacus_aocl rm -rf $BUILD_DIR PREFIX=$ABACUS_DIR diff --git a/toolchain/build_abacus_gnu.sh b/toolchain/build_abacus_gnu.sh index 27328c7eec..febe2fa5aa 100755 --- a/toolchain/build_abacus_gnu.sh +++ b/toolchain/build_abacus_gnu.sh @@ -24,6 +24,7 @@ PREFIX=$ABACUS_DIR LAPACK=$INSTALL_DIR/openblas-0.3.28/lib SCALAPACK=$INSTALL_DIR/scalapack-2.2.1/lib ELPA=$INSTALL_DIR/elpa-2025.01.001/cpu +# ELPA=$INSTALL_DIR/elpa-2025.01.001/nvidia # for gpu-lcao FFTW3=$INSTALL_DIR/fftw-3.3.10 CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal LIBXC=$INSTALL_DIR/libxc-7.0.0 @@ -49,6 +50,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DUSE_ELPA=ON \ -DENABLE_RAPIDJSON=ON \ -DRapidJSON_DIR=$RAPIDJSON \ +# -DUSE_CUDA=ON \ # -DENABLE_DEEPKS=1 \ # -DTorch_DIR=$LIBTORCH \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ @@ -56,6 +58,8 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ # -DLIBRI_DIR=$LIBRI \ # -DLIBCOMM_DIR=$LIBCOMM \ # -DDeePMD_DIR=$DEEPMD \ + #-DENABLE_CUSOLVERMP=ON \ + #-D CAL_CUSOLVERMP_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/2x.xx/math_libs/1x.x/targets/x86_64-linux/lib # # add mkl env for libtorch to link # if one want to install libtorch, mkl should be load in build process @@ -81,4 +85,4 @@ Done! To use the installed ABACUS version You need to source ${TOOL}/abacus_env.sh first ! 
""" -EOF \ No newline at end of file +EOF diff --git a/toolchain/build_abacus_intel.sh b/toolchain/build_abacus_intel.sh index a2ef7dd8b0..5fc96a26b8 100755 --- a/toolchain/build_abacus_intel.sh +++ b/toolchain/build_abacus_intel.sh @@ -23,6 +23,7 @@ rm -rf $BUILD_DIR PREFIX=$ABACUS_DIR ELPA=$INSTALL_DIR/elpa-2025.01.001/cpu +# ELPA=$INSTALL_DIR/elpa-2025.01.001/nvidia # for gpu-lcao CEREAL=$INSTALL_DIR/cereal-1.3.2/include/cereal LIBXC=$INSTALL_DIR/libxc-7.0.0 RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/ @@ -32,7 +33,7 @@ RAPIDJSON=$INSTALL_DIR/rapidjson-1.1.0/ # LIBCOMM=$INSTALL_DIR/LibComm-0.1.1 # DEEPMD=$HOME/apps/anaconda3/envs/deepmd # v3.0 might have problem -# if use deepks and deepmd +# Notice: if you are compiling with AMD-CPU or GPU-version ABACUS, then `icpc` and `mpiicpc` compilers are recommended cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DCMAKE_CXX_COMPILER=icpx \ -DMPI_CXX_COMPILER=mpiicpx \ @@ -46,6 +47,7 @@ cmake -B $BUILD_DIR -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DUSE_ELPA=ON \ -DENABLE_RAPIDJSON=ON \ -DRapidJSON_DIR=$RAPIDJSON \ +# -DUSE_CUDA=ON \ # -DENABLE_DEEPKS=1 \ # -DTorch_DIR=$LIBTORCH \ # -Dlibnpy_INCLUDE_DIR=$LIBNPY \ @@ -74,4 +76,4 @@ Done! To use the installed ABACUS version You need to source ${TOOL}/abacus_env.sh first ! """ -EOF \ No newline at end of file +EOF diff --git a/toolchain/install_abacus_toolchain.sh b/toolchain/install_abacus_toolchain.sh index 2ed465f646..b84ac0af1a 100755 --- a/toolchain/install_abacus_toolchain.sh +++ b/toolchain/install_abacus_toolchain.sh @@ -328,7 +328,7 @@ export intel_classic="no" # and will lead to problem in force calculation # but icx is recommended by intel compiler # option: --with-intel-classic can change it to yes/no -# JamesMisaka by 2023.08 +# QuantumMisaka by 2023.08 export intelmpi_classic="no" export with_ifx="yes" # whether ifx is used in oneapi export with_flang="no" # whether flang is used in aocc @@ -397,7 +397,7 @@ while [ $# -ge 1 ]; do eval with_${ii}="__INSTALL__" fi done - # I'd like to use OpenMPI as default -- zhaoqing liu in 2023.09.17 + # I'd like to use OpenMPI as default -- QuantumMisaka in 2023.09.17 export MPI_MODE="openmpi" ;; --mpi-mode=*) @@ -448,16 +448,7 @@ while [ $# -ge 1 ]; do ;; --gpu-ver=*) user_input="${1#*=}" - case "${user_input}" in - K20X | K40 | K80 | P100 | V100 | A100 | Mi50 | Mi100 | Mi250 | no) - export GPUVER="${user_input}" - ;; - *) - report_error ${LINENO} \ - "--gpu-ver currently only supports K20X, K40, K80, P100, V100, A100, Mi50, Mi100, Mi250, and no as options" - exit 1 - ;; - esac + export GPUVER="${user_input}" ;; --target-cpu=*) user_input="${1#*=}" @@ -684,7 +675,7 @@ else esac fi # If MATH_MODE is mkl ,then openblas, scalapack and fftw is not needed -# zhaoqing in 2023-09-17 +# QuantumMisaka in 2023-09-17 if [ "${MATH_MODE}" = "mkl" ]; then if [ "${with_openblas}" != "__DONTUSE__" ]; then echo "Using MKL, so openblas is disabled." @@ -700,6 +691,17 @@ if [ "${MATH_MODE}" = "mkl" ]; then fi fi +# Select the correct compute number based on the GPU architecture +# QuantumMisaka in 2025-03-19 +export ARCH_NUM="${GPUVER//.}" +if [[ "$ARCH_NUM" =~ ^[1-9][0-9]*$ ]] || [ $ARCH_NUM = "no" ]; then + echo "Notice: GPU compilation is enabled, and GPU compatibility is set via --gpu-ver to sm_${ARCH_NUM}." +else + report_error ${LINENO} \ + "When GPU compilation is enabled, the --gpu-ver variable should be properly set regarding to GPU compatibility. For check your GPU compatibility, visit https://developer.nvidia.com/cuda-gpus. 
For example: A100 -> 8.0 (or 80), V100 -> 7.0 (or 70), 4090 -> 8.9 (or 89)" + exit 1 +fi + # If CUDA or HIP are enabled, make sure the GPU version has been defined. if [ "${ENABLE_CUDA}" = "__TRUE__" ] || [ "${ENABLE_HIP}" = "__TRUE__" ]; then if [ "${GPUVER}" = "no" ]; then @@ -708,9 +710,10 @@ if [ "${ENABLE_CUDA}" = "__TRUE__" ] || [ "${ENABLE_HIP}" = "__TRUE__" ]; then fi fi -# several packages require cmake. -if [ "${with_scalapack}" = "__INSTALL__" ]; then - [ "${with_cmake}" = "__DONTUSE__" ] && with_cmake="__INSTALL__" +# ABACUS itself and some dependencies require cmake. +if [ "${with_cmake}" = "__DONTUSE__" ]; then + report_error "CMake is required for ABACUS and some dependencies. Please enable it." + exit 1 fi @@ -816,45 +819,6 @@ fi echo "Compiling with $(get_nprocs) processes for target ${TARGET_CPU}." -# Select the correct compute number based on the GPU architecture -case ${GPUVER} in - K20X) - export ARCH_NUM="35" - ;; - K40) - export ARCH_NUM="35" - ;; - K80) - export ARCH_NUM="37" - ;; - P100) - export ARCH_NUM="60" - ;; - V100) - export ARCH_NUM="70" - ;; - A100) - export ARCH_NUM="80" - ;; - Mi50) - # TODO: export ARCH_NUM= - ;; - Mi100) - # TODO: export ARCH_NUM= - ;; - Mi250) - # TODO: export ARCH_NUM= - ;; - no) - export ARCH_NUM="no" - ;; - *) - report_error ${LINENO} \ - "--gpu-ver currently only supports K20X, K40, K80, P100, V100, A100, Mi50, Mi100, Mi250, and no as options" - exit 1 - ;; -esac - write_toolchain_env ${INSTALLDIR} # write toolchain config diff --git a/toolchain/scripts/stage3/install_elpa.sh b/toolchain/scripts/stage3/install_elpa.sh index 01e7980810..94cc3d1bb9 100755 --- a/toolchain/scripts/stage3/install_elpa.sh +++ b/toolchain/scripts/stage3/install_elpa.sh @@ -98,26 +98,27 @@ case "$with_elpa" in config_flags="--enable-avx-kernels=${has_AVX} --enable-avx2-kernels=${has_AVX2} --enable-avx512-kernels=${has_AVX512}" fi fi + # CUDA_CFLAGS="-std=c++14 -allow-unsupported-compiler" \ for TARGET in "cpu" "nvidia"; do [ "$TARGET" = "nvidia" ] && [ "$ENABLE_CUDA" != "__TRUE__" ] && continue + # disable cpu if cuda is enabled + [ "$TARGET" != "nvidia" ] && [ "$ENABLE_CUDA" = "__TRUE__" ] && continue echo "Installing from scratch into ${pkg_install_dir}/${TARGET}" - mkdir -p "build_${TARGET}" cd "build_${TARGET}" - if [ "${with_amd}" != "__DONTUSE__" ]; then - echo "AMD compiler detected, enable special option operation" + if [ "${with_amd}" != "__DONTUSE__" ] && [ "${with_flang}" = "yes" ] ; then + echo "AMD fortran compiler detected, enable special option operation" ../configure --prefix="${pkg_install_dir}/${TARGET}/" \ --libdir="${pkg_install_dir}/${TARGET}/lib" \ --enable-openmp=${enable_openmp} \ - --enable-shared=yes \ --enable-static=yes \ + --enable-shared=yes \ --disable-c-tests \ --disable-cpp-tests \ ${config_flags} \ --enable-nvidia-gpu-kernels=$([ "$TARGET" = "nvidia" ] && echo "yes" || echo "no") \ --with-cuda-path=${CUDA_PATH:-${CUDA_HOME:-/CUDA_HOME-notset}} \ - --with-NVIDIA-GPU-compute-capability=$([ "$TARGET" = "nvidia" ] && echo "sm_$ARCH_NUM" || echo "sm_75") \ - CUDA_CFLAGS="-std=c++14 -allow-unsupported-compiler" \ + --with-NVIDIA-GPU-compute-capability=$([ "$TARGET" = "nvidia" ] && echo "sm_$ARCH_NUM" || echo "sm_70") \ OMPI_MCA_plm_rsh_agent=/bin/false \ FC=${MPIFC} \ CC=${MPICC} \ @@ -138,16 +139,14 @@ case "$with_elpa" in ../configure --prefix="${pkg_install_dir}/${TARGET}/" \ --libdir="${pkg_install_dir}/${TARGET}/lib" \ --enable-openmp=${enable_openmp} \ - --enable-shared=yes \ --enable-static=yes \ + --enable-shared=yes \ 
        --disable-c-tests \
        --disable-cpp-tests \
        ${config_flags} \
        --enable-nvidia-gpu-kernels=$([ "$TARGET" = "nvidia" ] && echo "yes" || echo "no") \
        --with-cuda-path=${CUDA_PATH:-${CUDA_HOME:-/CUDA_HOME-notset}} \
-        --with-NVIDIA-GPU-compute-capability=$([ "$TARGET" = "nvidia" ] && echo "sm_$ARCH_NUM" || echo "sm_75") \
-        CUDA_CFLAGS="-std=c++14 -allow-unsupported-compiler" \
-        OMPI_MCA_plm_rsh_agent=/bin/false \
+        --with-NVIDIA-GPU-compute-capability=$([ "$TARGET" = "nvidia" ] && echo "sm_$ARCH_NUM" || echo "sm_70") \
        FC=${MPIFC} \
        CC=${MPICC} \
        CXX=${MPICXX} \
diff --git a/toolchain/scripts/tool_kit.sh b/toolchain/scripts/tool_kit.sh
index 8ecfc9decb..e03c125c3b 100755
--- a/toolchain/scripts/tool_kit.sh
+++ b/toolchain/scripts/tool_kit.sh
@@ -369,12 +369,13 @@ check_command() {
 }
 # check if directory exists
+# more detailed error msg added by QuantumMisaka in 2025.03.19
 check_dir() {
   local __dir=$1
   if [ -d "$__dir" ]; then
     echo "Found directory $__dir"
   else
-    report_error "Cannot find $__dir"
+    report_error "Cannot find $__dir, please check that your --with-PKG input matches one of the options: [system|install|no|(path/to/pkg)]"
     return 1
   fi
 }
diff --git a/toolchain/toolchain_amd.sh b/toolchain/toolchain_amd.sh
index b8055176c6..797f7c67ea 100755
--- a/toolchain/toolchain_amd.sh
+++ b/toolchain/toolchain_amd.sh
@@ -33,5 +33,5 @@
 --with-4th-openmpi=no \
 --with-flang=no \
 | tee compile.log
-# if you want to use openmpi-version4: set --with-4th-openmpi=yes
+# to use openmpi-version4: set --with-4th-openmpi=yes
 # flang is not recommended to use in this stage
\ No newline at end of file
diff --git a/toolchain/toolchain_gnu.sh b/toolchain/toolchain_gnu.sh
index bf5be6a129..bea1ee0793 100755
--- a/toolchain/toolchain_gnu.sh
+++ b/toolchain/toolchain_gnu.sh
@@ -12,6 +12,8 @@
 # libtorch and libnpy are for deepks support, which can be =no
 # if you want to run EXX calculation, you should set --with-libri=install
 # mpich (and intel toolchain) is recommended for EXX support
+# for gpu-lcao support, set CUDA_PATH and add --enable-cuda
+# export CUDA_PATH=/usr/local/cuda
 ./install_abacus_toolchain.sh \
 --with-gcc=system \
@@ -31,4 +33,9 @@
 --with-libcomm=no \
 --with-4th-openmpi=no \
 | tee compile.log
-# if you want to use openmpi-version4: set --with-4th-openmpi=yes
\ No newline at end of file
+# to use openmpi-version4: set --with-4th-openmpi=yes
+# to enable gpu-lcao, add the following lines:
+# --enable-cuda \
+# --gpu-ver=75 \
+# check your GPU compute capability number
+# and use it for --gpu-ver
diff --git a/toolchain/toolchain_intel-mpich.sh b/toolchain/toolchain_intel-mpich.sh
index 1f50679f1a..afa9871762 100755
--- a/toolchain/toolchain_intel-mpich.sh
+++ b/toolchain/toolchain_intel-mpich.sh
@@ -30,4 +30,4 @@
 --with-libcomm=no \
 --with-intel-classic=no \
 | tee compile.log
-# if you are using AMD server: set --with-intel-classic=yes
\ No newline at end of file
+# when using an AMD CPU or building the GPU version: set --with-intel-classic=yes
\ No newline at end of file
diff --git a/toolchain/toolchain_intel.sh b/toolchain/toolchain_intel.sh
index d12afc919d..4af7236896 100755
--- a/toolchain/toolchain_intel.sh
+++ b/toolchain/toolchain_intel.sh
@@ -12,7 +12,7 @@
 # libtorch and libnpy are for deepks support, which can be =no
 # module load mkl mpi compiler
-
+export CUDA_PATH=/usr/local/cuda
 ./install_abacus_toolchain.sh \
 --with-intel=system \
 --math-mode=mkl \
@@ -31,4 +31,9 @@
 --with-libcomm=no \
 --with-intel-classic=no \
 | tee compile.log
-# if you are using AMD server: set --with-intel-classic=yes
\ No newline at end of file
+# when using an AMD CPU or building the GPU version of ABACUS: set --with-intel-classic=yes
+# to enable gpu-lcao, add the following lines:
+# --enable-cuda \
+# --gpu-ver=75 \
+# check your GPU compute capability number
+# and use it for --gpu-ver
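+# one way to look up the compute capability (assuming a driver new enough to
+# support the compute_cap query field) is:
+#   nvidia-smi --query-gpu=compute_cap --format=csv,noheader
+# which prints e.g. 8.0 for an A100; use 80 as the --gpu-ver value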