Skip to content

Commit 9718aa6

Browse files
authored
Merge branch 'develop' into updata_3.9.0.18
2 parents fd6ccf0 + 2add562 commit 9718aa6

File tree

21 files changed

+1436
-276
lines changed

21 files changed

+1436
-276
lines changed

.github/workflows/build_test_cmake.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
build_args: "-DUSE_CUDA=1"
2828
name: "Build with CUDA support"
2929
- tag: gnu
30-
build_args: "-DENABLE_LCAO=OFF"
30+
build_args: "-DENABLE_LCAO=0"
3131
name: "Build without LCAO"
3232
- tag: gnu
3333
build_args: "-DUSE_ELPA=0 "

Dockerfile.intel

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,116 @@ RUN wget -q https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-
7474
unzip -q /tmp/libtorch.zip -d /opt && rm -f /tmp/libtorch.zip
7575
ENV CMAKE_PREFIX_PATH=/opt/libtorch/share/cmake:${CMAKE_PREFIX_PATH}
7676

77+
78+
###### PEXSI PART: pexsi and gklib and metis and parmetis and superlu_dist
79+
ENV GKLIB_VERSION="master"
80+
ENV METIS_VERSION="master"
81+
ENV PARMETIS_VERSION="main"
82+
ENV SUPERLU_DIST_VERSION=7.2.0
83+
ENV PEXSI_VERSION=2.0.0
84+
ENV GKLIB_ROOT=/usr/local/gklib-${GKLIB_VERSION}
85+
ENV METIS32_ROOT=/usr/local/metis32-${METIS_VERSION}
86+
ENV PARMETIS32_ROOT=/usr/local/parmetis32-${PARMETIS_VERSION}
87+
ENV PEXSI32_ROOT=/usr/local/pexsi32-${PEXSI_VERSION}
88+
ENV SUPERLU_DIST32_ROOT=/usr/local/superlu_dist32-${SUPERLU_DIST_VERSION}
89+
90+
# 1. install GKlib
91+
RUN wget https://codeload.github.com/KarypisLab/GKlib/zip/refs/heads/${GKLIB_VERSION} -O GKlib-${GKLIB_VERSION}.zip && \
92+
unzip GKlib-${GKLIB_VERSION}.zip && \
93+
cd GKlib-${GKLIB_VERSION} && \
94+
make config shared=1 prefix=${GKLIB_ROOT} openmp=set && \
95+
make -j$(nproc) && \
96+
make install && \
97+
ls ${GKLIB_ROOT}/lib && \
98+
cp ${GKLIB_ROOT}/lib/libGKlib.so.0 ${GKLIB_ROOT}/lib/libGKlib.so && \
99+
cd / && rm -rf GKlib-${GKLIB_VERSION} GKlib-${GKLIB_VERSION}.zip
100+
#.so file CANNOT be found otherwise.
101+
# 2. install METIS
102+
RUN export LD_LIBRARY_PATH=${GKLIB_ROOT}/lib:${LD_LIBRARY_PATH} && \
103+
wget https://codeload.github.com/KarypisLab/METIS/zip/refs/heads/${METIS_VERSION} -O METIS-${METIS_VERSION}.zip && \
104+
unzip METIS-${METIS_VERSION}.zip && \
105+
cd METIS-${METIS_VERSION} && \
106+
make config shared=1 prefix=${METIS32_ROOT} gklib_path=${GKLIB_ROOT} && \
107+
make -j$(nproc) && \
108+
make install && \
109+
cd / && rm -rf METIS-${METIS_VERSION} METIS-${METIS_VERSION}.zip
110+
111+
# 3. install ParMETIS
112+
RUN export LD_LIBRARY_PATH=${METIS32_ROOT}/lib:${GKLIB_ROOT}/lib:${LD_LIBRARY_PATH} && \
113+
wget https://codeload.github.com/KarypisLab/ParMETIS/zip/refs/heads/${PARMETIS_VERSION} -O ParMETIS-${PARMETIS_VERSION}.zip && \
114+
unzip ParMETIS-${PARMETIS_VERSION}.zip && \
115+
cd ParMETIS-${PARMETIS_VERSION} && \
116+
make config shared=1 prefix=${PARMETIS32_ROOT} gklib_path=${GKLIB_ROOT} metis_path=${METIS32_ROOT} && \
117+
make -j$(nproc) && \
118+
make install && \
119+
cd / && rm -rf ParMETIS-${PARMETIS_VERSION} ParMETIS-${PARMETIS_VERSION}.zip
120+
121+
# 4. install SuperLU_DIST
122+
RUN wget https://codeload.github.com/xiaoyeli/superlu_dist/tar.gz/refs/tags/v${SUPERLU_DIST_VERSION} -O v${SUPERLU_DIST_VERSION}.tar.gz && \
123+
tar -xzf v${SUPERLU_DIST_VERSION}.tar.gz && \
124+
cd superlu_dist-${SUPERLU_DIST_VERSION} && \
125+
mkdir build && cd build && \
126+
cmake .. \
127+
-DTPL_ENABLE_PARMETISLIB=ON \
128+
-DTPL_PARMETIS_LIBRARIES="${PARMETIS32_ROOT}/lib/libparmetis.so;${METIS32_ROOT}/lib/libmetis.so;${GKLIB_ROOT}/lib/libGKlib.so" \
129+
-DTPL_PARMETIS_INCLUDE_DIRS="${PARMETIS32_ROOT}/include;${METIS32_ROOT}/include;${GKLIB_ROOT}/include" \
130+
-DTPL_ENABLE_INTERNAL_BLASLIB=OFF \
131+
-DTPL_ENABLE_LAPACKLIB=ON \
132+
-DTPL_ENABLE_COMBBLASLIB=OFF \
133+
-DTPL_ENABLE_CUDALIB=OFF \
134+
-Denable_complex16=ON \
135+
-DXSDK_INDEX_SIZE=32 \
136+
-DBUILD_SHARED_LIBS=ON \
137+
-DCMAKE_INSTALL_PREFIX=${SUPERLU_DIST32_ROOT} \
138+
-DCMAKE_C_FLAGS="-O3 -fopenmp" \
139+
-DCMAKE_CXX_FLAGS="-O3 -fopenmp" \
140+
-DXSDK_ENABLE_Fortran=ON \
141+
-DCMAKE_Fortran_COMPILER=mpiifx && \
142+
make -j$(nproc) && \
143+
make install && \
144+
cd / && rm -rf superlu_dist-${SUPERLU_DIST_VERSION} v${SUPERLU_DIST_VERSION}.tar.gz
145+
146+
### -DCMAKE_C_COMPILER=mpiicc \
147+
### -DCMAKE_CXX_COMPILER=mpiicpc \
148+
### -DCMAKE_CUDA_COMPILER=nvcc \
149+
150+
# 5. install PEXSI
151+
RUN export LD_LIBRARY_PATH=${SUPERLU_DIST32_ROOT}/lib:${METIS32_ROOT}/lib:${PARMETIS32_ROOT}/lib:${GKLIB_ROOT}/lib:${LD_LIBRARY_PATH} && \
152+
export LIBRARY_PATH=${SUPERLU_DIST32_ROOT}/lib:${METIS32_ROOT}/lib:${PARMETIS32_ROOT}/lib:${GKLIB_ROOT}/lib:${LIBRARY_PATH} && \
153+
export PKG_CONFIG_PATH=${SUPERLU_DIST32_ROOT}/lib/pkgconfig:${METIS32_ROOT}/lib/pkgconfig:${PARMETIS32_ROOT}/lib/pkgconfig:${GKLIB_ROOT}/lib/pkgconfig:${PKG_CONFIG_PATH} && \
154+
export CPATH=${SUPERLU_DIST32_ROOT}/include:${PARMETIS32_ROOT}/include:${METIS32_ROOT}/include:${GKLIB_ROOT}/include:${CPATH} && \
155+
export CMAKE_PREFIX_PATH=${SUPERLU_DIST32_ROOT}:${PARMETIS32_ROOT}:${METIS32_ROOT}:${GKLIB_ROOT}:${CMAKE_PREFIX_PATH} && \
156+
wget https://bitbucket.org/berkeleylab/pexsi/downloads/pexsi_v${PEXSI_VERSION}.tar.gz && \
157+
tar -xzf pexsi_v${PEXSI_VERSION}.tar.gz && \
158+
cd pexsi_v${PEXSI_VERSION} && \
159+
sed -i 's/^add_pexsi_f_example_exe/# add_pexsi_f_example_exe/g' fortran/CMakeLists.txt && \
160+
sed -i 's/^add_pexsi_example_exe/# add_pexsi_example_exe/g' examples/CMakeLists.txt && \
161+
sed -i 's/add_executable/# add_executable/g' fortran/CMakeLists.txt && \
162+
sed -i 's/add_executable/# add_executable/g' examples/CMakeLists.txt && \
163+
mkdir build && cd build && \
164+
cmake .. \
165+
-DCMAKE_INSTALL_PREFIX=${PEXSI32_ROOT} \
166+
-DPEXSI_ENABLE_OPENMP=ON \
167+
-DPEXSI_ENABLE_FORTRAN=OFF && \
168+
make pexsi -j$(nproc) && \
169+
make install && \
170+
cd / && rm -rf pexsi_v${PEXSI_VERSION} pexsi_v${PEXSI_VERSION}.tar.gz
171+
172+
### -DCMAKE_C_COMPILER=mpiicc \
173+
### -DCMAKE_CXX_COMPILER=mpiicpc \
174+
###### END of PEXSI PART
175+
77176
# Clone and build abacus (optional during image build; keep for CI image)
78177
ADD https://api.github.com/repos/deepmodeling/abacus-develop/git/refs/heads/develop /dev/null
79178
RUN export LD_LIBRARY_PATH=${GKLIB_ROOT}/lib:${METIS32_ROOT}/lib:${PARMETIS32_ROOT}/lib:${SUPERLU_DIST32_ROOT}/lib:${PEXSI32_ROOT}/lib:${LD_LIBRARY_PATH} && \
179+
export PKG_CONFIG_PATH=${GKLIB_ROOT}/lib/pkgconfig:${METIS32_ROOT}/lib/pkgconfig:${PARMETIS32_ROOT}/lib/pkgconfig:${SUPERLU_DIST32_ROOT}/lib/pkgconfig:${PEXSI32_ROOT}/lib/pkgconfig:${PKG_CONFIG_PATH} && \
80180
export CPATH=${GKLIB_ROOT}/include:${METIS32_ROOT}/include:${PARMETIS32_ROOT}/include:${SUPERLU_DIST32_ROOT}/include:${PEXSI32_ROOT}/include:${CPATH} && \
181+
export CMAKE_PREFIX_PATH=${PEXSI32_ROOT}:${SUPERLU_DIST32_ROOT}:${PARMETIS32_ROOT}:${METIS32_ROOT}:${GKLIB_ROOT}:${CMAKE_PREFIX_PATH} && \
81182
cd /tmp && git clone https://github.com/deepmodeling/abacus-develop.git --depth 1 && \
82183
cd abacus-develop && \
83184
cmake -B build \
84185
-DENABLE_MLALGO=ON \
186+
-DENABLE_PEXSI=ON \
85187
-DENABLE_LIBXC=ON \
86188
-DENABLE_LIBRI=ON \
87189
-DENABLE_RAPIDJSON=ON \
@@ -92,5 +194,9 @@ RUN export LD_LIBRARY_PATH=${GKLIB_ROOT}/lib:${METIS32_ROOT}/lib:${PARMETIS32_RO
92194
rm -rf /tmp/abacus-develop
93195
# -DMKL_SYCL=OFF \
94196
# -DMKL_SYCL_DISTRIBUTED_DFT:BOOL=OFF \
197+
# cd /tmp && wget https://github.com/deepmodeling/abacus-develop/archive/refs/tags/v3.9.0.17.zip -O abacus-develop-3.9.0.17.zip && \
198+
# unzip abacus-develop-3.9.0.17.zip && \
199+
# cd abacus-develop-3.9.0.17 && \
200+
# sed -i '3i\#include "source_estate/elecstate_lcao.h"' source/source_lcao/edm.cpp && \
95201
# Default entry
96202
CMD ["/bin/bash"]

docs/advanced/input_files/input-main.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3882,10 +3882,13 @@ These variables are used to control berry phase and wannier90 interface paramete
38823882
- **Description**:
38833883
Type of electric field in the time domain.
38843884
- 0: Gaussian type function:
3885+
38853886
$$
38863887
E(t) = A \cos\left[2\pi f(t-t_0)+\varphi\right]\exp\left[-\frac{(t-t_0)^2}{2\sigma^2}\right]
38873888
$$
3889+
38883890
- 1: Trapezoid function:
3891+
38893892
$$
38903893
E(t) =
38913894
\begin{cases}
@@ -3895,18 +3898,23 @@ These variables are used to control berry phase and wannier90 interface paramete
38953898
0, & t \geqslant t_3
38963899
\end{cases}
38973900
$$
3901+
38983902
- 2: Trigonometric function:
3903+
38993904
$$
39003905
E(t) = A \cos(2\pi f_1 t + \varphi_1) \sin^2(2\pi f_2 t + \varphi_2)
39013906
$$
3907+
39023908
- 3: Heaviside step function:
3909+
39033910
$$
39043911
E(t) =
39053912
\begin{cases}
39063913
A, & t < t_0 \\
39073914
0, & t \geqslant t_0
39083915
\end{cases}
39093916
$$
3917+
39103918
- **Default**: 0
39113919

39123920
### td_tstart

source/source_base/module_container/ATen/kernels/blas.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,32 @@
33
namespace container {
44
namespace kernels {
55

6+
7+
template <typename T>
8+
struct blas_copy<T, DEVICE_CPU> {
9+
void operator()(
10+
const int n,
11+
const T *x,
12+
const int incx,
13+
T *y,
14+
const int incy)
15+
{
16+
BlasConnector::copy(n, x, incx, y, incy);
17+
}
18+
};
19+
20+
template <typename T>
21+
struct blas_nrm2<T, DEVICE_CPU> {
22+
using Real = typename GetTypeReal<T>::type;
23+
Real operator()(
24+
const int n,
25+
const T *x,
26+
const int incx)
27+
{
28+
return BlasConnector::nrm2(n, x, incx);
29+
}
30+
};
31+
632
template <typename T>
733
struct blas_dot<T, DEVICE_CPU> {
834
void operator()(
@@ -175,6 +201,17 @@ struct blas_gemm_batched_strided<T, DEVICE_CPU> {
175201
};
176202

177203
// Explicitly instantiate functors for the types of functor registered.
204+
205+
template struct blas_copy<float , DEVICE_CPU>;
206+
template struct blas_copy<double, DEVICE_CPU>;
207+
template struct blas_copy<std::complex<float >, DEVICE_CPU>;
208+
template struct blas_copy<std::complex<double>, DEVICE_CPU>;
209+
210+
template struct blas_nrm2<float , DEVICE_CPU>;
211+
template struct blas_nrm2<double, DEVICE_CPU>;
212+
template struct blas_nrm2<std::complex<float >, DEVICE_CPU>;
213+
template struct blas_nrm2<std::complex<double>, DEVICE_CPU>;
214+
178215
template struct blas_dot<float , DEVICE_CPU>;
179216
template struct blas_dot<double, DEVICE_CPU>;
180217
template struct blas_dot<std::complex<float >, DEVICE_CPU>;
@@ -221,4 +258,4 @@ template struct blas_gemm_batched_strided<std::complex<float >, DEVICE_CPU>;
221258
template struct blas_gemm_batched_strided<std::complex<double>, DEVICE_CPU>;
222259

223260
} // namespace kernels
224-
} // namespace container
261+
} // namespace container

source/source_base/module_container/ATen/kernels/blas.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,26 @@
99
namespace container {
1010
namespace kernels {
1111

12+
template <typename T, typename Device>
13+
struct blas_copy {
14+
// DCOPY copies a vector, x, to a vector, y.
15+
void operator()(
16+
const int n,
17+
const T *x,
18+
const int incx,
19+
T *y,
20+
const int incy);
21+
};
22+
23+
template <typename T, typename Device>
24+
struct blas_nrm2 {
25+
using Real = typename GetTypeReal<T>::type;
26+
Real operator()(
27+
const int n,
28+
const T *x,
29+
const int incx);
30+
};
31+
1232
template <typename T, typename Device>
1333
struct blas_dot {
1434
void operator()(
@@ -168,4 +188,4 @@ void destroyGpuBlasHandle(); // destroy blas handle
168188
} // namespace kernels
169189
} // namespace container
170190

171-
#endif // ATEN_KERNELS_BLAS_H_
191+
#endif // ATEN_KERNELS_BLAS_H_

source/source_base/module_container/ATen/kernels/cuda/blas.cu

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,32 @@ void destroyGpuBlasHandle() {
2222
}
2323
}
2424

25+
template <typename T>
26+
struct blas_nrm2<T, DEVICE_GPU> {
27+
using Real = typename GetTypeReal<T>::type;
28+
Real operator()(
29+
const int n,
30+
const T *x,
31+
const int incx)
32+
{
33+
Real result;
34+
cuBlasConnector::nrm2(cublas_handle, n, x, incx, &result);
35+
return result;
36+
}
37+
};
38+
39+
template <typename T>
40+
struct blas_copy<T, DEVICE_GPU> {
41+
void operator()(
42+
const int n,
43+
const T * x,
44+
const int incx,
45+
T *y,
46+
const int incy)
47+
{
48+
cuBlasConnector::copy(cublas_handle, n, x, incx, y, incy);
49+
}
50+
};
2551

2652
template <typename T>
2753
struct blas_dot<T, DEVICE_GPU> {
@@ -76,7 +102,7 @@ struct blas_gemv<T, DEVICE_GPU> {
76102
const int& incx,
77103
const T* beta,
78104
T* y,
79-
const int& incy)
105+
const int& incy)
80106
{
81107
cuBlasConnector::gemv(cublas_handle, trans, m, n, *alpha, A, lda, x, incx, *beta, y, incy);
82108
}
@@ -196,6 +222,19 @@ struct blas_gemm_batched_strided<T, DEVICE_GPU> {
196222
};
197223

198224
// Explicitly instantiate functors for the types of functor registered.
225+
226+
227+
228+
template struct blas_copy<float , DEVICE_GPU>;
229+
template struct blas_copy<double, DEVICE_GPU>;
230+
template struct blas_copy<std::complex<float> , DEVICE_GPU>;
231+
template struct blas_copy<std::complex<double>, DEVICE_GPU>;
232+
233+
template struct blas_nrm2<float , DEVICE_GPU>;
234+
template struct blas_nrm2<double, DEVICE_GPU>;
235+
template struct blas_nrm2<std::complex<float> , DEVICE_GPU>;
236+
template struct blas_nrm2<std::complex<double>, DEVICE_GPU>;
237+
199238
template struct blas_dot<float , DEVICE_GPU>;
200239
template struct blas_dot<double, DEVICE_GPU>;
201240
template struct blas_dot<std::complex<float> , DEVICE_GPU>;
@@ -242,4 +281,4 @@ template struct blas_gemm_batched_strided<std::complex<float >, DEVICE_GPU>;
242281
template struct blas_gemm_batched_strided<std::complex<double>, DEVICE_GPU>;
243282

244283
} // namespace kernels
245-
} // namespace container
284+
} // namespace container

0 commit comments

Comments
 (0)