Skip to content

Commit 29776a6

Browse files
authored
Merge branch 'develop' into develop
2 parents 8030dd0 + bf4ce74 commit 29776a6

File tree

113 files changed

+1611
-944
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

113 files changed

+1611
-944
lines changed

.github/workflows/test.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@ jobs:
1717
volumes:
1818
- /tmp/ccache:/github/home/.ccache
1919
steps:
20-
- name: Checkout
20+
- name: Checkout repository
2121
uses: actions/checkout@v5
2222
with:
23-
submodules: recursive
2423
fetch-depth: 0
24+
# We will handle submodules manually after fixing ownership
25+
submodules: 'false'
26+
27+
- name: Take ownership of the workspace and update submodules
28+
run: |
29+
sudo chown -R $(whoami) .
30+
git submodule update --init --recursive
2531
2632
- name: Install CI tools
2733
run: |

docs/advanced/input_files/input-main.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,15 +1487,20 @@ These variables are used to control the geometry relaxation.
14871487

14881488
### relax_method
14891489

1490-
- **Type**: String
1490+
- **Type**: Vector of strings
14911491
- **Description**: The methods to do geometry optimization.
1492+
The first element:
14921493
- cg: using the conjugate gradient (CG) algorithm. Note that there are two implementations of the conjugate gradient (CG) method, see [relax_new](#relax_new).
1493-
- bfgs: using the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm.
1494-
- bfgs_trad: using the traditional Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm.
1494+
- bfgs : using the Broyden–Fletcher–Goldfarb–Shanno (BFGS) algorithm.
1495+
- lbfgs: using the Limited-memory Broyden–Fletcher–Goldfarb–Shanno (LBFGS) algorithm.
14951496
- cg_bfgs: using the CG method for the initial steps, and switching to BFGS method when the force convergence is smaller than [relax_cg_thr](#relax_cg_thr).
14961497
- sd: using the steepest descent (SD) algorithm.
14971498
- fire: the Fast Inertial Relaxation Engine method (FIRE), a kind of molecular-dynamics-based relaxation algorithm, is implemented in the molecular dynamics (MD) module. The algorithm can be used by setting [calculation](#calculation) to `md` and [md_type](#md_type) to `fire`. Also ionic velocities should be set in this case. See [fire](../md.md#fire) for more details.
1498-
- **Default**: cg
1499+
1500+
The second element:
1501+
When the first element is bfgs, the second element selects the implementation: 1 means the new BFGS algorithm is used; any other value means the old BFGS algorithm is used.
1502+
- **Default**: cg 1
1503+
- **Note**: In the 3.10-LTS version, the type of this parameter is std::string. It can be set to "cg", "bfgs", "cg_bfgs", "bfgs_trad", "lbfgs", "sd", "fire".
14991504

15001505
### relax_new
15011506

python/pyabacus/CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ list(APPEND _diago
189189
${HSOLVER_PATH}/diago_david.cpp
190190
${HSOLVER_PATH}/diag_const_nums.cpp
191191
${HSOLVER_PATH}/diago_iter_assist.cpp
192-
${HSOLVER_PATH}/kernels/dngvd_op.cpp
192+
${HSOLVER_PATH}/kernels/hegvd_op.cpp
193193
${HSOLVER_PATH}/kernels/bpcg_kernel_op.cpp
194194
${BASE_PATH}/kernels/math_kernel_op.cpp
195195
${BASE_PATH}/kernels/math_kernel_op_vec.cpp

python/pyabacus/src/hsolver/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ list(APPEND _diago
99
${HSOLVER_PATH}/diago_pxxxgvx.cpp
1010

1111

12-
${HSOLVER_PATH}/kernels/dngvd_op.cpp
12+
${HSOLVER_PATH}/kernels/hegvd_op.cpp
1313
${HSOLVER_PATH}/kernels/bpcg_kernel_op.cpp
1414
# dependency
1515
${BASE_PATH}/kernels/math_kernel_op.cpp

python/pyabacus/src/hsolver/py_diago_cg.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ class PyDiagoCG
145145
std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out.data<std::complex<double>>());
146146
};
147147

148-
auto subspace_func = [] (const ct::Tensor& psi_in, ct::Tensor& psi_out) { /*do nothing*/ };
148+
auto subspace_func = [](const ct::Tensor& psi_in, ct::Tensor& psi_out, const bool S_orth) { /*do nothing*/ };
149149

150150
auto spsi_func = [this] (const ct::Tensor& psi_in, ct::Tensor& spsi_out) {
151151
const auto ndim = psi_in.shape().ndim();

python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,6 @@ class PyDiagoDavSubspace
144144
dav_ndim,
145145
tol,
146146
max_iter,
147-
need_subspace,
148147
comm_info,
149148
diag_subspace,
150149
nb2d

source/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ list(APPEND device_srcs
3535
source_pw/module_pwdft/kernels/meta_op.cpp
3636
source_pw/module_stodft/kernels/hpsi_norm_op.cpp
3737
source_basis/module_pw/kernels/pw_op.cpp
38-
source_hsolver/kernels/dngvd_op.cpp
38+
source_hsolver/kernels/hegvd_op.cpp
3939
source_hsolver/kernels/bpcg_kernel_op.cpp
4040
source_estate/kernels/elecstate_op.cpp
4141

@@ -70,7 +70,7 @@ if(USE_CUDA)
7070
source_pw/module_stodft/kernels/cuda/hpsi_norm_op.cu
7171
source_pw/module_pwdft/kernels/cuda/onsite_op.cu
7272
source_basis/module_pw/kernels/cuda/pw_op.cu
73-
source_hsolver/kernels/cuda/dngvd_op.cu
73+
source_hsolver/kernels/cuda/hegvd_op.cu
7474
source_hsolver/kernels/cuda/bpcg_kernel_op.cu
7575
source_estate/kernels/cuda/elecstate_op.cu
7676

@@ -101,7 +101,7 @@ if(USE_ROCM)
101101
source_pw/module_pwdft/kernels/rocm/onsite_op.hip.cu
102102
source_pw/module_stodft/kernels/rocm/hpsi_norm_op.hip.cu
103103
source_basis/module_pw/kernels/rocm/pw_op.hip.cu
104-
source_hsolver/kernels/rocm/dngvd_op.hip.cu
104+
source_hsolver/kernels/rocm/hegvd_op.hip.cu
105105
source_hsolver/kernels/rocm/bpcg_kernel_op.hip.cu
106106
source_estate/kernels/rocm/elecstate_op.hip.cu
107107

source/Makefile.Objects

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ OBJS_ESOLVER=esolver.o\
267267
esolver_lj.o\
268268
esolver_dp.o\
269269
esolver_of.o\
270+
esolver_of_tddft.o\
270271
esolver_of_tool.o\
271272
esolver_of_interface.o\
272273
pw_others.o\
@@ -361,6 +362,7 @@ OBJS_HAMILT_OF=kedf_tf.o\
361362
kedf_xwm.o\
362363
kedf_lkt.o\
363364
kedf_manager.o\
365+
evolve_ofdft.o\
364366

365367
OBJS_HAMILT_LCAO=hamilt_lcao.o\
366368
operator_lcao.o\
@@ -396,7 +398,7 @@ OBJS_HSOLVER=diago_cg.o\
396398
hsolver_lcaopw.o\
397399
hsolver_pw_sdft.o\
398400
diago_iter_assist.o\
399-
dngvd_op.o\
401+
hegvd_op.o\
400402
bpcg_kernel_op.o\
401403
diag_const_nums.o\
402404
diag_hs_para.o\

source/source_base/kernels/dsp/dsp_connector.cpp

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
extern "C"
77
{
88
#define complex_double ignore_complex_double
9-
#include <mt_hthread_blas.h> // MTBLAS_TRANSPOSE etc
9+
#include <mt_hthread_blas.h> // include faster mtblas kernels
1010
#undef complex_double
11-
#include <mtblas_interface.h> // gemm
11+
#include <mtblas_interface.h> // include normal mtblas kernels that automatically operate memory, but slower.
1212
}
1313
namespace mtfunc
1414
{
@@ -22,45 +22,42 @@ void dspDestoryHandle(int id)
2222
{
2323
hthread_dev_close(id);
2424
std::cout << " ** DSP closed on cluster " << id << " **" << std::endl;
25-
} // Close dsp cluster at the end
25+
} // Close dsp cluster at the end of the program
2626

27-
MTBLAS_TRANSPOSE convertBLASTranspose(const char* blasTrans)
27+
// MTBlas silently removed its MTBLAS_TRANSPOSE data type and now uses the original CBLAS_TRANSPOSE, so this function has been changed to return CBLAS_TRANSPOSE.
28+
29+
CBLAS_TRANSPOSE convertBLASTranspose(const char* blasTrans)
2830
{
2931
switch (blasTrans[0])
3032
{
3133
case 'N':
3234
case 'n':
33-
return MtblasNoTrans;
35+
return CblasNoTrans;
3436
case 'T':
3537
case 't':
36-
return MtblasTrans;
38+
return CblasTrans;
3739
case 'C':
3840
case 'c':
39-
return MtblasConjTrans;
41+
return CblasConjTrans;
4042
default:
4143
std::cout << "Invalid BLAS transpose parameter!! Use default instead." << std::endl;
42-
return MtblasNoTrans;
44+
return CblasNoTrans;
4345
}
44-
} // Used to convert normal transpost char to mtblas transpose flag
46+
} // Used to convert normal transpose char to cblas transpose flag
4547

4648
void* malloc_ht(size_t bytes, int cluster_id)
4749
{
48-
// std::cout << "MALLOC " << cluster_id;
4950
void* ptr = hthread_malloc((int)cluster_id, bytes, HT_MEM_RW);
50-
// std::cout << ptr << " SUCCEED" << std::endl;;
5151
return ptr;
52-
}
52+
} // Malloc on dsp. Used to replace original malloc
53+
5354

54-
// Used to replace original malloc
5555

5656
void free_ht(void* ptr)
5757
{
58-
// std::cout << "FREE " << ptr;
5958
hthread_free(ptr);
60-
// std::cout << " FREE SUCCEED" << std::endl;
61-
}
59+
} // Free on dsp. Used to replace original free
6260

63-
// Used to replace original free
6461

6562
void sgemm_mt_(const char* transa,
6663
const char* transb,
@@ -77,7 +74,7 @@ void sgemm_mt_(const char* transa,
7774
const int* ldc,
7875
int cluster_id)
7976
{
80-
mtblas_sgemm(MTBLAS_ORDER::MtblasColMajor,
77+
mtblas_sgemm(CBLAS_ORDER::CblasColMajor,
8178
convertBLASTranspose(transa),
8279
convertBLASTranspose(transb),
8380
*m,
@@ -109,7 +106,7 @@ void dgemm_mt_(const char* transa,
109106
const int* ldc,
110107
int cluster_id)
111108
{
112-
mtblas_dgemm(MTBLAS_ORDER::MtblasColMajor,
109+
mtblas_dgemm(CBLAS_ORDER::CblasColMajor,
113110
convertBLASTranspose(transa),
114111
convertBLASTranspose(transb),
115112
*m,
@@ -141,7 +138,7 @@ void zgemm_mt_(const char* transa,
141138
const int* ldc,
142139
int cluster_id)
143140
{
144-
mtblas_zgemm(MTBLAS_ORDER::MtblasColMajor,
141+
mtblas_zgemm(CBLAS_ORDER::CblasColMajor,
145142
convertBLASTranspose(transa),
146143
convertBLASTranspose(transb),
147144
*m,
@@ -173,7 +170,7 @@ void cgemm_mt_(const char* transa,
173170
const int* ldc,
174171
int cluster_id)
175172
{
176-
mtblas_cgemm(MTBLAS_ORDER::MtblasColMajor,
173+
mtblas_cgemm(CBLAS_ORDER::CblasColMajor,
177174
convertBLASTranspose(transa),
178175
convertBLASTranspose(transb),
179176
*m,
@@ -207,7 +204,7 @@ void sgemm_mth_(const char* transa,
207204
const int* ldc,
208205
int cluster_id)
209206
{
210-
mt_hthread_sgemm(MTBLAS_ORDER::MtblasColMajor,
207+
mt_hthread_sgemm(CBLAS_ORDER::CblasColMajor,
211208
convertBLASTranspose(transa),
212209
convertBLASTranspose(transb),
213210
*m,
@@ -239,7 +236,7 @@ void dgemm_mth_(const char* transa,
239236
const int* ldc,
240237
int cluster_id)
241238
{
242-
mt_hthread_dgemm(MTBLAS_ORDER::MtblasColMajor,
239+
mt_hthread_dgemm(CBLAS_ORDER::CblasColMajor,
243240
convertBLASTranspose(transa),
244241
convertBLASTranspose(transb),
245242
*m,
@@ -275,7 +272,7 @@ void zgemm_mth_(const char* transa,
275272
*alp = *alpha;
276273
std::complex<double>* bet = (std::complex<double>*)malloc_ht(sizeof(std::complex<double>), cluster_id);
277274
*bet = *beta;
278-
mt_hthread_zgemm(MTBLAS_ORDER::MtblasColMajor,
275+
mt_hthread_zgemm(CBLAS_ORDER::CblasColMajor,
279276
convertBLASTranspose(transa),
280277
convertBLASTranspose(transb),
281278
*m,
@@ -314,7 +311,7 @@ void cgemm_mth_(const char* transa,
314311
std::complex<float>* bet = (std::complex<float>*)malloc_ht(sizeof(std::complex<float>), cluster_id);
315312
*bet = *beta;
316313

317-
mt_hthread_cgemm(MTBLAS_ORDER::MtblasColMajor,
314+
mt_hthread_cgemm(CBLAS_ORDER::CblasColMajor,
318315
convertBLASTranspose(transa),
319316
convertBLASTranspose(transb),
320317
*m,

source/source_base/module_container/ATen/kernels/cuda/lapack.cu

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ struct lapack_potrf<T, DEVICE_GPU> {
8888
};
8989

9090
template <typename T>
91-
struct lapack_dnevd<T, DEVICE_GPU> {
91+
struct lapack_heevd<T, DEVICE_GPU> {
9292
using Real = typename GetTypeReal<T>::type;
9393
void operator()(
9494
const char& jobz,
@@ -97,12 +97,12 @@ struct lapack_dnevd<T, DEVICE_GPU> {
9797
const int& dim,
9898
Real* eigen_val)
9999
{
100-
cuSolverConnector::dnevd(cusolver_handle, jobz, uplo, dim, Mat, dim, eigen_val);
100+
cuSolverConnector::heevd(cusolver_handle, jobz, uplo, dim, Mat, dim, eigen_val);
101101
}
102102
};
103103

104104
template <typename T>
105-
struct lapack_dngvd<T, DEVICE_GPU> {
105+
struct lapack_hegvd<T, DEVICE_GPU> {
106106
using Real = typename GetTypeReal<T>::type;
107107
void operator()(
108108
const int& itype,
@@ -113,7 +113,7 @@ struct lapack_dngvd<T, DEVICE_GPU> {
113113
const int& dim,
114114
Real* eigen_val)
115115
{
116-
cuSolverConnector::dngvd(cusolver_handle, itype, jobz, uplo, dim, Mat_A, dim, Mat_B, dim, eigen_val);
116+
cuSolverConnector::hegvd(cusolver_handle, itype, jobz, uplo, dim, Mat_A, dim, Mat_B, dim, eigen_val);
117117
}
118118
};
119119

@@ -175,15 +175,15 @@ template struct lapack_potrf<double, DEVICE_GPU>;
175175
template struct lapack_potrf<std::complex<float>, DEVICE_GPU>;
176176
template struct lapack_potrf<std::complex<double>, DEVICE_GPU>;
177177

178-
template struct lapack_dnevd<float, DEVICE_GPU>;
179-
template struct lapack_dnevd<double, DEVICE_GPU>;
180-
template struct lapack_dnevd<std::complex<float>, DEVICE_GPU>;
181-
template struct lapack_dnevd<std::complex<double>, DEVICE_GPU>;
178+
template struct lapack_heevd<float, DEVICE_GPU>;
179+
template struct lapack_heevd<double, DEVICE_GPU>;
180+
template struct lapack_heevd<std::complex<float>, DEVICE_GPU>;
181+
template struct lapack_heevd<std::complex<double>, DEVICE_GPU>;
182182

183-
template struct lapack_dngvd<float, DEVICE_GPU>;
184-
template struct lapack_dngvd<double, DEVICE_GPU>;
185-
template struct lapack_dngvd<std::complex<float>, DEVICE_GPU>;
186-
template struct lapack_dngvd<std::complex<double>, DEVICE_GPU>;
183+
template struct lapack_hegvd<float, DEVICE_GPU>;
184+
template struct lapack_hegvd<double, DEVICE_GPU>;
185+
template struct lapack_hegvd<std::complex<float>, DEVICE_GPU>;
186+
template struct lapack_hegvd<std::complex<double>, DEVICE_GPU>;
187187

188188
template struct lapack_getrf<float, DEVICE_GPU>;
189189
template struct lapack_getrf<double, DEVICE_GPU>;

0 commit comments

Comments
 (0)