Skip to content

Commit 6eeb2c2

Browse files
committed
Merge branch 'SDFT' of https://github.com/deepmodeling/abacus-develop into SDFT
2 parents 6d7111a + 4c4cb9e commit 6eeb2c2

File tree

16 files changed

+992
-864
lines changed

16 files changed

+992
-864
lines changed

source/Makefile.Objects

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ unk_overlap_pw.o \
5959
berryphase.o \
6060
sto_iter.o\
6161
sto_wf.o\
62+
sto_func.o\
6263
sto_hchi.o\
63-
sto_che.o\
6464
sto_forces.o\
6565
sto_stress_pw.o
6666

@@ -81,6 +81,7 @@ math_integral.o\
8181
math_ylmreal.o\
8282
mathzone_add1.o\
8383
math_bspline.o\
84+
math_chebyshev.o\
8485
integral.o \
8586
polint.o \
8687
sph_bessel.o \

source/module_base/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ add_library(
1616
math_polyint.cpp
1717
math_sphbes.cpp
1818
math_ylmreal.cpp
19-
math_bspline.cpp
19+
math_bspline.cpp
20+
math_chebyshev.cpp
2021
mathzone.cpp
2122
mathzone_add1.cpp
2223
matrix.cpp
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#include "./math_chebyshev.h"
2+
#include "./constants.h"
3+
#include "./blas_connector.h"
4+
#include "./global_function.h"
5+
namespace ModuleBase
6+
{
7+
//we only have two examples: double and float.
8+
template class Chebyshev<double>;
9+
#ifdef __MIX_PRECISION
10+
template class Chebyshev<float>;
11+
#endif
12+
13+
FFTW<double>::FFTW(const int norder2_in)
14+
{
15+
ccoef = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * norder2_in);
16+
dcoef = (double *) fftw_malloc(sizeof(double) * norder2_in);
17+
coef_plan = fftw_plan_dft_r2c_1d(norder2_in, dcoef, ccoef, FFTW_ESTIMATE);
18+
}
19+
FFTW<double>::~FFTW()
20+
{
21+
fftw_destroy_plan(coef_plan);
22+
fftw_free(ccoef);
23+
fftw_free(dcoef);
24+
}
25+
void FFTW<double>::execute_fftw()
26+
{
27+
fftw_execute(this->coef_plan);
28+
}
29+
30+
#ifdef __MIX_PRECISION
31+
FFTW<float>::FFTW(const int norder2_in)
32+
{
33+
ccoef = (fftwf_complex *) fftw_malloc(sizeof(fftwf_complex) * norder2_in);
34+
dcoef = (float *) fftw_malloc(sizeof(float) * norder2_in);
35+
coef_plan = fftwf_plan_dft_r2c_1d(norder2_in, dcoef, ccoef, FFTW_ESTIMATE);
36+
}
37+
FFTW<float>::~FFTW()
38+
{
39+
fftwf_destroy_plan(coef_plan);
40+
fftw_free(ccoef);
41+
fftw_free(dcoef);
42+
}
43+
void FFTW<float>::execute_fftw()
44+
{
45+
fftwf_execute(this->coef_plan);
46+
}
47+
#endif
48+
49+
//A number to control the number of grids in C_n integration
50+
#define EXTEND 16
51+
52+
template<typename REAL>
53+
Chebyshev<REAL>::Chebyshev(const int norder_in) : fftw(2 * EXTEND * norder_in)
54+
{
55+
this->norder = norder_in;
56+
norder2 = 2 * norder * EXTEND;
57+
if(this->norder < 1)
58+
{
59+
ModuleBase::WARNING_QUIT("Stochastic_Chebychev", "The Chebyshev expansion order should be at least 1!");
60+
}
61+
polytrace = new REAL [norder];
62+
coef_real = new REAL [norder];
63+
coef_complex = new std::complex<REAL> [norder];
64+
65+
// ndmin = ndmax = ndmax_in;
66+
67+
getcoef_complex = false;
68+
getcoef_real = false;
69+
}
70+
71+
template<typename REAL>
72+
Chebyshev<REAL>::~Chebyshev()
73+
{
74+
delete [] polytrace;
75+
delete [] coef_real;
76+
delete [] coef_complex;
77+
}
78+
79+
template<typename REAL>
80+
REAL Chebyshev<REAL>:: ddot_real(
81+
const std::complex<REAL>* psi_L,
82+
const std::complex<REAL>* psi_R,
83+
const int N, const int LDA, const int m)
84+
{
85+
REAL result = 0;
86+
if(N == LDA || m==1)
87+
{
88+
int dim2=2 * N * m;
89+
REAL *pL,*pR;
90+
pL=(REAL *)psi_L;
91+
pR=(REAL *)psi_R;
92+
result=BlasConnector::dot(dim2,pL,1,pR,1);
93+
}
94+
else
95+
{
96+
REAL *pL,*pR;
97+
pL=(REAL *)psi_L;
98+
pR=(REAL *)psi_R;
99+
for(int i = 0 ; i < m ; ++i)
100+
{
101+
int dim2=2 * N;
102+
result += BlasConnector::dot(dim2,pL,1,pR,1);
103+
pL += 2 * LDA;
104+
pR += 2 * LDA;
105+
}
106+
}
107+
return result;
108+
}
109+
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
#ifndef STO_CHEBYCHEV_H
2+
#define STO_CHEBYCHEV_H
3+
#include <complex>
4+
#include "fftw3.h"
5+
6+
namespace ModuleBase
7+
{
8+
//template class for fftw
9+
template<typename T>
10+
class FFTW;
11+
12+
/**
13+
* @brief A class to treat the Chebyshev expansion.
14+
*
15+
* @author qianrui on 2022-05-18
16+
* @details
17+
* Math:
18+
* I.
19+
* Chebyshev polynomial:
20+
* T_0(x) = 1;
21+
* T_1(x) = x;
22+
* T_2(x) = 2x^2 -1;
23+
* T_3(x) = 4x^3-3x;
24+
* T_{n+2}(x) = 2xT_{n+1}(x) - T_n(x)
25+
* II.
26+
* Any analytical function f(x) can be expanded by Chebyshev polynomial:
27+
* f(x) = \sum_{n=0}^{norder} C_n[f]*T_n(x) (|x| < 1),
28+
* where C_n[f] = \frac{2-\delta_{n0}}{\pi} \int_0^\pi f(cos(\theta))cos(n\theta) d\theta
29+
* Here C_n can be calculate with FFT.
30+
* III.
31+
* Any functions of linear Operator or matrix f(\hat{A}) or f(A) can also be expanded as well:
32+
* f(A) = \sum_{n=0}^{norder-1} C_n[f]*T_n(A) (|all eigenvalues of A| < 1).
33+
* f(A)v = \sum_{n=0}^{norder-1} C_n[f]*T_n(A)v, where v is column vector
34+
* = \sum_{n=0}^{norder-1} C_n[f]*v_n, where v_n = T_n(A)v, v_0 = v
35+
* v_{n+2} = 2Av_{n+1} - v_n
36+
* IV.
37+
* v^+f(A)v = \sum_{n=0}^{norder-1} C_n[f]*v^+v_n = \sum_{n=0}^{norder-1} C_n[f] * w_n,
38+
* where w_n = v^+ * v_n = v^+ * T_n(A) * v
39+
*
40+
* USAGE:
41+
* Chebyshev che(10); // constructe a chebyshev expansion of 10 orders (n=0,1,...,9)
42+
* 1. che.calcoef_real(&a, &A::cos) // calculate C_n[f], where f is a.cos
43+
* for(inti=0;i<10;++i) cout<<che.coef_real[i]<<endl; //Then we print C_n[f]
44+
*
45+
* che.calcoef_complex(&b, &B::expi) // calculate C_n[g], where g is b.expi
46+
* for(inti=0;i<10;++i) cout<<che.coef_complex[i]<<endl; //Then we print C_n[g]
47+
*
48+
* che.calcoef_pair(&c, &C::cos, &C::sin) // calculate C_n[g], where g is (c.cos, c.sin)
49+
* for(inti=0;i<10;++i) cout<<che.coef_complex[i]<<endl; //Then we print C_n[g]
50+
*
51+
* 2. che.calcoef_real(&occ, &Occupy::fd)
52+
* che.calfinalvec_real(&hamilt, &Hamilt::hpsi, psi_in, psi_out, npw);
53+
* //calculate f(H)|psi>, where f is occ.fd and H is hamilt.hpsi
54+
*
55+
* che.calcoef_complex(&b, &B::expi)
56+
* che.calfinalvec_complex(&hamilt, &Hamilt::hpsi, psi_in, psi_out, npw, npwx, nbands);
57+
* //calculate exp(iH)|psi_i>
58+
*
59+
* 3. che.tracepolyA(&hamilt, &Hamilt::hpsi, psi_in, npw, npwx, nbands)
60+
* //calculate \sum_i^{nbands} <psi_i|T_n(H)|psi_i>
61+
*
62+
* 4. che.recurs_complex(&hamilt, &Hamilt::hpsi, vp1, v, vm1, npw)
63+
* //calculate vp1: |vp1> = 2 H|v> - |vm1>;
64+
*
65+
*/
66+
template<typename REAL>
67+
class Chebyshev
68+
{
69+
70+
public:
71+
72+
// constructor and deconstructor
73+
Chebyshev(const int norder);
74+
~Chebyshev();
75+
76+
public:
77+
// I.
78+
// Calculate coefficients C_n[f], where f is a function of real number
79+
template<class T>
80+
void calcoef_real(T *ptr, REAL (T::*fun)(REAL));
81+
// Calculate coefficients C_n[g], where g is a function of complex number
82+
template<class T>
83+
void calcoef_complex(T *ptr, std::complex<REAL> (T::*fun)(std::complex<REAL>));
84+
// Calculate coefficients C_n[g], where g is a general complex function g(x)=(g1(x), g2(x)) e.g. exp(ix)=(cos(x), sin(x))
85+
template<class T>
86+
void calcoef_pair(T *ptr, REAL (T::*fun1)(REAL), REAL (T::*fun2)(REAL));
87+
88+
// II.
89+
// Calculate the final vector f(A)v = \sum_{n=0}^{norder-1} C_n[f]*v_n
90+
// Here funA(in, out) means the map v -> Av : funA(v, Av)
91+
// Here m represents we treat m vectors at the same time: f(A)[v1,...,vm] and funA(in,out,m) means [v1,...,vm] -> A[v1,...,vm]
92+
// N is dimension of vector, and LDA is the distance between the first number of v_n and v_{n+1}.
93+
// LDA >= max(1, N). It is the same as the BLAS lib.
94+
// calfinalvec_real uses C_n[f], where f is a function of real number and A is a real Operator.
95+
template<class T>
96+
void calfinalvec(T *ptr,
97+
void (T::*funA)(REAL *in, REAL *out, const int),
98+
REAL *wavein, REAL *waveout,
99+
const int N, const int LDA = 1, const int m = 1);
100+
101+
// calfinalvec_real uses C_n[f], where f is a function of real number and A is a complex Operator.
102+
template<class T>
103+
void calfinalvec_real(T *ptr,
104+
void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int),
105+
std::complex<REAL> *wavein, std::complex<REAL> *waveout,
106+
const int N, const int LDA = 1, const int m = 1);
107+
108+
// calfinalvec_complex uses C_n[g], where g is a function of complex number and A is a complex Operator.
109+
template<class T>
110+
void calfinalvec_complex(T *ptr,
111+
void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int),
112+
std::complex<REAL> *wavein, std::complex<REAL> *waveout,
113+
const int N, const int LDA = 1, const int m = 1);
114+
115+
// III.
116+
// \sum_i v_i^+f(A)v_i = \sum_{i,n=0}^{norder-1} C_n[f]*v_i^+v_{i,n} = \sum_{n=0}^{norder-1} C_n[f] * w_n
117+
// calculate the sum of diagonal elements (Trace) of T_n(A) in v-represent: w_n = \sum_i v_i^+ * T_n(A) * v_i
118+
// i = 1,2,...m
119+
template<class T>
120+
void tracepolyA(
121+
T *ptr, void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int),
122+
std::complex<REAL> *wavein,
123+
const int N, const int LDA = 1, const int m = 1);
124+
125+
// IV.
126+
// recurs fomula: v_{n+1} = 2Av_n - v_{n-1}
127+
// get v_{n+1} from v_n and v_{n-1}
128+
// recurs_complex: A is a real operator
129+
template<class T>
130+
void recurs_real(
131+
T *ptr, void (T::*funA)(REAL *in, REAL *out, const int),
132+
REAL* arraynp1, //v_{n+1}
133+
REAL* arrayn, //v_n
134+
REAL* arrayn_1, //v_{n-1}
135+
const int N, const int LDA = 1, const int m = 1);
136+
// recurs_complex: A is a complex operator
137+
template<class T>
138+
void recurs_complex(
139+
T *ptr, void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int),
140+
std::complex<REAL>* arraynp1, //v_{n+1}
141+
std::complex<REAL>* arrayn, //v_n
142+
std::complex<REAL>* arrayn_1, //v_{n-1}
143+
const int N, const int LDA = 1, const int m = 1);
144+
145+
// V.
146+
// auxiliary function
147+
// Abs of all eigenvalues of A should be less than 1.
148+
// Thus \hat(a) = \frac{(A - (tmax+tmin)/2)}{(tmax-tmin)/2}
149+
// tmax >= all eigenvalues; tmin <= all eigenvalues
150+
// Here we check if the trial number tmax(tmin) is the upper(lower) bound of eigenvalues and return it.
151+
template<class T>
152+
bool checkconverge(
153+
T *ptr, void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int),
154+
std::complex<REAL> *wavein, const int N,
155+
REAL& tmax, //trial number for upper bound
156+
REAL& tmin, //trial number for lower bound
157+
REAL stept); //tmax = max() + stept, tmin = min() - stept
158+
159+
public:
160+
//Members:
161+
int norder; // order of Chebyshev expansion
162+
int norder2; // 2 * norder * EXTEND
163+
164+
REAL* coef_real; // expansion coefficient of each order
165+
std::complex<REAL>* coef_complex; // expansion coefficient of each order
166+
FFTW<REAL> fftw; //use for fftw
167+
REAL *polytrace; //w_n = \sum_i v^+ * T_n(A) * v
168+
169+
bool getcoef_real; //coef_real has been calculated
170+
bool getcoef_complex; //coef_complex has been calculated
171+
172+
private:
173+
//SI.
174+
//calculate dot product <psi_L|psi_R>
175+
REAL ddot_real(
176+
const std::complex<REAL>* psi_L,
177+
const std::complex<REAL>* psi_R,
178+
const int N, const int LDA = 1, const int m = 1);
179+
180+
181+
};
182+
183+
template<>
184+
class FFTW<double>
185+
{
186+
public:
187+
FFTW(const int norder2_in);
188+
~FFTW();
189+
void execute_fftw();
190+
double* dcoef; //[norder2]
191+
fftw_complex *ccoef;
192+
fftw_plan coef_plan;
193+
};
194+
195+
#ifdef __MIX_PRECISION
196+
template<>
197+
class FFTW<float>
198+
{
199+
public:
200+
FFTW(const int norder2_in);
201+
~FFTW();
202+
void execute_fftw();
203+
double* dcoef; //[norder2]
204+
fftwf_complex *ccoef;
205+
fftwf_plan coef_plan;
206+
};
207+
#endif
208+
}
209+
210+
#include "math_chebyshev_def.h"
211+
212+
#endif

0 commit comments

Comments
 (0)