@@ -15,10 +15,53 @@ class FFTW;
1515 * @author qianrui on 2022-05-18
1616 * @details
1717 * Math:
18+ * I.
19+ * Chebyshev polynomial:
20+ * T_0(x) = 1;
21+ * T_1(x) = x;
22+ * T_2(x) = 2x^2 -1;
23+ * T_3(x) = 4x^3-3x;
24+ * T_{n+2}(x) = 2xT_{n+1}(x) - T_n(x)
25+ * II.
26+ * Any analytical function f(x) can be expanded by Chebyshev polynomial:
27+ * f(x) = \sum_{n=0}^{norder} C_n[f]*T_n(x) (|x| < 1),
28+ * where C_n[f] = \frac{2-\delta_{n0}}{\pi} \int_0^\pi f(cos(\theta))cos(n\theta) d\theta
29+ * Here C_n can be calculate with FFT.
30+ * III.
31+ * Any functions of linear Operator or matrix f(\hat{A}) or f(A) can also be expanded as well:
32+ * f(A) = \sum_{n=0}^{norder-1} C_n[f]*T_n(A) (|all eigenvalues of A| < 1).
33+ * f(A)v = \sum_{n=0}^{norder-1} C_n[f]*T_n(A)v, where v is column vector
34+ * = \sum_{n=0}^{norder-1} C_n[f]*v_n, where v_n = T_n(A)v, v_0 = v
35+ * v_{n+2} = 2Av_{n+1} - v_n
36+ * IV.
37+ * v^+f(A)v = \sum_{n=0}^{norder-1} C_n[f]*v^+v_n = \sum_{n=0}^{norder-1} C_n[f] * w_n,
38+ * where w_n = v^+ * v_n = v^+ * T_n(A) * v
1839 *
1940 * USAGE:
20- *
21- *
41+ * Chebyshev che(10); // constructe a chebyshev expansion of 10 orders (n=0,1,...,9)
42+ * 1. che.calcoef_real(&a, &A::cos) // calculate C_n[f], where f is a.cos
43+ * for(inti=0;i<10;++i) cout<<che.coef_real[i]<<endl; //Then we print C_n[f]
44+ *
45+ * che.calcoef_complex(&b, &B::expi) // calculate C_n[g], where g is b.expi
46+ * for(inti=0;i<10;++i) cout<<che.coef_complex[i]<<endl; //Then we print C_n[g]
47+ *
48+ * che.calcoef_pair(&c, &C::cos, &C::sin) // calculate C_n[g], where g is (c.cos, c.sin)
49+ * for(inti=0;i<10;++i) cout<<che.coef_complex[i]<<endl; //Then we print C_n[g]
50+ *
51+ * 2. che.calcoef_real(&occ, &Occupy::fd)
52+ * che.calfinalvec_real(&hamilt, &Hamilt::hpsi, psi_in, psi_out, npw);
53+ * //calculate f(H)|psi>, where f is occ.fd and H is hamilt.hpsi
54+ *
55+ * che.calcoef_complex(&b, &B::expi)
56+ * che.calfinalvec_complex(&hamilt, &Hamilt::hpsi, psi_in, psi_out, npw, npwx, nbands);
57+ * //calculate exp(iH)|psi_i>
58+ *
59+ * 3. che.tracepolyA(&hamilt, &Hamilt::hpsi, psi_in, npw, npwx, nbands)
60+ * //calculate \sum_i^{nbands} <psi_i|T_n(H)|psi_i>
61+ *
62+ * 4. che.recurs_complex(&hamilt, &Hamilt::hpsi, vp1, v, vm1, npw)
63+ * //calculate vp1: |vp1> = 2 H|v> - |vm1>;
64+ *
2265 */
2366template <typename REAL>
2467class Chebyshev
@@ -27,119 +70,123 @@ class Chebyshev
2770public:
2871
2972 // constructor and deconstructor
30- Chebyshev (const int norder, const int ndmax_in );
73+ Chebyshev (const int norder);
3174 ~Chebyshev ();
3275
3376public:
34-
35- // Calculate coefficients C_n[f], where f is a function of real number
77+ // I.
78+ // Calculate coefficients C_n[f], where f is a function of real number
3679 template <class T >
3780 void calcoef_real (T *ptr, REAL (T::*fun)(REAL));
38- // Calculate coefficients C_n[g], where g is a function of complex number
81+ // Calculate coefficients C_n[g], where g is a function of complex number
3982 template <class T >
4083 void calcoef_complex (T *ptr, std::complex <REAL> (T::*fun)(std::complex <REAL>));
41- // Calculate coefficients C_n[g], where g is a general complex function g(x)=(g1(x), g2(x)) e.g. exp(ix)=(cos(x), sin(x))
84+ // Calculate coefficients C_n[g], where g is a general complex function g(x)=(g1(x), g2(x)) e.g. exp(ix)=(cos(x), sin(x))
4285 template <class T >
4386 void calcoef_pair (T *ptr, REAL (T::*fun1)(REAL), REAL (T::*fun2)(REAL));
4487
45- void calfinalvec_real (
46- void fun (std::complex <REAL> *in, std::complex <REAL> *out, const int ),
47- std::complex<REAL> *wavein,
48- std::complex<REAL> *waveout,
49- const int m = 1);
88+ // II.
89+ // Calculate the final vector f(A)v = \sum_{n=0}^{norder-1} C_n[f]*v_n
90+ // Here funA(in, out) means the map v -> Av : funA(v, Av)
91+ // Here m represents we treat m vectors at the same time: f(A)[v1,...,vm] and funA(in,out,m) means [v1,...,vm] -> A[v1,...,vm]
92+ // N is dimension of vector, and LDA is the distance between the first number of v_n and v_{n+1}.
93+ // LDA >= max(1, N). It is the same as the BLAS lib.
94+ // calfinalvec_real uses C_n[f], where f is a function of real number and A is a real Operator.
95+ template <class T >
96+ void calfinalvec (T *ptr,
97+ void (T::*funA)(REAL *in, REAL *out, const int ),
98+ REAL *wavein, REAL *waveout,
99+ const int N, const int LDA = 1, const int m = 1);
50100
51- void calfinalvec_complex (
52- void fun (std::complex <REAL> *in, std::complex <REAL> *out, const int ),
53- std::complex<REAL> *wavein,
54- std::complex<REAL> *waveout,
55- const int m = 1);
56-
57- bool checkconverge (
58- void tfun (std::complex <REAL> *in, std::complex <REAL> *out, const int ),
59- std::complex<REAL> *wavein,
60- REAL& tmax,
61- REAL& tmin,
62- REAL stept);
63-
64- void calpolyval (
65- void fun (std::complex <REAL> *in, std::complex <REAL> *out, const int ),
101+ // calfinalvec_real uses C_n[f], where f is a function of real number and A is a complex Operator.
102+ template <class T >
103+ void calfinalvec_real (T *ptr,
104+ void (T::*funA)(std::complex <REAL> *in, std::complex <REAL> *out, const int ),
105+ std::complex<REAL> *wavein, std::complex<REAL> *waveout,
106+ const int N, const int LDA = 1, const int m = 1);
107+
108+ // calfinalvec_complex uses C_n[g], where g is a function of complex number and A is a complex Operator.
109+ template <class T >
110+ void calfinalvec_complex (T *ptr,
111+ void (T::*funA)(std::complex <REAL> *in, std::complex <REAL> *out, const int ),
112+ std::complex<REAL> *wavein, std::complex<REAL> *waveout,
113+ const int N, const int LDA = 1, const int m = 1);
114+
115+ // III.
116+ // \sum_i v_i^+f(A)v_i = \sum_{i,n=0}^{norder-1} C_n[f]*v_i^+v_{i,n} = \sum_{n=0}^{norder-1} C_n[f] * w_n
117+ // calculate the sum of diagonal elements (Trace) of T_n(A) in v-represent: w_n = \sum_i v_i^+ * T_n(A) * v_i
118+ // i = 1,2,...m
119+ template <class T >
120+ void tracepolyA (
121+ T *ptr, void (T::*funA)(std::complex <REAL> *in, std::complex <REAL> *out, const int ),
66122 std::complex<REAL> *wavein,
67- const int m =1);
68- void calpolyvec (
69- void fun (std::complex <REAL> *in, std::complex <REAL> *out, const int ),
70- std::complex<REAL> *wavein, std::complex<REAL> *polyvec,
71- const int m =1);
72-
73- // void calpolyval(REAL x);
74- // REAL sumallterms();
123+ const int N, const int LDA = 1, const int m = 1);
75124
125+ // IV.
126+ // recurs fomula: v_{n+1} = 2Av_n - v_{n-1}
127+ // get v_{n+1} from v_n and v_{n-1}
128+ // recurs_complex: A is a real operator
129+ template <class T >
130+ void recurs_real (
131+ T *ptr, void (T::*funA)(REAL *in, REAL *out, const int ),
132+ REAL* arraynp1, // v_{n+1}
133+ REAL* arrayn, // v_n
134+ REAL* arrayn_1, // v_{n-1}
135+ const int N, const int LDA = 1, const int m = 1);
136+ // recurs_complex: A is a complex operator
137+ template <class T >
138+ void recurs_complex (
139+ T *ptr, void (T::*funA)(std::complex <REAL> *in, std::complex <REAL> *out, const int ),
140+ std::complex<REAL>* arraynp1, // v_{n+1}
141+ std::complex<REAL>* arrayn, // v_n
142+ std::complex<REAL>* arrayn_1, // v_{n-1}
143+ const int N, const int LDA = 1, const int m = 1);
144+
145+ // V.
146+ // auxiliary function
147+ // Abs of all eigenvalues of A should be less than 1.
148+ // Thus \hat(a) = \frac{(A - (tmax+tmin)/2)}{(tmax-tmin)/2}
149+ // tmax >= all eigenvalues; tmin <= all eigenvalues
150+ // Here we check if the trial number tmax(tmin) is the upper(lower) bound of eigenvalues and return it.
151+ template <class T >
152+ bool checkconverge (
153+ T *ptr, void (T::*funA)(std::complex <REAL> *in, std::complex <REAL> *out, const int ),
154+ std::complex<REAL> *wavein, const int N,
155+ REAL& tmax, // trial number for upper bound
156+ REAL& tmin, // trial number for lower bound
157+ REAL stept); // tmax = max() + stept, tmin = min() - stept
76158
77- int norder;
159+ public:
160+ // Members:
161+ int norder; // order of Chebyshev expansion
78162 int norder2; // 2 * norder * EXTEND
79163
80- // expansion coefficient of each order
81- // only first norder coefficients are usefull
82- REAL* coef_real;
83- std::complex <REAL>* coef_complex;
84- FFTW<REAL> fftw;
85-
86- REAL *polyvalue;
164+ REAL* coef_real; // expansion coefficient of each order
165+ std::complex <REAL>* coef_complex; // expansion coefficient of each order
166+ FFTW<REAL> fftw; // use for fftw
167+ REAL *polytrace; // w_n = \sum_i v^+ * T_n(A) * v
87168
88- bool getcoef;
89- bool getpolyval;
90- int ndmin; // dim of vector
91- int ndmax; // the distance between two closed vectors
169+ bool getcoef_real; // coef_real has been calculated
170+ bool getcoef_complex; // coef_complex has been calculated
92171
93172private:
94-
95- REAL ddot_real (const int &m,
173+ // SI.
174+ // calculate dot product <psi_L|psi_R>
175+ REAL ddot_real (
96176 const std::complex <REAL>* psi_L,
97- const std::complex <REAL>* psi_R);
177+ const std::complex <REAL>* psi_R,
178+ const int N, const int LDA = 1 , const int m = 1 );
98179
99- template <class T >
100- void recurs (
101- T *arraynp1,
102- T* arrayn,
103- T *arrayn_1,
104- void fun (T *in,T *out, const int ),
105- const int m);
180+
106181};
107182
108- template <typename REAL>
109- template <class T >
110- void Chebyshev<REAL>::recurs(
111- T *arraynp1,
112- T* arrayn,
113- T *arrayn_1,
114- void fun (T *in,T *out, const int ),
115- const int m)
116- {
117- fun (arrayn,arraynp1,m);
118- for (int ib = 0 ; ib < m ; ++ib)
119- {
120- for (int i = 0 ; i < ndmin; ++i)
121- {
122- arraynp1[i+ib*ndmax]=2.0 *arraynp1[i+ib*ndmax]-arrayn_1[i+ib*ndmax];
123- }
124- }
125- }
126-
127183template <>
128184class FFTW <double >
129185{
130186public:
131- FFTW (const int norder2_in)
132- {
133- ccoef = (fftw_complex *) fftw_malloc (sizeof (fftw_complex) * norder2_in);
134- dcoef = (double *) fftw_malloc (sizeof (double ) * norder2_in);
135- coef_plan = fftw_plan_dft_r2c_1d (norder2_in, dcoef, ccoef, FFTW_ESTIMATE);
136- }
137- ~FFTW ()
138- {
139- fftw_destroy_plan (coef_plan);
140- fftw_free (ccoef);
141- fftw_free (dcoef);
142- }
187+ FFTW (const int norder2_in);
188+ ~FFTW ();
189+ void execute_fftw ();
143190 double * dcoef; // [norder2]
144191 fftw_complex *ccoef;
145192 fftw_plan coef_plan;
@@ -150,25 +197,16 @@ template<>
150197class FFTW <float >
151198{
152199public:
153- FFTW (const int norder2_in)
154- {
155- ccoef = (fftwf_complex *) fftw_malloc (sizeof (fftwf_complex) * norder2_in);
156- dcoef = (float *) fftw_malloc (sizeof (float ) * norder2_in);
157- coef_plan = fftwf_plan_dft_r2c_1d (norder2_in, dcoef, ccoef, FFTW_ESTIMATE);
158- }
159- ~FFTW ()
160- {
161- fftwf_destroy_plan (coef_plan);
162- fftw_free (ccoef);
163- fftw_free (dcoef);
164- }
200+ FFTW (const int norder2_in);
201+ ~FFTW ();
202+ void execute_fftw ();
165203 double * dcoef; // [norder2]
166204 fftwf_complex *ccoef;
167205 fftwf_plan coef_plan;
168206};
169207#endif
170-
171208}
209+
172210#include " math_chebyshev_def.h"
173211
174212#endif
0 commit comments