|
| 1 | +#ifndef STO_CHEBYCHEV_H |
| 2 | +#define STO_CHEBYCHEV_H |
| 3 | +#include <complex> |
| 4 | +#include "fftw3.h" |
| 5 | + |
| 6 | +namespace ModuleBase |
| 7 | +{ |
| 8 | +//template class for fftw |
| 9 | +template<typename T> |
| 10 | +class FFTW; |
| 11 | + |
| 12 | +/** |
| 13 | + * @brief A class to treat the Chebyshev expansion. |
| 14 | + * |
| 15 | + * @author qianrui on 2022-05-18 |
| 16 | + * @details |
| 17 | + * Math: |
| 18 | + * I. |
| 19 | + * Chebyshev polynomial: |
| 20 | + * T_0(x) = 1; |
| 21 | + * T_1(x) = x; |
| 22 | + * T_2(x) = 2x^2 -1; |
| 23 | + * T_3(x) = 4x^3-3x; |
| 24 | + * T_{n+2}(x) = 2xT_{n+1}(x) - T_n(x) |
| 25 | + * II. |
| 26 | + * Any analytical function f(x) can be expanded by Chebyshev polynomial: |
| 27 | + * f(x) = \sum_{n=0}^{norder} C_n[f]*T_n(x) (|x| < 1), |
| 28 | + * where C_n[f] = \frac{2-\delta_{n0}}{\pi} \int_0^\pi f(cos(\theta))cos(n\theta) d\theta |
| 29 | + * Here C_n can be calculate with FFT. |
| 30 | + * III. |
| 31 | + * Any functions of linear Operator or matrix f(\hat{A}) or f(A) can also be expanded as well: |
| 32 | + * f(A) = \sum_{n=0}^{norder-1} C_n[f]*T_n(A) (|all eigenvalues of A| < 1). |
| 33 | + * f(A)v = \sum_{n=0}^{norder-1} C_n[f]*T_n(A)v, where v is column vector |
| 34 | + * = \sum_{n=0}^{norder-1} C_n[f]*v_n, where v_n = T_n(A)v, v_0 = v |
| 35 | + * v_{n+2} = 2Av_{n+1} - v_n |
| 36 | + * IV. |
| 37 | + * v^+f(A)v = \sum_{n=0}^{norder-1} C_n[f]*v^+v_n = \sum_{n=0}^{norder-1} C_n[f] * w_n, |
| 38 | + * where w_n = v^+ * v_n = v^+ * T_n(A) * v |
| 39 | + * |
| 40 | + * USAGE: |
| 41 | + * Chebyshev che(10); // constructe a chebyshev expansion of 10 orders (n=0,1,...,9) |
| 42 | + * 1. che.calcoef_real(&a, &A::cos) // calculate C_n[f], where f is a.cos |
| 43 | + * for(inti=0;i<10;++i) cout<<che.coef_real[i]<<endl; //Then we print C_n[f] |
| 44 | + * |
| 45 | + * che.calcoef_complex(&b, &B::expi) // calculate C_n[g], where g is b.expi |
| 46 | + * for(inti=0;i<10;++i) cout<<che.coef_complex[i]<<endl; //Then we print C_n[g] |
| 47 | + * |
| 48 | + * che.calcoef_pair(&c, &C::cos, &C::sin) // calculate C_n[g], where g is (c.cos, c.sin) |
| 49 | + * for(inti=0;i<10;++i) cout<<che.coef_complex[i]<<endl; //Then we print C_n[g] |
| 50 | + * |
| 51 | + * 2. che.calcoef_real(&occ, &Occupy::fd) |
| 52 | + * che.calfinalvec_real(&hamilt, &Hamilt::hpsi, psi_in, psi_out, npw); |
| 53 | + * //calculate f(H)|psi>, where f is occ.fd and H is hamilt.hpsi |
| 54 | + * |
| 55 | + * che.calcoef_complex(&b, &B::expi) |
| 56 | + * che.calfinalvec_complex(&hamilt, &Hamilt::hpsi, psi_in, psi_out, npw, npwx, nbands); |
| 57 | + * //calculate exp(iH)|psi_i> |
| 58 | + * |
| 59 | + * 3. che.tracepolyA(&hamilt, &Hamilt::hpsi, psi_in, npw, npwx, nbands) |
| 60 | + * //calculate \sum_i^{nbands} <psi_i|T_n(H)|psi_i> |
| 61 | + * |
| 62 | + * 4. che.recurs_complex(&hamilt, &Hamilt::hpsi, vp1, v, vm1, npw) |
| 63 | + * //calculate vp1: |vp1> = 2 H|v> - |vm1>; |
| 64 | + * |
| 65 | + */ |
| 66 | +template<typename REAL> |
| 67 | +class Chebyshev |
| 68 | +{ |
| 69 | + |
| 70 | +public: |
| 71 | + |
| 72 | + // constructor and deconstructor |
| 73 | + Chebyshev(const int norder); |
| 74 | + ~Chebyshev(); |
| 75 | + |
| 76 | +public: |
| 77 | + // I. |
| 78 | + // Calculate coefficients C_n[f], where f is a function of real number |
| 79 | + template<class T> |
| 80 | + void calcoef_real(T *ptr, REAL (T::*fun)(REAL)); |
| 81 | + // Calculate coefficients C_n[g], where g is a function of complex number |
| 82 | + template<class T> |
| 83 | + void calcoef_complex(T *ptr, std::complex<REAL> (T::*fun)(std::complex<REAL>)); |
| 84 | + // Calculate coefficients C_n[g], where g is a general complex function g(x)=(g1(x), g2(x)) e.g. exp(ix)=(cos(x), sin(x)) |
| 85 | + template<class T> |
| 86 | + void calcoef_pair(T *ptr, REAL (T::*fun1)(REAL), REAL (T::*fun2)(REAL)); |
| 87 | + |
| 88 | + // II. |
| 89 | + // Calculate the final vector f(A)v = \sum_{n=0}^{norder-1} C_n[f]*v_n |
| 90 | + // Here funA(in, out) means the map v -> Av : funA(v, Av) |
| 91 | + // Here m represents we treat m vectors at the same time: f(A)[v1,...,vm] and funA(in,out,m) means [v1,...,vm] -> A[v1,...,vm] |
| 92 | + // N is dimension of vector, and LDA is the distance between the first number of v_n and v_{n+1}. |
| 93 | + // LDA >= max(1, N). It is the same as the BLAS lib. |
| 94 | + // calfinalvec_real uses C_n[f], where f is a function of real number and A is a real Operator. |
| 95 | + template<class T> |
| 96 | + void calfinalvec(T *ptr, |
| 97 | + void (T::*funA)(REAL *in, REAL *out, const int), |
| 98 | + REAL *wavein, REAL *waveout, |
| 99 | + const int N, const int LDA = 1, const int m = 1); |
| 100 | + |
| 101 | + // calfinalvec_real uses C_n[f], where f is a function of real number and A is a complex Operator. |
| 102 | + template<class T> |
| 103 | + void calfinalvec_real(T *ptr, |
| 104 | + void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int), |
| 105 | + std::complex<REAL> *wavein, std::complex<REAL> *waveout, |
| 106 | + const int N, const int LDA = 1, const int m = 1); |
| 107 | + |
| 108 | + // calfinalvec_complex uses C_n[g], where g is a function of complex number and A is a complex Operator. |
| 109 | + template<class T> |
| 110 | + void calfinalvec_complex(T *ptr, |
| 111 | + void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int), |
| 112 | + std::complex<REAL> *wavein, std::complex<REAL> *waveout, |
| 113 | + const int N, const int LDA = 1, const int m = 1); |
| 114 | + |
| 115 | + // III. |
| 116 | + // \sum_i v_i^+f(A)v_i = \sum_{i,n=0}^{norder-1} C_n[f]*v_i^+v_{i,n} = \sum_{n=0}^{norder-1} C_n[f] * w_n |
| 117 | + // calculate the sum of diagonal elements (Trace) of T_n(A) in v-represent: w_n = \sum_i v_i^+ * T_n(A) * v_i |
| 118 | + // i = 1,2,...m |
| 119 | + template<class T> |
| 120 | + void tracepolyA( |
| 121 | + T *ptr, void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int), |
| 122 | + std::complex<REAL> *wavein, |
| 123 | + const int N, const int LDA = 1, const int m = 1); |
| 124 | + |
| 125 | + // IV. |
| 126 | + // recurs fomula: v_{n+1} = 2Av_n - v_{n-1} |
| 127 | + // get v_{n+1} from v_n and v_{n-1} |
| 128 | + // recurs_complex: A is a real operator |
| 129 | + template<class T> |
| 130 | + void recurs_real( |
| 131 | + T *ptr, void (T::*funA)(REAL *in, REAL *out, const int), |
| 132 | + REAL* arraynp1, //v_{n+1} |
| 133 | + REAL* arrayn, //v_n |
| 134 | + REAL* arrayn_1, //v_{n-1} |
| 135 | + const int N, const int LDA = 1, const int m = 1); |
| 136 | + // recurs_complex: A is a complex operator |
| 137 | + template<class T> |
| 138 | + void recurs_complex( |
| 139 | + T *ptr, void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int), |
| 140 | + std::complex<REAL>* arraynp1, //v_{n+1} |
| 141 | + std::complex<REAL>* arrayn, //v_n |
| 142 | + std::complex<REAL>* arrayn_1, //v_{n-1} |
| 143 | + const int N, const int LDA = 1, const int m = 1); |
| 144 | + |
| 145 | + // V. |
| 146 | + // auxiliary function |
| 147 | + // Abs of all eigenvalues of A should be less than 1. |
| 148 | + // Thus \hat(a) = \frac{(A - (tmax+tmin)/2)}{(tmax-tmin)/2} |
| 149 | + // tmax >= all eigenvalues; tmin <= all eigenvalues |
| 150 | + // Here we check if the trial number tmax(tmin) is the upper(lower) bound of eigenvalues and return it. |
| 151 | + template<class T> |
| 152 | + bool checkconverge( |
| 153 | + T *ptr, void (T::*funA)(std::complex<REAL> *in, std::complex<REAL> *out, const int), |
| 154 | + std::complex<REAL> *wavein, const int N, |
| 155 | + REAL& tmax, //trial number for upper bound |
| 156 | + REAL& tmin, //trial number for lower bound |
| 157 | + REAL stept); //tmax = max() + stept, tmin = min() - stept |
| 158 | + |
| 159 | +public: |
| 160 | + //Members: |
| 161 | + int norder; // order of Chebyshev expansion |
| 162 | + int norder2; // 2 * norder * EXTEND |
| 163 | + |
| 164 | + REAL* coef_real; // expansion coefficient of each order |
| 165 | + std::complex<REAL>* coef_complex; // expansion coefficient of each order |
| 166 | + FFTW<REAL> fftw; //use for fftw |
| 167 | + REAL *polytrace; //w_n = \sum_i v^+ * T_n(A) * v |
| 168 | + |
| 169 | + bool getcoef_real; //coef_real has been calculated |
| 170 | + bool getcoef_complex; //coef_complex has been calculated |
| 171 | + |
| 172 | +private: |
| 173 | + //SI. |
| 174 | + //calculate dot product <psi_L|psi_R> |
| 175 | + REAL ddot_real( |
| 176 | + const std::complex<REAL>* psi_L, |
| 177 | + const std::complex<REAL>* psi_R, |
| 178 | + const int N, const int LDA = 1, const int m = 1); |
| 179 | + |
| 180 | + |
| 181 | +}; |
| 182 | + |
| 183 | +template<> |
| 184 | +class FFTW<double> |
| 185 | +{ |
| 186 | +public: |
| 187 | + FFTW(const int norder2_in); |
| 188 | + ~FFTW(); |
| 189 | + void execute_fftw(); |
| 190 | + double* dcoef; //[norder2] |
| 191 | + fftw_complex *ccoef; |
| 192 | + fftw_plan coef_plan; |
| 193 | +}; |
| 194 | + |
| 195 | +#ifdef __MIX_PRECISION |
| 196 | +template<> |
| 197 | +class FFTW<float> |
| 198 | +{ |
| 199 | +public: |
| 200 | + FFTW(const int norder2_in); |
| 201 | + ~FFTW(); |
| 202 | + void execute_fftw(); |
| 203 | + double* dcoef; //[norder2] |
| 204 | + fftwf_complex *ccoef; |
| 205 | + fftwf_plan coef_plan; |
| 206 | +}; |
| 207 | +#endif |
| 208 | +} |
| 209 | + |
| 210 | +#include "math_chebyshev_def.h" |
| 211 | + |
| 212 | +#endif |
0 commit comments