@@ -28,12 +28,13 @@ void compute_ekb(const Parallel_Orbitals* pv,
2828 double * ekb,
2929 std::ofstream& ofs_running)
3030{
31+ assert (pv->nloc_wfc > 0 && pv->nloc > 0 );
3132
3233 std::complex <double >* tmp1 = new std::complex <double >[pv->nloc_wfc ];
3334 ModuleBase::GlobalFunc::ZEROS (tmp1, pv->nloc_wfc );
3435
35- std::complex <double >* Eij = new std::complex <double >[pv->nloc ];
36- ModuleBase::GlobalFunc::ZEROS (Eij , pv->nloc );
36+ std::complex <double >* eij = new std::complex <double >[pv->nloc ];
37+ ModuleBase::GlobalFunc::ZEROS (eij , pv->nloc );
3738
3839 ScalapackConnector::gemm (' N' ,
3940 ' N' ,
@@ -70,7 +71,7 @@ void compute_ekb(const Parallel_Orbitals* pv,
7071 1 ,
7172 pv->desc_wfc ,
7273 0.0 ,
73- Eij ,
74+ eij ,
7475 1 ,
7576 1 ,
7677 pv->desc_Eij );
@@ -83,12 +84,11 @@ void compute_ekb(const Parallel_Orbitals* pv,
8384 ofs_running << " Eij:" << std::endl;
8485 for (int i = 0 ; i < pv->nrow_bands ; i++)
8586 {
87+ const int in = i * pv->ncol ;
8688 for (int j = 0 ; j < pv->ncol_bands ; j++)
8789 {
88- double aa = 0.0 ;
89- double bb = 0.0 ;
90- aa = Eij[i * pv->ncol + j].real ();
91- bb = Eij[i * pv->ncol + j].imag ();
90+ double aa = eij[in + j].real ();
91+ double bb = eij[in + j].imag ();
9292 if (std::abs (aa) < PARAM.inp .td_print_eij )
9393 {
9494 aa = 0.0 ;
@@ -112,8 +112,10 @@ void compute_ekb(const Parallel_Orbitals* pv,
112112 int info = 0 ;
113113 int naroc[2 ] = {0 , 0 };
114114
115- double * Eii = new double [nband];
116- ModuleBase::GlobalFunc::ZEROS (Eii, nband);
115+ assert (nband > 0 );
116+ double * eii = new double [nband];
117+ ModuleBase::GlobalFunc::ZEROS (eii, nband);
118+
117119 for (int iprow = 0 ; iprow < pv->dim0 ; ++iprow)
118120 {
119121 for (int ipcol = 0 ; ipcol < pv->dim1 ; ++ipcol)
@@ -138,18 +140,18 @@ void compute_ekb(const Parallel_Orbitals* pv,
138140 }
139141 if (igcol == igrow)
140142 {
141- Eii [igcol] = Eij [j * naroc[0 ] + i].real ();
143+ eii [igcol] = eij [j * naroc[0 ] + i].real ();
142144 }
143145 }
144146 }
145147 }
146148 } // loop ipcol
147149 } // loop iprow
148- info = MPI_Allreduce (Eii , ekb, nband, MPI_DOUBLE, MPI_SUM, pv->comm ());
150+ info = MPI_Allreduce (eii , ekb, nband, MPI_DOUBLE, MPI_SUM, pv->comm ());
149151
150152 delete[] tmp1;
151- delete[] Eij ;
152- delete[] Eii ;
153+ delete[] eij ;
154+ delete[] eii ;
153155}
154156
155157void compute_ekb_tensor (const Parallel_Orbitals* pv,
@@ -160,12 +162,14 @@ void compute_ekb_tensor(const Parallel_Orbitals* pv,
160162 ct::Tensor& ekb,
161163 std::ofstream& ofs_running)
162164{
165+ assert (pv->nloc_wfc > 0 && pv->nloc > 0 );
166+
163167 // Create Tensor objects for temporary data
164168 ct::Tensor tmp1 (ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({pv->nloc_wfc }));
165169 tmp1.zero ();
166170
167- ct::Tensor Eij (ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({pv->nloc }));
168- Eij .zero ();
171+ ct::Tensor eij (ct::DataType::DT_COMPLEX_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({pv->nloc }));
172+ eij .zero ();
169173
170174 // Perform matrix multiplication: tmp1 = Htmp * psi_k
171175 ScalapackConnector::gemm (' N' ,
@@ -188,7 +192,7 @@ void compute_ekb_tensor(const Parallel_Orbitals* pv,
188192 1 ,
189193 pv->desc_wfc );
190194
191- // Perform matrix multiplication: Eij = psi_k^dagger * tmp1
195+ // Perform matrix multiplication: eij = psi_k^dagger * tmp1
192196 ScalapackConnector::gemm (' C' ,
193197 ' N' ,
194198 nband,
@@ -204,7 +208,7 @@ void compute_ekb_tensor(const Parallel_Orbitals* pv,
204208 1 ,
205209 pv->desc_wfc ,
206210 0.0 ,
207- Eij .data <std::complex <double >>(),
211+ eij .data <std::complex <double >>(),
208212 1 ,
209213 1 ,
210214 pv->desc_Eij );
@@ -217,12 +221,11 @@ void compute_ekb_tensor(const Parallel_Orbitals* pv,
217221 ofs_running << " Eij:" << std::endl;
218222 for (int i = 0 ; i < pv->nrow_bands ; i++)
219223 {
224+ const int in = i * pv->ncol ;
220225 for (int j = 0 ; j < pv->ncol_bands ; j++)
221226 {
222- double aa = 0.0 ;
223- double bb = 0.0 ;
224- aa = Eij.data <std::complex <double >>()[i * pv->ncol + j].real ();
225- bb = Eij.data <std::complex <double >>()[i * pv->ncol + j].imag ();
227+ double aa = eij.data <std::complex <double >>()[in + j].real ();
228+ double bb = eij.data <std::complex <double >>()[in + j].imag ();
226229 if (std::abs (aa) < PARAM.inp .td_print_eij )
227230 {
228231 aa = 0.0 ;
@@ -246,9 +249,10 @@ void compute_ekb_tensor(const Parallel_Orbitals* pv,
246249 int info = 0 ;
247250 int naroc[2 ] = {0 , 0 };
248251
249- // Create a Tensor for Eii
250- ct::Tensor Eii (ct::DataType::DT_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({nband}));
251- Eii.zero ();
252+ // Create a Tensor for eii
253+ assert (nband > 0 );
254+ ct::Tensor eii (ct::DataType::DT_DOUBLE, ct::DeviceType::CpuDevice, ct::TensorShape ({nband}));
255+ eii.zero ();
252256
253257 for (int iprow = 0 ; iprow < pv->dim0 ; ++iprow)
254258 {
@@ -274,7 +278,7 @@ void compute_ekb_tensor(const Parallel_Orbitals* pv,
274278 }
275279 if (igcol == igrow)
276280 {
277- Eii .data <double >()[igcol] = Eij .data <std::complex <double >>()[j * naroc[0 ] + i].real ();
281+ eii .data <double >()[igcol] = eij .data <std::complex <double >>()[j * naroc[0 ] + i].real ();
278282 }
279283 }
280284 }
@@ -283,7 +287,7 @@ void compute_ekb_tensor(const Parallel_Orbitals* pv,
283287 } // loop iprow
284288
285289 // Perform MPI reduction to compute ekb
286- info = MPI_Allreduce (Eii .data <double >(), ekb.data <double >(), nband, MPI_DOUBLE, MPI_SUM, pv->comm ());
290+ info = MPI_Allreduce (eii .data <double >(), ekb.data <double >(), nband, MPI_DOUBLE, MPI_SUM, pv->comm ());
287291}
288292
289293template <typename Device>
@@ -306,11 +310,11 @@ void compute_ekb_tensor_lapack(const Parallel_Orbitals* pv,
306310 ct::TensorShape ({nlocal * nband})); // tmp1 shape: nlocal * nband
307311 tmp1.zero ();
308312
309- ct::Tensor Eij (ct::DataType::DT_COMPLEX_DOUBLE,
313+ ct::Tensor eij (ct::DataType::DT_COMPLEX_DOUBLE,
310314 ct_device_type,
311- ct::TensorShape ({nlocal * nlocal})); // Eij shape: nlocal * nlocal
315+ ct::TensorShape ({nlocal * nlocal})); // eij shape: nlocal * nlocal
312316 // Why not use nband * nband ?????
313- Eij .zero ();
317+ eij .zero ();
314318
315319 std::complex <double > alpha = {1.0 , 0.0 };
316320 std::complex <double > beta = {0.0 , 0.0 };
@@ -330,7 +334,7 @@ void compute_ekb_tensor_lapack(const Parallel_Orbitals* pv,
330334 tmp1.data <std::complex <double >>(),
331335 nlocal); // Leading dimension of tmp1
332336
333- // Perform matrix multiplication: Eij = psi_k^dagger * tmp1
337+ // Perform matrix multiplication: eij = psi_k^dagger * tmp1
334338 ct::kernels::blas_gemm<std::complex <double >, ct_Device>()(' C' ,
335339 ' N' ,
336340 nband,
@@ -342,25 +346,24 @@ void compute_ekb_tensor_lapack(const Parallel_Orbitals* pv,
342346 tmp1.data <std::complex <double >>(),
343347 nlocal, // Leading dimension of tmp1
344348 &beta,
345- Eij .data <std::complex <double >>(),
346- nlocal); // Leading dimension of Eij
349+ eij .data <std::complex <double >>(),
350+ nlocal); // Leading dimension of eij
347351
348352 if (PARAM.inp .td_print_eij >= 0.0 )
349353 {
350- ct::Tensor Eij_cpu = Eij .to_device <ct::DEVICE_CPU>();
354+ ct::Tensor eij_cpu = eij .to_device <ct::DEVICE_CPU>();
351355
352356 ofs_running
353357 << " ------------------------------------------------------------------------------------------------"
354358 << std::endl;
355359 ofs_running << " Eij:" << std::endl;
356360 for (int i = 0 ; i < nband; i++)
357361 {
362+ const int in = i * nlocal;
358363 for (int j = 0 ; j < nband; j++)
359364 {
360- double aa = 0.0 ;
361- double bb = 0.0 ;
362- aa = Eij_cpu.data <std::complex <double >>()[i * nlocal + j].real ();
363- bb = Eij_cpu.data <std::complex <double >>()[i * nlocal + j].imag ();
365+ double aa = eij_cpu.data <std::complex <double >>()[in + j].real ();
366+ double bb = eij_cpu.data <std::complex <double >>()[in + j].imag ();
364367 if (std::abs (aa) < PARAM.inp .td_print_eij )
365368 {
366369 aa = 0.0 ;
@@ -381,15 +384,15 @@ void compute_ekb_tensor_lapack(const Parallel_Orbitals* pv,
381384 << std::endl;
382385 }
383386
384- // Extract diagonal elements of Eij into ekb
387+ // Extract diagonal elements of eij into ekb
385388 if (ct_device_type == ct::DeviceType::GpuDevice)
386389 {
387390 // GPU implementation
388391 for (int i = 0 ; i < nband; ++i)
389392 {
390393 base_device::memory::synchronize_memory_op<double , Device, Device>()(
391394 ekb.data <double >() + i,
392- reinterpret_cast <const double *>(Eij .data <std::complex <double >>() + i * nlocal + i),
395+ reinterpret_cast <const double *>(eij .data <std::complex <double >>() + i * nlocal + i),
393396 1 );
394397 }
395398 }
@@ -398,7 +401,7 @@ void compute_ekb_tensor_lapack(const Parallel_Orbitals* pv,
398401 // CPU implementation
399402 for (int i = 0 ; i < nband; ++i)
400403 {
401- ekb.data <double >()[i] = Eij .data <std::complex <double >>()[i * nlocal + i].real ();
404+ ekb.data <double >()[i] = eij .data <std::complex <double >>()[i * nlocal + i].real ();
402405 }
403406 }
404407}
0 commit comments