@@ -445,7 +445,10 @@ void Diago_DavSubspace<T, Device>::cal_elem(const int& dim,
445445 if (this ->diag_comm .nproc > 1 )
446446 {
447447 auto * swap = new T[notconv * this ->nbase_x ];
448+ auto * target = new T[notconv * this ->nbase_x ];
449+
448450 syncmem_complex_op ()(this ->ctx , this ->ctx , swap, hcc + nbase * this ->nbase_x , notconv * this ->nbase_x );
451+
449452 if (std::is_same<T, double >::value)
450453 {
451454 Parallel_Reduce::reduce_pool (hcc + nbase * this ->nbase_x , notconv * this ->nbase_x );
@@ -455,8 +458,15 @@ void Diago_DavSubspace<T, Device>::cal_elem(const int& dim,
455458 {
456459 if (base_device::get_current_precision (swap) == " single" )
457460 {
461+ // MPI_Reduce(swap,
462+ // hcc + nbase * this->nbase_x,
463+ // notconv * this->nbase_x,
464+ // MPI_COMPLEX,
465+ // MPI_SUM,
466+ // 0,
467+ // this->diag_comm.comm);
458468 MPI_Reduce (swap,
459- hcc + nbase * this -> nbase_x ,
469+ target ,
460470 notconv * this ->nbase_x ,
461471 MPI_COMPLEX,
462472 MPI_SUM,
@@ -465,21 +475,36 @@ void Diago_DavSubspace<T, Device>::cal_elem(const int& dim,
465475 }
466476 else
467477 {
478+ // MPI_Reduce(swap,
479+ // hcc + nbase * this->nbase_x,
480+ // notconv * this->nbase_x,
481+ // MPI_DOUBLE_COMPLEX,
482+ // MPI_SUM,
483+ // 0,
484+ // this->diag_comm.comm);
468485 MPI_Reduce (swap,
469- hcc + nbase * this -> nbase_x ,
486+ target ,
470487 notconv * this ->nbase_x ,
471488 MPI_DOUBLE_COMPLEX,
472489 MPI_SUM,
473490 0 ,
474491 this ->diag_comm .comm );
475492 }
476493
494+ syncmem_complex_op ()(this ->ctx , this ->ctx , hcc + nbase * this ->nbase_x , target, notconv * this ->nbase_x );
477495 syncmem_complex_op ()(this ->ctx , this ->ctx , swap, scc + nbase * this ->nbase_x , notconv * this ->nbase_x );
478496
479497 if (base_device::get_current_precision (swap) == " single" )
480498 {
499+ // MPI_Reduce(swap,
500+ // scc + nbase * this->nbase_x,
501+ // notconv * this->nbase_x,
502+ // MPI_COMPLEX,
503+ // MPI_SUM,
504+ // 0,
505+ // this->diag_comm.comm);
481506 MPI_Reduce (swap,
482- scc + nbase * this -> nbase_x ,
507+ target ,
483508 notconv * this ->nbase_x ,
484509 MPI_COMPLEX,
485510 MPI_SUM,
@@ -488,16 +513,25 @@ void Diago_DavSubspace<T, Device>::cal_elem(const int& dim,
488513 }
489514 else
490515 {
516+ // MPI_Reduce(swap,
517+ // scc + nbase * this->nbase_x,
518+ // notconv * this->nbase_x,
519+ // MPI_DOUBLE_COMPLEX,
520+ // MPI_SUM,
521+ // 0,
522+ // this->diag_comm.comm);
491523 MPI_Reduce (swap,
492- scc + nbase * this -> nbase_x ,
524+ target ,
493525 notconv * this ->nbase_x ,
494526 MPI_DOUBLE_COMPLEX,
495527 MPI_SUM,
496528 0 ,
497529 this ->diag_comm .comm );
498530 }
499531 }
532+ syncmem_complex_op ()(this ->ctx , this ->ctx , scc + nbase * this ->nbase_x , target, notconv * this ->nbase_x );
500533 delete[] swap;
534+ delete[] target;
501535 }
502536#endif
503537
0 commit comments