Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
eee8b75
Phase 1 of RT-TDDFT GPU Acceleration: Rewriting existing code using T…
AsTonyshment Dec 26, 2024
aa4ceb1
[pre-commit.ci lite] apply automatic fixes
pre-commit-ci-lite[bot] Dec 26, 2024
069c434
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Dec 27, 2024
e45398a
Initialize int info in bandenergy.cpp
AsTonyshment Dec 27, 2024
a6040ec
Initialize double aa, bb in bandenergy.cpp
AsTonyshment Dec 27, 2024
0bebb32
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Dec 30, 2024
ac4e737
Merge branch 'TDDFT_GPU_phase_1' of github.com:AsTonyshment/abacus-de…
AsTonyshment Dec 30, 2024
8ed6407
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Dec 31, 2024
e67b42f
Merge branch 'TDDFT_GPU_phase_1' of github.com:AsTonyshment/abacus-de…
AsTonyshment Dec 31, 2024
9e4b889
Fix a bug where CopyFrom caused shared data between tensors, using =(…
AsTonyshment Dec 31, 2024
9ca053d
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 2, 2025
ba12e92
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 3, 2025
3110720
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 3, 2025
eda3add
RT-TDDFT GPU Acceleration (Phase 2): Adding needed BLAS and LAPACK su…
AsTonyshment Jan 3, 2025
4685fb8
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 6, 2025
717c164
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 6, 2025
e3c493d
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 10, 2025
d89f9a3
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 11, 2025
7f94b4d
LAPACK wrapper functions: change const basic-type input parameters fr…
AsTonyshment Jan 13, 2025
0e458b9
Did nothing, just formatting esolver.cpp
AsTonyshment Jan 13, 2025
824168d
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 14, 2025
b9f8ca4
Merge branch 'TDDFT_GPU_phase_1' of github.com:AsTonyshment/abacus-de…
AsTonyshment Jan 14, 2025
bdc6cf6
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 15, 2025
fbe01cd
Merge branch 'TDDFT_GPU_phase_1' of github.com:AsTonyshment/abacus-de…
AsTonyshment Jan 15, 2025
5044ac5
Merge branch 'develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 17, 2025
d732808
Merge branch 'TDDFT_GPU_phase_1' of github.com:AsTonyshment/abacus-de…
AsTonyshment Jan 17, 2025
0e6c42c
Core algorithm: RT-TD now has preliminary support for GPU computation
AsTonyshment Jan 17, 2025
20fd170
Fix GitHub CI CUDA build bug due to deleted variable
AsTonyshment Jan 17, 2025
1d9e60f
Refactor some files
AsTonyshment Jan 18, 2025
c6559dd
Getting ready for gathering MPI processes
AsTonyshment Jan 18, 2025
698bec2
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 18, 2025
38ad956
Merge branch 'TDDFT_GPU_phase_1' of github.com:AsTonyshment/abacus-de…
AsTonyshment Jan 18, 2025
4f24415
MPI multi-process compatibility
AsTonyshment Jan 19, 2025
cca5fa9
Fix GitHub CI MPI compilation bug
AsTonyshment Jan 19, 2025
62df525
Minor fix and refactor
AsTonyshment Jan 20, 2025
8b526a9
Merge branch 'deepmodeling:develop' into TDDFT_GPU_phase_1
AsTonyshment Jan 20, 2025
fde9d05
Initialize double aa, bb and one line for one variable
AsTonyshment Jan 21, 2025
87893a9
Rename bandenergy.cpp to band_energy.cpp and corresponding adjustments
AsTonyshment Jan 21, 2025
a02a352
Fix compile error and change CMakeLists accordingly
AsTonyshment Jan 21, 2025
2bdc83f
Merge branch 'TDDFT_GPU_phase_1' of github.com:AsTonyshment/abacus-de…
AsTonyshment Jan 21, 2025
214bdb8
Initialize int naroc
AsTonyshment Jan 21, 2025
e4ab72a
Initialize MPI related variables: myid, num_procs and root_proc
AsTonyshment Jan 21, 2025
dc54ffd
Refactor Propagator class implementation into multiple files for bett…
AsTonyshment Jan 21, 2025
079f791
Remove all GlobalV::ofs_running from RT-TDDFT core algorithms and pas…
AsTonyshment Jan 21, 2025
c0ca245
Add assert in some places and optimize redundant index calculations i…
AsTonyshment Jan 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions source/module_base/lapack_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ extern "C"

// zgetrf computes the LU factorization of a general matrix
// while zgetri takes its output to perform matrix inversion
void zgetrf_(const int* m, const int *n, const std::complex<double> *A, const int *lda, int *ipiv, const int* info);
void zgetri_(const int* n, std::complex<double> *A, const int *lda, int *ipiv, std::complex<double> *work, int *lwork, const int *info);
void zgetrf_(const int* m, const int *n, std::complex<double> *A, const int *lda, int *ipiv, int* info);
void zgetri_(const int* n, std::complex<double>* A, const int* lda, const int* ipiv, std::complex<double>* work, const int* lwork, int* info);

// if trans=='N': C = alpha * A * A.H + beta * C
// if trans=='C': C = alpha * A.H * A + beta * C
Expand Down
45 changes: 45 additions & 0 deletions source/module_base/module_container/ATen/kernels/lapack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,41 @@ struct lapack_dngvd<T, DEVICE_CPU> {
}
};

/// CPU specialization: LU factorization of a general m-by-n matrix via the
/// overloaded lapackConnector::getrf wrapper (dispatches to s/d/c/z getrf).
/// Throws std::runtime_error when LAPACK reports a nonzero info.
template <typename T>
struct lapack_getrf<T, DEVICE_CPU> {
    void operator()(const int& m,
                    const int& n,
                    T* Mat,
                    const int& lda,
                    int* ipiv,
                    int& info)
    {
        // info receives the LAPACK status code; 0 means success.
        lapackConnector::getrf(m, n, Mat, lda, ipiv, info);
        if (info == 0) {
            return;
        }
        throw std::runtime_error("getrf failed with info = " + std::to_string(info));
    }
};

/// CPU specialization: matrix inversion from an LU factorization via the
/// overloaded lapackConnector::getri wrapper (dispatches to s/d/c/z getri).
/// Throws std::runtime_error when LAPACK reports a nonzero info.
template <typename T>
struct lapack_getri<T, DEVICE_CPU> {
    void operator()(const int& n,
                    T* Mat,
                    const int& lda,
                    const int* ipiv,
                    T* work,
                    const int& lwork,
                    int& info)
    {
        // info receives the LAPACK status code; 0 means success.
        lapackConnector::getri(n, Mat, lda, ipiv, work, lwork, info);
        if (info == 0) {
            return;
        }
        throw std::runtime_error("getri failed with info = " + std::to_string(info));
    }
};

template struct set_matrix<float, DEVICE_CPU>;
template struct set_matrix<double, DEVICE_CPU>;
template struct set_matrix<std::complex<float>, DEVICE_CPU>;
Expand All @@ -149,5 +184,15 @@ template struct lapack_dngvd<double, DEVICE_CPU>;
template struct lapack_dngvd<std::complex<float>, DEVICE_CPU>;
template struct lapack_dngvd<std::complex<double>, DEVICE_CPU>;

// Explicitly instantiate the CPU getrf/getri kernels for every supported
// element type, so their definitions are emitted in this translation unit.
template struct lapack_getrf<float, DEVICE_CPU>;
template struct lapack_getrf<double, DEVICE_CPU>;
template struct lapack_getrf<std::complex<float>, DEVICE_CPU>;
template struct lapack_getrf<std::complex<double>, DEVICE_CPU>;

template struct lapack_getri<float, DEVICE_CPU>;
template struct lapack_getri<double, DEVICE_CPU>;
template struct lapack_getri<std::complex<float>, DEVICE_CPU>;
template struct lapack_getri<std::complex<double>, DEVICE_CPU>;

} // namespace kernels
} // namespace container
26 changes: 26 additions & 0 deletions source/module_base/module_container/ATen/kernels/lapack.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,32 @@ struct lapack_dngvd {
Real* eigen_val);
};


/// @brief Device-dispatched LU factorization of a general m-by-n matrix
///        (LAPACK ?getrf family).
///
/// @param m    Number of rows of Mat.
/// @param n    Number of columns of Mat.
/// @param Mat  On entry, the matrix to factor; on exit, its L and U factors.
///             NOTE(review): LAPACK assumes column-major storage — confirm callers.
/// @param lda  Leading dimension of Mat.
/// @param ipiv Output pivot indices produced by the factorization.
/// @param info LAPACK status; 0 on success (the CPU implementation throws on nonzero).
template <typename T, typename Device>
struct lapack_getrf {
    void operator()(
        const int& m,
        const int& n,
        T* Mat,
        const int& lda,
        int* ipiv,
        int& info);
};


/// @brief Device-dispatched matrix inversion from an LU factorization
///        (LAPACK ?getri family); consumes the output of lapack_getrf.
///
/// @param n     Order of the matrix Mat.
/// @param Mat   On entry, the LU factors from getrf; on exit, the inverse matrix.
/// @param lda   Leading dimension of Mat.
/// @param ipiv  Pivot indices produced by the preceding getrf call.
/// @param work  Workspace array.
/// @param lwork Size of the workspace.
/// @param info  LAPACK status; 0 on success (the CPU implementation throws on nonzero).
template <typename T, typename Device>
struct lapack_getri {
    void operator()(
        const int& n,
        T* Mat,
        const int& lda,
        const int* ipiv,
        T* work,
        const int& lwork,
        int& info);
};


#if defined(__CUDA) || defined(__ROCM)
// TODO: Use C++ singleton to manage the GPU handles
void createGpuSolverHandle(); // create cusolver handle
Expand Down
51 changes: 51 additions & 0 deletions source/module_base/module_container/base/third_party/lapack.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,15 @@ void dtrtri_(const char* uplo, const char* diag, const int* n, double* a, const
void ctrtri_(const char* uplo, const char* diag, const int* n, std::complex<float>* a, const int* lda, int* info);
void ztrtri_(const char* uplo, const char* diag, const int* n, std::complex<double>* a, const int* lda, int* info);

// ?getrf computes the LU factorization of a general m-by-n matrix.
void sgetrf_(const int* m, const int* n, float* a, const int* lda, int* ipiv, int* info);
void dgetrf_(const int* m, const int* n, double* a, const int* lda, int* ipiv, int* info);
void cgetrf_(const int* m, const int* n, std::complex<float>* a, const int* lda, int* ipiv, int* info);
void zgetrf_(const int* m, const int* n, std::complex<double>* a, const int* lda, int* ipiv, int* info);

// ?getri computes the inverse of a matrix using the LU factorization from ?getrf.
void sgetri_(const int* n, float* A, const int* lda, const int* ipiv, float* work, const int* lwork, int* info);
void dgetri_(const int* n, double* A, const int* lda, const int* ipiv, double* work, const int* lwork, int* info);
void cgetri_(const int* n, std::complex<float>* A, const int* lda, const int* ipiv, std::complex<float>* work, const int* lwork, int* info);
void zgetri_(const int* n, std::complex<double>* A, const int* lda, const int* ipiv, std::complex<double>* work, const int* lwork, int* info);
}

// Class LapackConnector provide the connector to fortran lapack routine.
Expand Down Expand Up @@ -321,6 +330,48 @@ void trtri( const char &uplo, const char &diag, const int &n, std::complex<doubl
ztrtri_( &uplo, &diag, &n, A, &lda, &info);
}

// C++ overload set mapping getrf onto the s/d/c/z LAPACK routines by element
// type; reference style (`const int&`) matches the sibling getri overloads.
static inline
void getrf(const int& m, const int& n, float* A, const int& lda, int* ipiv, int& info)
{
    sgetrf_(&m, &n, A, &lda, ipiv, &info);
}
static inline
void getrf(const int& m, const int& n, double* A, const int& lda, int* ipiv, int& info)
{
    dgetrf_(&m, &n, A, &lda, ipiv, &info);
}
static inline
void getrf(const int& m, const int& n, std::complex<float>* A, const int& lda, int* ipiv, int& info)
{
    cgetrf_(&m, &n, A, &lda, ipiv, &info);
}
static inline
void getrf(const int& m, const int& n, std::complex<double>* A, const int& lda, int* ipiv, int& info)
{
    zgetrf_(&m, &n, A, &lda, ipiv, &info);
}

// C++ overload set mapping getri onto the s/d/c/z LAPACK routines by element
// type; long parameter lists are wrapped for readability.
static inline
void getri(const int& n, float* A, const int& lda, const int* ipiv,
           float* work, const int& lwork, int& info)
{
    sgetri_(&n, A, &lda, ipiv, work, &lwork, &info);
}
static inline
void getri(const int& n, double* A, const int& lda, const int* ipiv,
           double* work, const int& lwork, int& info)
{
    dgetri_(&n, A, &lda, ipiv, work, &lwork, &info);
}
static inline
void getri(const int& n, std::complex<float>* A, const int& lda, const int* ipiv,
           std::complex<float>* work, const int& lwork, int& info)
{
    cgetri_(&n, A, &lda, ipiv, work, &lwork, &info);
}
static inline
void getri(const int& n, std::complex<double>* A, const int& lda, const int* ipiv,
           std::complex<double>* work, const int& lwork, int& info)
{
    zgetri_(&n, A, &lda, ipiv, work, &lwork, &info);
}

} // namespace lapackConnector
} // namespace container

Expand Down
Loading
Loading