Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
b9a811a
Port of gesv_rbt_async() from the Bitbucket repo, unit tests need to …
asenzz Aug 31, 2025
9ccac3a
Remove deprecated v1 interface
nbeams Dec 10, 2024
8cb4cf7
Check ROCm library versions directly
cgmb Dec 20, 2024
be231a8
Drop CMP0037 to fix cmake 4.0 build error
Xeonacid Apr 29, 2025
87bf295
remove blas_fix, which was to support macOS Accelerate
mgates3 Feb 14, 2025
1b5739d
remove macOS support
mgates3 Feb 14, 2025
8f9121c
remove ACML support, since AMD dropped it circa 2017. Also remove out…
mgates3 Feb 14, 2025
4c76e7c
make: remove extraneous check from make.inc files that is repeated in…
mgates3 Jul 8, 2025
f8c541b
make: add AMD AOCL / BLIS & FLAME config
mgates3 Jul 8, 2025
49aaf1d
Fix testing_zhetrf for complex [cz] case. Symmetrize with conj and ma…
mgates3 Jul 8, 2025
afdf8d3
Cleanup testing_zhetrf. Make spacing more consistent, use std::swap.
mgates3 Jul 8, 2025
471406c
Update magma2.F90
weilinscenccs Jun 24, 2025
ebd7251
Improve C/C++ standard setting in CMake
cyyever May 16, 2025
27c3254
Fix fpic
cyyever May 29, 2025
675053b
cmake: require c99 and c++14; prohibit decay to older standards and c…
Jul 17, 2025
c26c19b
fortran: use c_sizeof from f2008. Fixes #55.
Jul 17, 2025
6914cea
add support to Blackwell GPUs (might want to add more sm_xx)
abdelfattah83 Jul 23, 2025
004a8b2
add blackwell to make.inc files
abdelfattah83 Jul 23, 2025
b9f0a0f
remove very old CUDA archs
abdelfattah83 Jul 23, 2025
6ae9acd
also add sm 12.0 when Blackwell is selected
abdelfattah83 Jul 23, 2025
ac4262a
update release notes
abdelfattah83 Jul 24, 2025
0955da3
cmake: add Blackwell
mgates3 Jul 25, 2025
21d102f
archive files that are not in Makefile.src. Note magma_zmlumerge.cpp …
mgates3 Aug 2, 2025
b89e846
make: remove out-dated hg commands
mgates3 Jul 22, 2025
0176416
fix compilation issue with cuda 13
abdelfattah83 Aug 22, 2025
d42d1b5
remove gbtf2 kernels that use cooperative groups
abdelfattah83 Aug 22, 2025
d29cb95
remove unwanted fortran wrappers
abdelfattah83 Aug 22, 2025
d53ec34
Merge remote-tracking branch 'refs/remotes/origin/gesv_rbt_async' int…
asenzz Oct 26, 2025
8b2d25e
Merge branch 'icl-utk-edu:master' into gesv_rbt_async
asenzz Jan 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@ else()
project( MAGMA LANGUAGES C CXX )
endif()

# Route compiler invocations through ccache when the program is found.
find_program( PROGRAM_CCACHE ccache )
if (PROGRAM_CCACHE)
    set( CMAKE_C_COMPILER_LAUNCHER      ${PROGRAM_CCACHE} )
    set( CMAKE_CXX_COMPILER_LAUNCHER    ${PROGRAM_CCACHE} )
    set( CMAKE_CUDA_COMPILER_LAUNCHER   ${PROGRAM_CCACHE} )
    # NOTE(review): OPENCL does not appear to be a language CMake honors for
    # <LANG>_COMPILER_LAUNCHER -- confirm whether this line has any effect.
    set( CMAKE_OPENCL_COMPILER_LAUNCHER ${PROGRAM_CCACHE} )
endif()

# ----------------------------------------
# to show compile commands, set this here or use 'make VERBOSE=1'
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ else ifeq ($(BACKEND),hip)

## Suggestion by Mark (from SLATE)
# Valid architecture numbers
# TODO: remove very old ones?
# TODO: remove very old ones?
VALID_GFXS = 600 601 602 700 701 702 703 704 705 801 802 803 805 810 900 902 904 906 908 909 90a 940 941 942 90c 1010 1011 1012 1030 1031 1032 1033


Expand Down
32 changes: 32 additions & 0 deletions include/magma_auxiliary.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ magma_int_t magma_get_smlsize_divideconquer();
magma_int_t
magma_malloc( magma_ptr *ptr_ptr, size_t bytes );

/// Queue-taking counterpart of magma_malloc(): same pointer and byte-count
/// arguments, plus the queue to use. Parameter names follow magma_malloc().
/// NOTE(review): the type-safe *_malloc_async wrappers in this header describe
/// the allocation as using the CUDA stream specified in queue -- confirm
/// against the implementation.
magma_int_t
magma_malloc_async( magma_ptr *ptr_ptr, size_t bytes, magma_queue_t queue );

magma_int_t
magma_malloc_cpu( void **ptr_ptr, size_t bytes );

Expand All @@ -93,6 +96,9 @@ magma_free_cpu( void *ptr );
#define magma_free( ptr ) \
magma_free_internal( ptr, __func__, __FILE__, __LINE__ )

/// Queue-taking counterpart of the magma_free() macro: forwards ptr and queue
/// to magma_free_internal_async(), adding the caller's function name, file,
/// and line (__func__, __FILE__, __LINE__).
#define magma_free_async( ptr, queue ) \
magma_free_internal_async( ptr, __func__, __FILE__, __LINE__, queue )

#define magma_free_pinned( ptr ) \
magma_free_pinned_internal( ptr, __func__, __FILE__, __LINE__ )

Expand All @@ -101,6 +107,11 @@ magma_free_internal(
magma_ptr ptr,
const char* func, const char* file, int line );

/// Backend of the magma_free_async() macro, which supplies func, file, and
/// line from __func__, __FILE__, and __LINE__ at the call site.
/// Takes the same leading arguments as magma_free_internal(), plus a trailing queue.
/// NOTE(review): presumably func/file/line are used for diagnostics -- confirm
/// against the implementation.
magma_int_t
magma_free_internal_async(
magma_ptr ptr,
const char* func, const char* file, int line, magma_queue_t queue );

magma_int_t
magma_free_pinned_internal(
void *ptr,
Expand Down Expand Up @@ -128,24 +139,45 @@ magma_memset_async(void * ptr, int value, size_t count, magma_queue_t queue);
/// Type-safe magma_malloc() wrapper for magma_int_t arrays: requests n*sizeof(magma_int_t) bytes.
static inline magma_int_t
magma_imalloc( magmaInt_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_int_t);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Type-safe magma_malloc_async() wrapper for magma_int_t arrays: requests n*sizeof(magma_int_t) bytes
/// using the CUDA stream specified in queue.
static inline magma_int_t
magma_imalloc_async( magmaInt_ptr *ptr_ptr, size_t n, magma_queue_t queue )
{
    const size_t bytes = n * sizeof(magma_int_t);
    return magma_malloc_async( (magma_ptr*) ptr_ptr, bytes, queue );
}

/// Type-safe magma_malloc() wrapper for magma_index_t arrays: requests n*sizeof(magma_index_t) bytes.
static inline magma_int_t
magma_index_malloc( magmaIndex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_index_t);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Type-safe magma_malloc_async() wrapper for magma_index_t arrays: requests n*sizeof(magma_index_t) bytes
/// using the CUDA stream specified in queue.
static inline magma_int_t
magma_index_malloc_async( magmaIndex_ptr *ptr_ptr, size_t n, magma_queue_t queue )
{
    const size_t bytes = n * sizeof(magma_index_t);
    return magma_malloc_async( (magma_ptr*) ptr_ptr, bytes, queue );
}

/// Type-safe magma_malloc() wrapper for magma_uindex_t arrays: requests n*sizeof(magma_uindex_t) bytes.
static inline magma_int_t
magma_uindex_malloc( magmaUIndex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magma_uindex_t);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Type-safe magma_malloc_async() wrapper for magma_uindex_t arrays: requests n*sizeof(magma_uindex_t) bytes
/// using the CUDA stream specified in queue.
static inline magma_int_t
magma_uindex_malloc_async( magmaUIndex_ptr *ptr_ptr, size_t n, magma_queue_t queue )
{
    const size_t bytes = n * sizeof(magma_uindex_t);
    return magma_malloc_async( (magma_ptr*) ptr_ptr, bytes, queue );
}

/// Type-safe magma_malloc() wrapper for float arrays: requests n*sizeof(float) bytes.
static inline magma_int_t
magma_smalloc( magmaFloat_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(float);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Type-safe magma_malloc_async() wrapper for float arrays: requests n*sizeof(float) bytes
/// using the CUDA stream specified in queue.
static inline magma_int_t
magma_smalloc_async( magmaFloat_ptr *ptr_ptr, size_t n, magma_queue_t queue )
{
    const size_t bytes = n * sizeof(float);
    return magma_malloc_async( (magma_ptr*) ptr_ptr, bytes, queue );
}

/// Type-safe magma_malloc() wrapper for double arrays: requests n*sizeof(double) bytes.
static inline magma_int_t
magma_dmalloc( magmaDouble_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(double);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Type-safe magma_malloc_async() wrapper for double arrays: requests n*sizeof(double) bytes
/// using the CUDA stream specified in queue.
static inline magma_int_t
magma_dmalloc_async( magmaDouble_ptr *ptr_ptr, size_t n, magma_queue_t queue )
{
    const size_t bytes = n * sizeof(double);
    return magma_malloc_async( (magma_ptr*) ptr_ptr, bytes, queue );
}

/// Type-safe magma_malloc() wrapper for magmaFloatComplex arrays: requests n*sizeof(magmaFloatComplex) bytes.
static inline magma_int_t
magma_cmalloc( magmaFloatComplex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaFloatComplex);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Type-safe magma_malloc_async() wrapper for magmaFloatComplex arrays: requests n*sizeof(magmaFloatComplex) bytes
/// using the CUDA stream specified in queue.
static inline magma_int_t
magma_cmalloc_async( magmaFloatComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue )
{
    const size_t bytes = n * sizeof(magmaFloatComplex);
    return magma_malloc_async( (magma_ptr*) ptr_ptr, bytes, queue );
}

/// Type-safe magma_malloc() wrapper for magmaDoubleComplex arrays: requests n*sizeof(magmaDoubleComplex) bytes.
static inline magma_int_t
magma_zmalloc( magmaDoubleComplex_ptr *ptr_ptr, size_t n )
{
    const size_t bytes = n * sizeof(magmaDoubleComplex);
    return magma_malloc( (magma_ptr*) ptr_ptr, bytes );
}

/// Type-safe asynchronous version of magma_malloc(), for magmaDoubleComplex arrays.
/// Forwards to magma_malloc_async() with n*sizeof(magmaDoubleComplex) bytes,
/// using the CUDA stream specified in queue.
static inline magma_int_t
magma_zmalloc_async( magmaDoubleComplex_ptr *ptr_ptr, size_t n, magma_queue_t queue )
{
    const size_t bytes = n * sizeof(magmaDoubleComplex);
    return magma_malloc_async( (magma_ptr*) ptr_ptr, bytes, queue );
}

/// @}


Expand Down
65 changes: 65 additions & 0 deletions include/magma_z.h
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,16 @@ magma_zgerbt_gpu(
magmaDoubleComplex *U, magmaDoubleComplex *V,
magma_int_t *info);

// Queue-taking variant of magma_zgerbt_gpu(). By-value parameters are declared
// without top-level const, like the other prototypes in this header; top-level
// const on a by-value parameter is not part of the function type, so existing
// definitions and callers remain compatible.
// NOTE(review): presumably the work is enqueued on `queue` -- confirm against
// the implementation.
// CUDA MAGMA only
magma_int_t
magma_zgerbt_gpu_async(
    magma_bool_t gen, magma_int_t n, magma_int_t nrhs,
    magmaDoubleComplex_ptr dA, magma_int_t ldda,
    magmaDoubleComplex_ptr dB, magma_int_t lddb,
    magmaDoubleComplex_ptr dU, magmaDoubleComplex_ptr dV,
    magma_int_t *info,
    magma_queue_t queue);

// CUDA MAGMA only
magma_int_t
magma_zgerfs_nopiv_gpu(
Expand All @@ -488,6 +498,20 @@ magma_zgerfs_nopiv_gpu(
magma_int_t *iter,
magma_int_t *info);

// Queue-taking variant of magma_zgerfs_nopiv_gpu(). After `info` this
// prototype takes three further arguments: iter_max, bwdmax, and queue.
// NOTE(review): iter_max and bwdmax look like an iteration cap and a
// backward-error threshold -- confirm against the implementation.
// CUDA MAGMA only
magma_int_t
magma_zgerfs_nopiv_gpu_async(
magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
magmaDoubleComplex_ptr dA, magma_int_t ldda,
magmaDoubleComplex_ptr dB, magma_int_t lddb,
magmaDoubleComplex_ptr dX, magma_int_t lddx,
magmaDoubleComplex_ptr dworkd, magmaDoubleComplex_ptr dAF,
magma_int_t *iter,
magma_int_t *info,
magma_int_t iter_max,
double bwdmax,
magma_queue_t queue);

magma_int_t
magma_zgesdd(
magma_vec_t jobz, magma_int_t m, magma_int_t n,
Expand Down Expand Up @@ -525,6 +549,13 @@ magma_zgesv_nopiv_gpu(
magmaDoubleComplex_ptr dB, magma_int_t lddb,
magma_int_t *info);

// Queue-taking variant of magma_zgesv_nopiv_gpu(): takes a trailing `queue`
// argument after `info`.
// NOTE(review): presumably the work is enqueued on `queue` -- confirm against
// the implementation.
magma_int_t
magma_zgesv_nopiv_gpu_async(
magma_int_t n, magma_int_t nrhs,
magmaDoubleComplex_ptr dA, magma_int_t ldda,
magmaDoubleComplex_ptr dB, magma_int_t lddb,
magma_int_t *info, magma_queue_t queue );

// CUDA MAGMA only
magma_int_t
magma_zgesv_rbt(
Expand All @@ -533,6 +564,26 @@ magma_zgesv_rbt(
magmaDoubleComplex *B, magma_int_t ldb,
magma_int_t *info);

// Queue-taking variant of magma_zgesv_rbt(). dA is read through a
// pointer-to-const; dB is writable. By-value parameters are declared without
// top-level const, like the other prototypes in this header; that qualifier is
// not part of the function type, so existing definitions and callers remain
// compatible.
// NOTE(review): the `d` prefix suggests device pointers, and iter_max/bwdmax
// look like refinement controls used when `refine` is set -- confirm against
// the implementation.
// CUDA MAGMA only
magma_int_t
magma_zgesv_rbt_async(
    magma_bool_t refine, magma_int_t n, magma_int_t nrhs,
    const magmaDoubleComplex *dA, magma_int_t lda,
    magmaDoubleComplex *dB, magma_int_t ldb,
    magma_int_t *info,
    magma_int_t iter_max, double bwdmax,
    magma_queue_t queue );

// Takes the same parameters as magma_zgesv_rbt_async() except the leading
// `refine` flag. Parameter names drop the trailing underscores (dA, dB), and
// by-value parameters are declared without top-level const, like the other
// prototypes in this header; neither change affects the function type.
// NOTE(review): presumably this is the refinement path of
// magma_zgesv_rbt_async() -- confirm against the implementation.
// CUDA MAGMA only
magma_int_t
magma_zgesv_rbt_refine_async(
    magma_int_t n, magma_int_t nrhs,
    const magmaDoubleComplex *dA, magma_int_t lda,
    magmaDoubleComplex *dB, magma_int_t ldb,
    magma_int_t *info,
    magma_int_t iter_max, double bwdmax,
    magma_queue_t queue);

magma_int_t
magma_zgesvd(
magma_vec_t jobu, magma_vec_t jobvt, magma_int_t m, magma_int_t n,
Expand Down Expand Up @@ -676,6 +727,13 @@ magma_zgetrf_nopiv_gpu(
magmaDoubleComplex_ptr dA, magma_int_t ldda,
magma_int_t *info);

// Queue-taking variant of magma_zgetrf_nopiv_gpu(): takes a trailing `queue`
// argument after `info`.
// NOTE(review): presumably the work is enqueued on `queue` -- confirm against
// the implementation.
magma_int_t
magma_zgetrf_nopiv_gpu_async(
magma_int_t m, magma_int_t n,
magmaDoubleComplex_ptr dA, magma_int_t ldda,
magma_int_t *info,
magma_queue_t queue);

magma_int_t
magma_zgetri_gpu(
magma_int_t n,
Expand Down Expand Up @@ -721,6 +779,13 @@ magma_zgetrs_nopiv_gpu(
magmaDoubleComplex_ptr dB, magma_int_t lddb,
magma_int_t *info);

// Queue-taking variant of magma_zgetrs_nopiv_gpu(): takes a trailing `queue`
// argument after `info`.
// NOTE(review): presumably the work is enqueued on `queue` -- confirm against
// the implementation.
magma_int_t
magma_zgetrs_nopiv_gpu_async(
magma_trans_t trans, magma_int_t n, magma_int_t nrhs,
magmaDoubleComplex_ptr dA, magma_int_t ldda,
magmaDoubleComplex_ptr dB, magma_int_t lddb,
magma_int_t *info, magma_queue_t queue);

// ------------------------------------------------------------ zhe routines
magma_int_t
magma_zheevd(
Expand Down
16 changes: 16 additions & 0 deletions include/magmablas_z.h
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,13 @@ magmablas_ztrtri_diag(
magmaDoubleComplex_ptr d_dinvA,
magma_queue_t queue );

// "_async" variant of magmablas_ztrtri_diag().
// NOTE(review): magmablas_ztrtri_diag() also ends in a `queue` parameter, so
// how this variant differs is not visible from the prototype -- confirm
// against the implementation.
void
magmablas_ztrtri_diag_async(
magma_uplo_t uplo, magma_diag_t diag, magma_int_t n,
magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
magmaDoubleComplex_ptr d_dinvA,
magma_queue_t queue );

/*
* to cleanup (alphabetical order)
*/
Expand Down Expand Up @@ -757,6 +764,15 @@ magmablas_ztrsm(
magmaDoubleComplex_ptr dB, magma_int_t lddb,
magma_queue_t queue );

// "_async" variant of magmablas_ztrsm().
// NOTE(review): magmablas_ztrsm() also ends in a `queue` parameter, so how
// this variant differs is not visible from the prototype -- confirm against
// the implementation.
void
magmablas_ztrsm_async(
magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
magma_int_t m, magma_int_t n,
magmaDoubleComplex alpha,
magmaDoubleComplex_const_ptr dA, magma_int_t ldda,
magmaDoubleComplex_ptr dB, magma_int_t lddb,
magma_queue_t queue );

void
magmablas_ztrsm_outofplace(
magma_side_t side, magma_uplo_t uplo, magma_trans_t transA, magma_diag_t diag,
Expand Down
Loading