diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index d97eb3bccd..742ed7a4a7 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -231,7 +231,10 @@ In chronological order: * [2024-01-24] Optimize GEMV forwarding on ARM64 systems * Aniket P. Garade Sushil Pratap Singh Juliya James - * [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE + * [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE * Annop Wongwathanarat - * [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1 \ No newline at end of file + * [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1 + +* Marek Michalowski + * [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1` diff --git a/interface/gemv.c b/interface/gemv.c index 2c121f1308..f91f364eed 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -63,6 +63,36 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT }; #endif +#ifdef DYNAMIC_ARCH + extern char* gotoblas_corename(void); +#endif + +#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1) +static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { + return + MN < 25600L ? 1 + : MN < 63001L ? MIN(ncpu, 4) + : MN < 459684L ? MIN(ncpu, 16) + : ncpu; +} +#endif + +static inline int get_gemv_optimal_nthreads(BLASLONG MN) { + int ncpu = num_cpu_avail(3); +#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) + return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); +#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) + if (strcmp(gotoblas_corename(), "neoversev1") == 0) { + return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); + } +#endif + + if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD ) + return 1; + else + return num_cpu_avail(2); +} + #ifndef CBLAS void NAME(char *TRANS, blasint *M, blasint *N, @@ -225,11 +255,7 @@ void CNAME(enum CBLAS_ORDER order, STACK_ALLOC(buffer_size, FLOAT, buffer); #ifdef SMP - - if ( 1L * m * n < 115200L * GEMM_MULTITHREAD_THRESHOLD ) - nthreads = 1; - else - nthreads = num_cpu_avail(2); + nthreads = get_gemv_optimal_nthreads(1L * m * n); if (nthreads == 1) { #endif