Skip to content

Commit 8da0a1f

Browse files
committed
Updated SGEMV ramps.
1 parent d6b25c4 commit 8da0a1f

File tree

2 files changed: +14 -5 lines changed

CONTRIBUTORS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
* Annop Wongwathanarat <[email protected]>
3030
* Optimizations and other improvements targeting AArch64
3131

32+
* Anna Mayne <[email protected]>
33+
* Optimizations and other improvements targeting AArch64
34+
3235
## Previous Developers
3336

3437
* Zaheer Chothia <[email protected]>
@@ -267,3 +270,5 @@ In chronological order:
267270
* [2025-05-29] Optimise axpby kernel for RISCV64_ZVL256B
268271
* [2025-06-05] Optimise hbmv kernel for RISCV64_ZVL256B
269272

273+
* Anna Mayne <[email protected]>
274+
* [2025-11-19] Update thread throttling profile for SGEMV on NEOVERSEV1 and NEOVERSEV2

interface/gemv.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/*********************************************************************/
2+
/* Copyright 2025 The OpenBLAS Project */
23
/* Copyright 2009, 2010 The University of Texas at Austin. */
34
/* All rights reserved. */
45
/* */
@@ -81,9 +82,12 @@ static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
8182
: (MN < 1050625L) ? MIN(ncpu, 40)
8283
: ncpu;
8384
#else
84-
return (MN < 25600L) ? 1
85+
return
86+
(MN < 25600L) ? 1
8587
: (MN < 63001L) ? MIN(ncpu, 4)
86-
: (MN < 459684L) ? MIN(ncpu, 16)
88+
: (MN < 202500L) ? MIN(ncpu, 8)
89+
: (MN < 806404L) ? MIN(ncpu, 16)
90+
: (MN < 1638400L) ? MIN(ncpu, 32)
8791
: ncpu;
8892
#endif
8993
}
@@ -93,9 +97,9 @@ static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
9397
static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
9498
return
9599
MN < 24964L ? 1
96-
: MN < 65536L ? MIN(ncpu, 8)
97-
: MN < 262144L ? MIN(ncpu, 32)
98-
: MN < 1638400L ? MIN(ncpu, 64)
100+
: MN < 145924L ? MIN(ncpu, 8)
101+
: MN < 692224L ? MIN(ncpu, 16)
102+
: MN < 1638400L ? MIN(ncpu, 32)
99103
: ncpu;
100104
}
101105
#endif

0 commit comments

Comments
 (0)