Skip to content

Commit c2545b0

Browse files
committed
Fixed a few more unnecessary calls to num_cpu_avail.
I don't have as many benchmarks for these as for gemm, but it should still make a difference for small matrices.
1 parent 3313e4b commit c2545b0

18 files changed

+59
-92
lines changed

interface/axpy.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@
4040
#include "common.h"
4141
#ifdef FUNCTION_PROFILE
4242
#include "functable.h"
43-
#endif
43+
#endif
4444
#if defined(Z13)
4545
#define MULTI_THREAD_MINIMAL 200000
4646
#else
47-
#define MULTI_THREAD_MINIMAL 10000
47+
#define MULTI_THREAD_MINIMAL 10000
4848
#endif
4949
#ifndef CBLAS
5050

@@ -83,17 +83,15 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
8383
if (incy < 0) y -= (n - 1) * incy;
8484

8585
#ifdef SMP
86-
nthreads = num_cpu_avail(1);
87-
8886
//disable multi-thread when incx==0 or incy==0
8987
//In that case, the threads would be dependent.
90-
if (incx == 0 || incy == 0)
91-
nthreads = 1;
92-
88+
//
9389
//Temporarily work around the low performance issue with small input size &
9490
//multithreads.
95-
if (n <= MULTI_THREAD_MINIMAL)
91+
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
9692
nthreads = 1;
93+
else
94+
nthreads = num_cpu_avail(1);
9795

9896
if (nthreads == 1) {
9997
#endif

interface/scal.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
7676

7777

7878
#ifdef SMP
79-
nthreads = num_cpu_avail(1);
80-
8179
if (n <= 1048576 )
8280
nthreads = 1;
81+
else
82+
nthreads = num_cpu_avail(1);
83+
8384

8485
if (nthreads == 1) {
8586
#endif

interface/zaxpy.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,16 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
9090
if (incy < 0) y -= (n - 1) * incy * 2;
9191

9292
#ifdef SMP
93-
nthreads = num_cpu_avail(1);
94-
9593
//disable multi-thread when incx==0 or incy==0
9694
//In that case, the threads would be dependent.
97-
if (incx == 0 || incy == 0)
98-
nthreads = 1;
99-
100-
//Work around the low performance issue with small input size &
95+
//
96+
//Temporarily work around the low performance issue with small input size &
10197
//multithreads.
102-
if (n <= MULTI_THREAD_MINIMAL) {
98+
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
10399
nthreads = 1;
104-
}
100+
else
101+
nthreads = num_cpu_avail(1);
102+
105103
if (nthreads == 1) {
106104
#endif
107105

interface/zscal.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,10 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
9090
FUNCTION_PROFILE_START();
9191

9292
#ifdef SMP
93-
nthreads = num_cpu_avail(1);
94-
9593
if ( n <= 1048576 )
9694
nthreads = 1;
95+
else
96+
nthreads = num_cpu_avail(1);
9797

9898
if (nthreads == 1) {
9999
#endif

interface/zswap.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,12 @@ FLOAT *y = (FLOAT*)vy;
7979
if (incy < 0) y -= (n - 1) * incy * 2;
8080

8181
#ifdef SMP
82-
nthreads = num_cpu_avail(1);
83-
8482
//disable multi-thread when incx==0 or incy==0
8583
//In that case, the threads would be dependent.
8684
if (incx == 0 || incy == 0)
8785
nthreads = 1;
86+
else
87+
nthreads = num_cpu_avail(1);
8888

8989
if (nthreads == 1) {
9090
#endif

kernel/arm64/casum_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -233,13 +233,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
233233
FLOAT asum = 0.0;
234234

235235
#if defined(SMP)
236-
nthreads = num_cpu_avail(1);
237-
238-
if (inc_x == 0)
239-
nthreads = 1;
240-
241-
if (n <= 10000)
236+
if (inc_x == 0 || n <= 10000)
242237
nthreads = 1;
238+
else
239+
nthreads = num_cpu_avail(1);
243240

244241
if (nthreads == 1) {
245242
asum = casum_compute(n, x, inc_x);

kernel/arm64/copy_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,10 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
183183
if (n <= 0) return 0;
184184

185185
#if defined(SMP)
186-
nthreads = num_cpu_avail(1);
187-
188-
if (inc_x == 0)
189-
nthreads = 1;
190-
191-
if (n <= 10000)
186+
if (inc_x == 0 || n <= 10000)
192187
nthreads = 1;
188+
else
189+
nthreads = num_cpu_avail(1);
193190

194191
if (nthreads == 1) {
195192
do_copy(n, x, inc_x, y, inc_y);

kernel/arm64/dasum_thunderx2t99.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,13 +228,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
228228
FLOAT asum = 0.0;
229229

230230
#if defined(SMP)
231-
nthreads = num_cpu_avail(1);
232-
233-
if (inc_x == 0)
234-
nthreads = 1;
235-
236-
if (n <= 10000)
231+
if (inc_x == 0 || n <= 10000)
237232
nthreads = 1;
233+
else
234+
nthreads = num_cpu_avail(1);
238235

239236
if (nthreads == 1) {
240237
asum = dasum_compute(n, x, inc_x);

kernel/arm64/dot_thunderx2t99.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
199199
" faddp "DOTF", v0.2d \n"
200200
#endif /* !defined(DSDOT) */
201201

202-
#else /* !defined(DOUBLE) */
202+
#else /* !defined(DOUBLE) */
203203
#define KERNEL_F1 \
204204
" ldr "TMPX", ["X"] \n" \
205205
" ldr "TMPY", ["Y"] \n" \
@@ -384,13 +384,10 @@ RETURN_TYPE CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y
384384
RETURN_TYPE dot = 0.0;
385385

386386
#if defined(SMP)
387-
nthreads = num_cpu_avail(1);
388-
389-
if (inc_x == 0 || inc_y == 0)
390-
nthreads = 1;
391-
392-
if (n <= 10000)
387+
if (inc_x == 0 || inc_y == 0 || n <= 10000)
393388
nthreads = 1;
389+
else
390+
nthreads = num_cpu_avail(1);
394391

395392
if (nthreads == 1) {
396393
dot = dot_compute(n, x, inc_x, y, inc_y);

kernel/arm64/dznrm2_thunderx2t99.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,10 +328,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
328328
if (n <= 0 || inc_x <= 0) return 0.0;
329329

330330
#if defined(SMP)
331-
nthreads = num_cpu_avail(1);
332-
333331
if (n <= 10000)
334332
nthreads = 1;
333+
else
334+
nthreads = num_cpu_avail(1);
335335

336336
if (nthreads == 1) {
337337
nrm2_compute(n, x, inc_x, &ssq, &scale);

0 commit comments

Comments
 (0)