|
44 | 44 | #endif
|
45 | 45 |
|
46 | 46 | #ifndef COMPLEX
|
| 47 | +#define SMP_THRESHOLD_MIN 65536.0 |
47 | 48 | #ifdef XDOUBLE
|
48 | 49 | #define ERROR_NAME "QGEMM "
|
49 | 50 | #elif defined(DOUBLE)
|
|
52 | 53 | #define ERROR_NAME "SGEMM "
|
53 | 54 | #endif
|
54 | 55 | #else
|
| 56 | +#define SMP_THRESHOLD_MIN 8192.0 |
55 | 57 | #ifndef GEMM3M
|
56 | 58 | #ifdef XDOUBLE
|
57 | 59 | #define ERROR_NAME "XGEMM "
|
@@ -121,8 +123,6 @@ void NAME(char *TRANSA, char *TRANSB,
|
121 | 123 | FLOAT *sa, *sb;
|
122 | 124 |
|
123 | 125 | #ifdef SMP
|
124 |
| - int nthreads_max; |
125 |
| - int nthreads_avail; |
126 | 126 | double MNK;
|
127 | 127 | #ifndef COMPLEX
|
128 | 128 | #ifdef XDOUBLE
|
@@ -245,8 +245,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
245 | 245 | XFLOAT *sa, *sb;
|
246 | 246 |
|
247 | 247 | #ifdef SMP
|
248 |
| - int nthreads_max; |
249 |
| - int nthreads_avail; |
250 | 248 | double MNK;
|
251 | 249 | #ifndef COMPLEX
|
252 | 250 | #ifdef XDOUBLE
|
@@ -411,25 +409,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
411 | 409 | mode |= (transa << BLAS_TRANSA_SHIFT);
|
412 | 410 | mode |= (transb << BLAS_TRANSB_SHIFT);
|
413 | 411 |
|
414 |
| - nthreads_max = num_cpu_avail(3); |
415 |
| - nthreads_avail = nthreads_max; |
416 |
| - |
417 |
| -#ifndef COMPLEX |
418 | 412 | MNK = (double) args.m * (double) args.n * (double) args.k;
|
419 |
| - if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) |
420 |
| - nthreads_max = 1; |
421 |
| -#else |
422 |
| - MNK = (double) args.m * (double) args.n * (double) args.k; |
423 |
| - if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) |
424 |
| - nthreads_max = 1; |
425 |
| -#endif |
426 |
| - args.common = NULL; |
427 |
| - |
428 |
| - if ( nthreads_max > nthreads_avail ) |
429 |
| - args.nthreads = nthreads_avail; |
| 413 | + if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) |
| 414 | + args.nthreads = 1; |
430 | 415 | else
|
431 |
| - args.nthreads = nthreads_max; |
432 |
| - |
| 416 | + args.nthreads = num_cpu_avail(3); |
| 417 | + args.common = NULL; |
433 | 418 |
|
434 | 419 | if (args.nthreads == 1) {
|
435 | 420 | #endif
|
|
0 commit comments