Skip to content

Commit 0297b32

Browse files
authored
Merge pull request #1605 from oon3m0oo/develop
Improve performance of GEMM for small matrices when SMP is defined.
2 parents 6adc4b7 + 66316b9 commit 0297b32

File tree

2 files changed, with 8 additions and 22 deletions.

interface/gemm.c

Lines changed: 6 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@
4444
#endif
4545

4646
#ifndef COMPLEX
47+
#define SMP_THRESHOLD_MIN 65536.0
4748
#ifdef XDOUBLE
4849
#define ERROR_NAME "QGEMM "
4950
#elif defined(DOUBLE)
@@ -52,6 +53,7 @@
5253
#define ERROR_NAME "SGEMM "
5354
#endif
5455
#else
56+
#define SMP_THRESHOLD_MIN 8192.0
5557
#ifndef GEMM3M
5658
#ifdef XDOUBLE
5759
#define ERROR_NAME "XGEMM "
@@ -121,8 +123,6 @@ void NAME(char *TRANSA, char *TRANSB,
121123
FLOAT *sa, *sb;
122124

123125
#ifdef SMP
124-
int nthreads_max;
125-
int nthreads_avail;
126126
double MNK;
127127
#ifndef COMPLEX
128128
#ifdef XDOUBLE
@@ -245,8 +245,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
245245
XFLOAT *sa, *sb;
246246

247247
#ifdef SMP
248-
int nthreads_max;
249-
int nthreads_avail;
250248
double MNK;
251249
#ifndef COMPLEX
252250
#ifdef XDOUBLE
@@ -411,25 +409,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
411409
mode |= (transa << BLAS_TRANSA_SHIFT);
412410
mode |= (transb << BLAS_TRANSB_SHIFT);
413411

414-
nthreads_max = num_cpu_avail(3);
415-
nthreads_avail = nthreads_max;
416-
417-
#ifndef COMPLEX
418412
MNK = (double) args.m * (double) args.n * (double) args.k;
419-
if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
420-
nthreads_max = 1;
421-
#else
422-
MNK = (double) args.m * (double) args.n * (double) args.k;
423-
if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
424-
nthreads_max = 1;
425-
#endif
426-
args.common = NULL;
427-
428-
if ( nthreads_max > nthreads_avail )
429-
args.nthreads = nthreads_avail;
413+
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) )
414+
args.nthreads = 1;
430415
else
431-
args.nthreads = nthreads_max;
432-
416+
args.nthreads = num_cpu_avail(3);
417+
args.common = NULL;
433418

434419
if (args.nthreads == 1) {
435420
#endif

interface/trsm.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -366,12 +366,13 @@ void CNAME(enum CBLAS_ORDER order,
366366
mode |= (trans << BLAS_TRANSA_SHIFT);
367367
mode |= (side << BLAS_RSIDE_SHIFT);
368368

369-
args.nthreads = num_cpu_avail(3);
370369
if ( args.m < 2*GEMM_MULTITHREAD_THRESHOLD )
371370
args.nthreads = 1;
372371
else
373372
if ( args.n < 2*GEMM_MULTITHREAD_THRESHOLD )
374373
args.nthreads = 1;
374+
else
375+
args.nthreads = num_cpu_avail(3);
375376

376377

377378
if (args.nthreads == 1) {

0 commit comments

Comments
 (0)