Skip to content

Commit 2d39e71

Browse files
authored
Merge pull request #3960 from martin-frbg/symmsyrk_sp
Add multithreading threshold for SYMM and rework the one for SYRK
2 parents 1659750 + f2d6b1c commit 2d39e71

File tree

2 files changed

+32
-15
lines changed

2 files changed

+32
-15
lines changed

interface/symm.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#endif
4545

4646
#ifndef COMPLEX
47+
#define SMP_THRESHOLD_MIN 65536.
4748
#ifdef XDOUBLE
4849
#define ERROR_NAME "QSYMM "
4950
#elif defined(DOUBLE)
@@ -52,6 +53,7 @@
5253
#define ERROR_NAME "SSYMM "
5354
#endif
5455
#else
56+
#define SMP_THRESHOLD_MIN 8192.
5557
#ifndef GEMM3M
5658
#ifndef HEMM
5759
#ifdef XDOUBLE
@@ -91,6 +93,10 @@
9193
#endif
9294
#endif
9395

96+
#ifndef GEMM_MULTITHREAD_THRESHOLD
97+
#define GEMM_MULTITHREAD_THRESHOLD 4 /* fallback, used only when no value was defined earlier; multiplies SMP_THRESHOLD_MIN in the threading check */
98+
#endif
99+
94100

95101
#ifdef SMP
96102
#ifndef COMPLEX
@@ -159,7 +165,9 @@ void NAME(char *SIDE, char *UPLO,
159165
#if defined(SMP) && !defined(NO_AFFINITY)
160166
int nodes;
161167
#endif
162-
168+
# if defined(SMP)
169+
double MN; /* work estimate 2*m*m*n for the threading threshold; double, because the product exceeds INT_MAX for large matrices and converting it to int is undefined */
170+
#endif
163171
blasint info;
164172
int side;
165173
int uplo;
@@ -255,6 +263,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
255263
#if defined(SMP) && !defined(NO_AFFINITY)
256264
int nodes;
257265
#endif
266+
#if defined(SMP)
267+
double MN; /* work estimate 2*m*m*n for the threading threshold; double, because the product exceeds INT_MAX for large matrices and converting it to int is undefined */
268+
#endif
258269

259270
PRINT_DEBUG_CNAME;
260271

@@ -375,15 +386,18 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
375386

376387
#ifdef SMP
377388
args.common = NULL;
378-
args.nthreads = num_cpu_avail(3);
379-
389+
MN = 2.* (double) args.m * (double)args.m * (double) args.n;
390+
if (MN <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
391+
args.nthreads = 1;
392+
} else {
393+
args.nthreads = num_cpu_avail(3);
394+
}
380395
if (args.nthreads == 1) {
381396
#endif
382397

383398
(symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);
384399

385400
#ifdef SMP
386-
387401
} else {
388402

389403
#ifndef NO_AFFINITY

interface/syrk.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#endif
4545

4646
#ifndef COMPLEX
47+
#define SMP_THRESHOLD_MIN 109944.
4748
#ifdef XDOUBLE
4849
#define ERROR_NAME "QSYRK "
4950
#elif defined(DOUBLE)
@@ -52,6 +53,7 @@
5253
#define ERROR_NAME "SSYRK "
5354
#endif
5455
#else
56+
#define SMP_THRESHOLD_MIN 14824.
5557
#ifndef HEMM
5658
#ifdef XDOUBLE
5759
#define ERROR_NAME "XSYRK "
@@ -71,6 +73,10 @@
7173
#endif
7274
#endif
7375

76+
#ifndef GEMM_MULTITHREAD_THRESHOLD
77+
#define GEMM_MULTITHREAD_THRESHOLD 4 /* fallback, used only when no value was defined earlier; multiplies SMP_THRESHOLD_MIN in the threading check */
78+
#endif
79+
7480
static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
7581
#ifndef HEMM
7682
SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
@@ -101,6 +107,7 @@ void NAME(char *UPLO, char *TRANS,
101107
FLOAT *sa, *sb;
102108

103109
#ifdef SMP
110+
double NNK; /* work estimate (n+1)*n*k for the threading threshold; double, because the product exceeds INT_MAX for large matrices and converting it to int is undefined */
104111
#ifdef USE_SIMPLE_THREADED_LEVEL3
105112
#ifndef COMPLEX
106113
#ifdef XDOUBLE
@@ -225,6 +232,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
225232
FLOAT *sa, *sb;
226233

227234
#ifdef SMP
235+
double NNK; /* work estimate (n+1)*n*k for the threading threshold; double, because the product exceeds INT_MAX for large matrices and converting it to int is undefined */
236+
228237
#ifdef USE_SIMPLE_THREADED_LEVEL3
229238
#ifndef COMPLEX
230239
#ifdef XDOUBLE
@@ -354,26 +363,20 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
354363
#endif
355364

356365
args.common = NULL;
357-
#ifndef COMPLEX
358-
#ifdef DOUBLE
359-
if (args.n < 100)
360-
#else
361-
if (args.n < 200)
362-
#endif
363-
#else
364-
if (args.n < 65)
365-
#endif
366+
367+
NNK = (double)(args.n+1)*(double)args.n*(double)args.k;
368+
if (NNK <= (SMP_THRESHOLD_MIN * GEMM_MULTITHREAD_THRESHOLD)) {
366369
args.nthreads = 1;
367-
else
370+
} else {
368371
args.nthreads = num_cpu_avail(3);
372+
}
369373

370374
if (args.nthreads == 1) {
371375
#endif
372376

373377
(syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
374378

375379
#ifdef SMP
376-
377380
} else {
378381

379382
#ifndef USE_SIMPLE_THREADED_LEVEL3

0 commit comments

Comments
 (0)