Skip to content

Commit 2e69632

Browse files
authored
Merge pull request #2471 from AGSaidi/l3-fix-2
Fix barriers in level3_thread
2 parents e94590e + 97ce6bb commit 2e69632

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

driver/level3/level3_thread.c

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -351,8 +351,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
351 351
/* Make sure if no one is using workspace */
352 352
START_RPCC();
353 353
for (i = 0; i < args -> nthreads; i++)
354-
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
354+
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
355 355
STOP_RPCC(waiting1);
356+
MB;
356 357

357 358
#if defined(FUSED_GEMM) && !defined(TIMING)
358 359

@@ -395,10 +396,10 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
395 396
}
396 397
#endif
397 398

399+
WMB;
398 400
/* Set flag so other threads can access local region of B */
399 401
for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++)
400 402
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
401-
WMB;
402 403
}
403 404

404 405
/* Get regions of B from other threads and apply kernel */
@@ -417,8 +418,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
417 418

418 419
/* Wait until other region of B is initialized */
419 420
START_RPCC();
420-
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;MB;};
421+
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
421 422
STOP_RPCC(waiting2);
423+
MB;
422 424

423 425
/* Apply kernel with local region of A and part of other region of B */
424 426
START_RPCC();
@@ -434,8 +436,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
434 436

435 437
/* Clear synchronization flag if this thread is done with other region of B */
436 438
if (m_to - m_from == min_i) {
437-
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
438 439
WMB;
440+
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
439 441
}
440 442
}
441 443
} while (current != mypos);
@@ -477,8 +479,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
477 479

478 480
/* Clear synchronization flag if this thread is done with region of B */
479 481
if (is + min_i >= m_to) {
480-
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
481 482
WMB;
483+
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
482 484
}
483 485
}
484 486

@@ -497,10 +499,11 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
497 499
START_RPCC();
498 500
for (i = 0; i < args -> nthreads; i++) {
499 501
for (js = 0; js < DIVIDE_RATE; js++) {
500-
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;MB;};
502+
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;};
501 503
}
502 504
}
503 505
STOP_RPCC(waiting3);
506+
MB;
504 507

505 508
#ifdef TIMING
506 509
BLASLONG waiting = waiting1 + waiting2 + waiting3;
@@ -705,7 +708,7 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
705 708
}
706 709
}
707 710
}
708-
711+
WMB;
709 712
/* Execute parallel computation */
710 713
exec_blas(nthreads, queue);
711 714
}

0 commit comments

Comments
 (0)