Skip to content

Commit 4dd70d9

Browse files
authored
Merge pull request #1667 from xianyi/revert-1642-develop
Revert "Rewrite &= -> = and simplify the initial blocking phase."
2 parents 504310e + 5f2a3c0 commit 4dd70d9

File tree

1 file changed

+14
-13
lines changed

1 file changed

+14
-13
lines changed

driver/level3/level3_thread.c

Lines changed: 14 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -344,6 +344,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
344344
div_n = (n_to - n_from + DIVIDE_RATE - 1) / DIVIDE_RATE;
345345
for (js = n_from, bufferside = 0; js < n_to; js += div_n, bufferside ++) {
346346

347+
/* Make sure no one is using the workspace */
348+
START_RPCC();
349+
for (i = 0; i < args -> nthreads; i++)
350+
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
351+
STOP_RPCC(waiting1);
352+
347353
#if defined(FUSED_GEMM) && !defined(TIMING)
348354

349355
/* Fused operation to copy region of B into workspace and apply kernel */
@@ -381,15 +387,10 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
381387
}
382388
#endif
383389

384-
for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++) {
385-
/* Make sure no one is using the workspace */
386-
START_RPCC();
387-
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
388-
STOP_RPCC(waiting1);
389-
/* Set flag so other threads can access local region of B */
390+
/* Set flag so other threads can access local region of B */
391+
for (i = mypos_n * nthreads_m; i < (mypos_n + 1) * nthreads_m; i++)
390392
job[mypos].working[i][CACHE_LINE_SIZE * bufferside] = (BLASLONG)buffer[bufferside];
391-
WMB;
392-
}
393+
WMB;
393394
}
394395

395396
/* Get regions of B from other threads and apply kernel */
@@ -425,13 +426,13 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
425426

426427
/* Clear synchronization flag if this thread is done with other region of B */
427428
if (m_to - m_from == min_i) {
428-
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
429+
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
429430
WMB;
430431
}
431432
}
432433
} while (current != mypos);
433434

434-
/* Iterate through steps of m
435+
/* Iterate through steps of m
435436
* Note: First step has already been finished */
436437
for(is = m_from + min_i; is < m_to; is += min_i){
437438
min_i = m_to - is;
@@ -461,14 +462,14 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
461462
sa, (FLOAT *)job[current].working[mypos][CACHE_LINE_SIZE * bufferside],
462463
c, ldc, is, js);
463464
STOP_RPCC(kernel);
464-
465+
465466
#ifdef TIMING
466467
ops += 2 * min_i * MIN(range_n[current + 1] - js, div_n) * min_l;
467468
#endif
468-
469+
469470
/* Clear synchronization flag if this thread is done with region of B */
470471
if (is + min_i >= m_to) {
471-
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] = 0;
472+
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
472473
WMB;
473474
}
474475
}

0 commit comments

Comments
 (0)