52
52
53
53
/* Thread server common information */
54
54
typedef struct {
55
- CRITICAL_SECTION lock ;
56
- HANDLE filled ;
57
- HANDLE killed ;
55
+ HANDLE taskSemaphore ;
58
56
59
57
blas_queue_t * queue ; /* Parameter Pointer */
60
58
int shutdown ; /* server shutdown flag */
@@ -68,6 +66,7 @@ int blas_server_avail = 0;
68
66
static BLASULONG server_lock = 0 ;
69
67
70
68
static blas_pool_t pool ;
69
+ static BLASULONG pool_lock = 0 ;
71
70
static HANDLE blas_threads [MAX_CPU_NUMBER ];
72
71
static DWORD blas_threads_id [MAX_CPU_NUMBER ];
73
72
@@ -198,7 +197,6 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
198
197
199
198
/* This is the main routine of threads. Each thread waits until a job is */
200
199
/* queued. */
201
-
202
200
static DWORD WINAPI blas_thread_server (void * arg ){
203
201
204
202
/* Thread identifier */
@@ -207,9 +205,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
207
205
#endif
208
206
209
207
void * buffer , * sa , * sb ;
210
- blas_queue_t * queue ;
211
- DWORD action ;
212
- HANDLE handles [] = {pool .filled , pool .killed };
208
+ volatile blas_queue_t * queue ;
213
209
214
210
/* Each server needs each buffer */
215
211
buffer = blas_memory_alloc (2 );
@@ -226,28 +222,32 @@ static DWORD WINAPI blas_thread_server(void *arg){
226
222
fprintf (STDERR , "Server[%2ld] Waiting for Queue.\n" , cpu );
227
223
#endif
228
224
229
- do {
230
- action = WaitForMultipleObjects (2 , handles , FALSE, INFINITE );
231
- } while ((action != WAIT_OBJECT_0 ) && (action != WAIT_OBJECT_0 + 1 ));
232
-
233
- if (action == WAIT_OBJECT_0 + 1 ) break ;
225
+ // all worker threads wait on the semaphore
226
+ WaitForSingleObject (pool .taskSemaphore , INFINITE );
234
227
228
+ // kill the thread if we are shutting down the server
229
+ if (pool .shutdown )
230
+ break ;
231
+
235
232
#ifdef SMP_DEBUG
236
233
fprintf (STDERR , "Server[%2ld] Got it.\n" , cpu );
237
234
#endif
238
235
239
- EnterCriticalSection (& pool .lock );
236
+ // grab a queued task and update the list
237
+ volatile blas_queue_t * queue_next ;
238
+ LONG64 prev_value ;
239
+ do {
240
+ queue = (volatile blas_queue_t * )pool .queue ;
241
+ if (!queue )
242
+ break ;
240
243
241
- queue = pool .queue ;
242
- if (queue ) pool .queue = queue -> next ;
243
-
244
- LeaveCriticalSection (& pool .lock );
244
+ queue_next = (volatile blas_queue_t * )queue -> next ;
245
+ prev_value = InterlockedCompareExchange64 ((PLONG64 )& pool .queue , (LONG64 )queue_next , (LONG64 )queue );
246
+ } while (prev_value != queue );
245
247
246
248
if (queue ) {
247
249
int (* routine )(blas_arg_t * , void * , void * , void * , void * , BLASLONG ) = queue -> routine ;
248
250
249
- if (pool .queue ) SetEvent (pool .filled );
250
-
251
251
sa = queue -> sa ;
252
252
sb = queue -> sb ;
253
253
@@ -332,13 +332,8 @@ static DWORD WINAPI blas_thread_server(void *arg){
332
332
fprintf (STDERR , "Server[%2ld] Finished!\n" , cpu );
333
333
#endif
334
334
335
- EnterCriticalSection (& queue -> lock );
336
-
337
- queue -> status = BLAS_STATUS_FINISHED ;
338
-
339
- LeaveCriticalSection (& queue -> lock );
340
-
341
- SetEvent (queue -> finish );
335
+ // mark our sub-task as complete
336
+ InterlockedDecrement (& queue -> status );
342
337
}
343
338
344
339
/* Shutdown procedure */
@@ -353,7 +348,7 @@ static DWORD WINAPI blas_thread_server(void *arg){
353
348
}
354
349
355
350
/* Initializing routine */
356
- int blas_thread_init (void ){
351
+ int blas_thread_init (void ){
357
352
BLASLONG i ;
358
353
359
354
if (blas_server_avail || (blas_cpu_number <= 1 )) return 0 ;
@@ -367,9 +362,7 @@ int blas_thread_init(void){
367
362
368
363
if (!blas_server_avail ){
369
364
370
- InitializeCriticalSection (& pool .lock );
371
- pool .filled = CreateEvent (NULL , FALSE, FALSE, NULL );
372
- pool .killed = CreateEvent (NULL , TRUE, FALSE, NULL );
365
+ pool .taskSemaphore = CreateSemaphore (NULL , 0 , blas_cpu_number - 1 , NULL );
373
366
374
367
pool .shutdown = 0 ;
375
368
pool .queue = NULL ;
@@ -391,11 +384,10 @@ int blas_thread_init(void){
391
384
/*
392
385
The user can call one of two routines.
393
386
394
- exec_blas_async ... immediately returns after jobs are queued.
387
+ exec_blas_async ... immediately returns after jobs are queued.
395
388
396
- exec_blas ... returns after jobs are finished.
389
+ exec_blas ... returns after jobs are finished.
397
390
*/
398
-
399
391
int exec_blas_async (BLASLONG pos , blas_queue_t * queue ){
400
392
401
393
#if defined(SMP_SERVER )
@@ -409,8 +401,7 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
409
401
current = queue ;
410
402
411
403
while (current ) {
412
- InitializeCriticalSection (& current -> lock );
413
- current -> finish = CreateEvent (NULL , FALSE, FALSE, NULL );
404
+ current -> status = 1 ;
414
405
current -> position = pos ;
415
406
416
407
#ifdef CONSISTENT_FPCSR
@@ -422,19 +413,10 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
422
413
pos ++ ;
423
414
}
424
415
425
- EnterCriticalSection ( & pool .lock ) ;
416
+ pool .queue = queue ;
426
417
427
- if (pool .queue ) {
428
- current = pool .queue ;
429
- while (current -> next ) current = current -> next ;
430
- current -> next = queue ;
431
- } else {
432
- pool .queue = queue ;
433
- }
434
-
435
- LeaveCriticalSection (& pool .lock );
436
-
437
- SetEvent (pool .filled );
418
+ // start up worker threads
419
+ ReleaseSemaphore (pool .taskSemaphore , pos - 1 , NULL );
438
420
439
421
return 0 ;
440
422
}
@@ -450,10 +432,9 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
450
432
fprintf (STDERR , "Waiting Queue ..\n" );
451
433
#endif
452
434
453
- WaitForSingleObject (queue -> finish , INFINITE );
454
-
455
- CloseHandle (queue -> finish );
456
- DeleteCriticalSection (& queue -> lock );
435
+ // spin-wait on each sub-task to finish
436
+ while (* ((volatile int * )& queue -> status ))
437
+ YIELDING ;
457
438
458
439
queue = queue -> next ;
459
440
num -- ;
@@ -501,18 +482,21 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
501
482
502
483
/* Shutdown procedure, but the user doesn't have to call this routine. The */
503
484
/* kernel automatically kills threads. */
504
-
505
485
int BLASFUNC (blas_thread_shutdown )(void ){
506
486
507
487
int i ;
508
488
489
+ #ifdef SMP_DEBUG
490
+ fprintf (STDERR , "blas_thread_shutdown..\n" );
491
+ #endif
492
+
509
493
if (!blas_server_avail ) return 0 ;
510
494
511
495
LOCK_COMMAND (& server_lock );
512
496
513
497
if (blas_server_avail ){
514
498
515
- SetEvent ( pool .killed ) ;
499
+ pool .shutdown = 1 ;
516
500
517
501
for (i = 0 ; i < blas_num_threads - 1 ; i ++ ){
518
502
// Could also just use WaitForMultipleObjects
@@ -528,8 +512,7 @@ int BLASFUNC(blas_thread_shutdown)(void){
528
512
CloseHandle (blas_threads [i ]);
529
513
}
530
514
531
- CloseHandle (pool .filled );
532
- CloseHandle (pool .killed );
515
+ CloseHandle (pool .taskSemaphore );
533
516
534
517
blas_server_avail = 0 ;
535
518
}
@@ -559,16 +542,14 @@ void goto_set_num_threads(int num_threads)
559
542
//increased_threads = 1;
560
543
if (!blas_server_avail ){
561
544
562
- InitializeCriticalSection (& pool .lock );
563
- pool .filled = CreateEvent (NULL , FALSE, FALSE, NULL );
564
- pool .killed = CreateEvent (NULL , TRUE, FALSE, NULL );
545
+ pool .taskSemaphore = CreateSemaphore (NULL , 0 , blas_cpu_number - 1 , NULL );
565
546
566
547
pool .shutdown = 0 ;
567
548
pool .queue = NULL ;
568
549
blas_server_avail = 1 ;
569
550
}
570
551
571
- for (i = blas_num_threads - 1 ; i < num_threads - 1 ; i ++ ){
552
+ for (i = blas_num_threads ; i < num_threads - 1 ; i ++ ){
572
553
573
554
blas_threads [i ] = CreateThread (NULL , 0 ,
574
555
blas_thread_server , (void * )i ,
0 commit comments