Skip to content

Commit 6195ec0

Browse files
committed
Merge pull request #1677 from hjelmn/add_procs_lockup
bml/r2: always add btl progress function
2 parents 4e0749f + a679cc0 commit 6195ec0

File tree

3 files changed

+93
-23
lines changed

3 files changed

+93
-23
lines changed

ompi/mca/bml/r2/bml_r2.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -166,22 +166,30 @@ static mca_bml_base_endpoint_t *mca_bml_r2_allocate_endpoint (ompi_proc_t *proc)
166166
return bml_endpoint;
167167
}
168168

169-
/**
 * Make sure a BTL's component progress function is recorded by the r2 BML
 * and registered with opal_progress at the requested priority.
 *
 * @param btl  BTL module whose component progress function (if any) is used
 * @param hp   true to register the function through opal_progress_register(),
 *             false to register it through opal_progress_register_lp()
 *
 * A function that is already recorded is only re-registered when hp is true;
 * a low-priority request for an already recorded function does nothing.
 */
static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl, bool hp)
{
    size_t slot;
    bool known;

    if (NULL == btl->btl_component->btl_progress) {
        /* the component has no progress function: nothing to register */
        return;
    }

    /* look for the function among those already recorded */
    slot = 0;
    while (slot < mca_bml_r2.num_btl_progress &&
           mca_bml_r2.btl_progress[slot] != btl->btl_component->btl_progress) {
        ++slot;
    }
    known = (slot < mca_bml_r2.num_btl_progress);

    if (known && !hp) {
        /* already recorded and no promotion requested */
        return;
    }

    if (!known) {
        mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress++] =
            btl->btl_component->btl_progress;
    }

    if (hp) {
        opal_progress_register (btl->btl_component->btl_progress);
    } else {
        opal_progress_register_lp (btl->btl_component->btl_progress);
    }
}
@@ -405,7 +413,7 @@ static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
405413
if (OMPI_SUCCESS != rc) {
406414
btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint);
407415
} else {
408-
mca_bml_r2_register_progress (btl);
416+
mca_bml_r2_register_progress (btl, true);
409417
btl_in_use = true;
410418
}
411419
}
@@ -546,9 +554,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
546554
btl_inuse++;
547555
}
548556

549-
if (btl_inuse) {
550-
mca_bml_r2_register_progress (btl);
551-
}
557+
mca_bml_r2_register_progress (btl, !!(btl_inuse));
552558
}
553559

554560
free(btl_endpoints);

opal/runtime/opal_progress.c

Lines changed: 75 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
13-
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
13+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1414
* reserved.
1515
* Copyright (c) 2015-2016 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
@@ -59,6 +59,10 @@ static opal_progress_callback_t *callbacks = NULL;
5959
static size_t callbacks_len = 0;
6060
static size_t callbacks_size = 0;
6161

62+
static opal_progress_callback_t *callbacks_lp = NULL;
63+
static size_t callbacks_lp_len = 0;
64+
static size_t callbacks_lp_size = 0;
65+
6266
/* do we want to call sched_yield() if nothing happened */
6367
bool opal_progress_yield_when_idle = false;
6468

@@ -151,6 +155,7 @@ opal_progress_finalize(void)
151155
void
152156
opal_progress(void)
153157
{
158+
static volatile uint64_t num_calls = 0;
154159
size_t i;
155160
int events = 0;
156161

@@ -189,6 +194,13 @@ opal_progress(void)
189194
events += (callbacks[i])();
190195
}
191196

197+
if ((OPAL_THREAD_ADD64((volatile int64_t *) &num_calls, 1) & 0x7) == 0) {
198+
/* run low priority callbacks once every 8 calls to opal_progress() */
199+
for (i = 0 ; i < callbacks_lp_len ; ++i) {
200+
events += (callbacks_lp[i])();
201+
}
202+
}
203+
192204
#if OPAL_HAVE_SCHED_YIELD
193205
if (opal_progress_yield_when_idle && events <= 0) {
194206
/* If there is nothing to do - yield the processor - otherwise
@@ -317,6 +329,9 @@ opal_progress_register(opal_progress_callback_t cb)
317329
int ret = OPAL_SUCCESS;
318330
size_t index;
319331

332+
/* just in case there is a low-priority callback remove it */
333+
(void) opal_progress_unregister (cb);
334+
320335
opal_atomic_lock(&progress_lock);
321336

322337
/* see if we need to allocate more space */
@@ -345,17 +360,53 @@ opal_progress_register(opal_progress_callback_t cb)
345360
return ret;
346361
}
347362

348-
int
349-
opal_progress_unregister(opal_progress_callback_t cb)
363+
/**
 * Register a callback in the low-priority progress array.
 *
 * Any existing registration of the same callback is dropped first via
 * opal_progress_unregister(). The array is grown four slots at a time;
 * slots past the one about to be used are filled with fake_cb.
 *
 * @param cb  callback to register
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_TEMP_OUT_OF_RESOURCE if the array could
 *         not be grown (the callback is then not registered).
 */
int opal_progress_register_lp (opal_progress_callback_t cb)
{
    int rc = OPAL_SUCCESS;
    size_t slot;

    /* a callback that is currently registered (at either priority) is removed first */
    (void) opal_progress_unregister (cb);

    opal_atomic_lock(&progress_lock);

    if (callbacks_lp_len + 1 > callbacks_lp_size) {
        /* out of room: extend the array by four entries */
        opal_progress_callback_t *grown =
            (opal_progress_callback_t *) realloc (callbacks_lp,
                                                  sizeof (opal_progress_callback_t) * (callbacks_lp_size + 4));

        if (NULL == grown) {
            rc = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
        } else {
            /* pad the unused tail with the fake callback */
            for (slot = callbacks_lp_len + 1 ; slot < callbacks_lp_size + 4 ; slot++) {
                grown[slot] = &fake_cb;
            }

            callbacks_lp = grown;
            callbacks_lp_size += 4;
        }
    }

    if (OPAL_SUCCESS == rc) {
        callbacks_lp[callbacks_lp_len++] = cb;
    }

    opal_atomic_unlock(&progress_lock);

    return rc;
}
398+
399+
/**
 * Remove the first occurrence of a callback from one progress-callback array.
 *
 * The length is passed by pointer so that the decrement performed here is
 * visible to the caller; passing it by value would leave the file-level
 * length untouched and the array would keep a stale fake_cb slot after
 * every removal.
 *
 * @param cb                  callback to remove
 * @param callback_array      array to search
 * @param callback_array_len  in/out: number of entries in use; decremented
 *                            by one when the callback is found
 *
 * @return OPAL_SUCCESS if the callback was removed, OPAL_ERR_NOT_FOUND if it
 *         was not present in the array.
 */
static int _opal_progress_unregister (opal_progress_callback_t cb, opal_progress_callback_t *callback_array,
                                      size_t *callback_array_len)
{
    size_t i;
    int ret = OPAL_ERR_NOT_FOUND;

    opal_atomic_lock(&progress_lock);

    for (i = 0 ; i < *callback_array_len ; ++i) {
        if (cb == callback_array[i]) {
            callback_array[i] = &fake_cb;
            ret = OPAL_SUCCESS;
            break;
        }
    }

    /* ret is only OPAL_SUCCESS when the loop ran, so the length is at least
       one here and the subtraction below cannot wrap around. */
    if (OPAL_SUCCESS == ret) {
        const size_t last = *callback_array_len - 1;

        if (i < last) {
            /* close the gap left by the removed entry */
            memmove (callback_array + i, callback_array + i + 1,
                     (last - i) * sizeof (callback_array[0]));
        }

        callback_array[last] = &fake_cb;
        *callback_array_len = last;
    }

    opal_atomic_unlock(&progress_lock);

    return ret;
}

/**
 * Unregister a progress callback, whichever priority it was registered at.
 *
 * @param cb  callback to remove
 *
 * @return OPAL_SUCCESS if the callback was found and removed from either
 *         array, OPAL_ERR_NOT_FOUND otherwise.
 */
int opal_progress_unregister (opal_progress_callback_t cb)
{
    int ret = _opal_progress_unregister (cb, callbacks, &callbacks_len);
    if (OPAL_SUCCESS != ret) {
        /* if not in the high-priority array try to remove from the lp array.
         * a callback will never be in both. */
        return _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len);
    }

    return ret;
}

opal/runtime/opal_progress.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ typedef int (*opal_progress_callback_t)(void);
163163
*/
164164
OPAL_DECLSPEC int opal_progress_register(opal_progress_callback_t cb);
165165

166+
OPAL_DECLSPEC int opal_progress_register_lp (opal_progress_callback_t cb);
167+
166168

167169
/**
168170
* Deregister previously registered event

0 commit comments

Comments
 (0)