Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.

Commit bd6fbe7

Browse files
authored
Merge pull request #1174 from hjelmn/btl_progress
bml/r2: always add btl progress function
2 parents fb636c7 + 5faaa6d commit bd6fbe7

File tree

5 files changed

+191
-54
lines changed

5 files changed

+191
-54
lines changed

ompi/mca/bml/r2/bml_r2.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,22 +165,30 @@ static mca_bml_base_endpoint_t *mca_bml_r2_allocate_endpoint (ompi_proc_t *proc)
165165
return bml_endpoint;
166166
}
167167

168-
static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl)
168+
static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl, bool hp)
169169
{
170170
if (NULL != btl->btl_component->btl_progress) {
171171
bool found = false;
172+
size_t p;
172173

173-
for (size_t p = 0 ; p < mca_bml_r2.num_btl_progress ; ++p) {
174+
for (p = 0 ; p < mca_bml_r2.num_btl_progress ; ++p) {
174175
if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
175176
found = true;
176177
break;
177178
}
178179
}
179180

180-
if (found == false) {
181-
mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress++] =
182-
btl->btl_component->btl_progress;
183-
opal_progress_register (btl->btl_component->btl_progress);
181+
if (found == false || hp) {
182+
if (found == false) {
183+
mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress++] =
184+
btl->btl_component->btl_progress;
185+
}
186+
187+
if (hp) {
188+
opal_progress_register (btl->btl_component->btl_progress);
189+
} else {
190+
opal_progress_register_lp (btl->btl_component->btl_progress);
191+
}
184192
}
185193
}
186194
}
@@ -403,7 +411,7 @@ static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
403411
if (OMPI_SUCCESS != rc) {
404412
btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint);
405413
} else {
406-
mca_bml_r2_register_progress (btl);
414+
mca_bml_r2_register_progress (btl, true);
407415
btl_in_use = true;
408416
}
409417
}
@@ -546,9 +554,7 @@ static int mca_bml_r2_add_procs( size_t nprocs,
546554
btl_inuse++;
547555
}
548556

549-
if (btl_inuse) {
550-
mca_bml_r2_register_progress (btl);
551-
}
557+
mca_bml_r2_register_progress (btl, !!(btl_inuse));
552558
}
553559

554560
free(btl_endpoints);

opal/runtime/opal_params.c

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* reserved.
1515
* Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved.
1616
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
17-
* Copyright (c) 2010-2015 Los Alamos National Security, LLC.
17+
* Copyright (c) 2010-2016 Los Alamos National Security, LLC.
1818
* All rights reserved.
1919
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
2020
* Copyright (c) 2015 Research Organization for Information Science
@@ -45,6 +45,7 @@
4545
#include "opal/dss/dss.h"
4646
#include "opal/util/show_help.h"
4747
#include "opal/util/timings.h"
48+
#include "opal/util/bit_ops.h"
4849

4950
char *opal_signal_string = NULL;
5051
char *opal_net_private_ipv4 = NULL;
@@ -66,6 +67,7 @@ int opal_leave_pinned = -1;
6667
bool opal_leave_pinned_pipeline = false;
6768
bool opal_abort_print_stack = false;
6869
int opal_abort_delay = 0;
70+
unsigned int opal_progress_lp_call_ratio = 8;
6971

7072
static bool opal_register_done = false;
7173

@@ -279,6 +281,25 @@ int opal_register_params(void)
279281
return ret;
280282
}
281283

284+
opal_progress_lp_call_ratio = 8;
285+
ret = mca_base_var_register("opal", "opal", NULL, "progress_lp_call_ratio",
286+
"Ratio of calls to high-priority to low-priority progress "
287+
"functions. Higher numbers decrease the frequency of the callback "
288+
"rate. Must be a power of two (default: 8)",
289+
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
290+
OPAL_INFO_LVL_5,
291+
MCA_BASE_VAR_SCOPE_READONLY,
292+
&opal_progress_lp_call_ratio);
293+
if (0 > ret) {
294+
return ret;
295+
}
296+
297+
if (opal_progress_lp_call_ratio & (opal_progress_lp_call_ratio - 1)) {
298+
opal_output(0, "MCA variable progress_lp_call_ratio must be a power of two. value = %u",
299+
opal_progress_lp_call_ratio);
300+
return OPAL_ERR_BAD_PARAM;
301+
}
302+
282303
opal_abort_print_stack = false;
283304
ret = mca_base_var_register("opal", "opal", NULL, "abort_print_stack",
284305
"If nonzero, print out a stack trace when abort is invoked",

opal/runtime/opal_params.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ OPAL_DECLSPEC extern bool opal_abort_print_stack;
7373
*/
7474
OPAL_DECLSPEC extern int opal_abort_delay;
7575

76+
/**
77+
* Ratio of calls to high-priority to low-priority progress functions.
78+
* Must be a power of two.
79+
*/
80+
OPAL_DECLSPEC extern unsigned int opal_progress_lp_call_ratio;
81+
7682
#if OPAL_ENABLE_DEBUG
7783
extern bool opal_progress_debug;
7884
#endif

opal/runtime/opal_progress.c

Lines changed: 145 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
13-
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
13+
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1414
* reserved.
1515
* Copyright (c) 2015-2016 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
@@ -55,10 +55,15 @@ int opal_progress_spin_count = 10000;
5555
static opal_atomic_lock_t progress_lock;
5656

5757
/* callbacks to progress */
58-
static opal_progress_callback_t *callbacks = NULL;
58+
static volatile opal_progress_callback_t *callbacks = NULL;
5959
static size_t callbacks_len = 0;
6060
static size_t callbacks_size = 0;
6161

62+
static volatile opal_progress_callback_t *callbacks_lp = NULL;
63+
static size_t callbacks_lp_len = 0;
64+
static size_t callbacks_lp_size = 0;
65+
static uint64_t callbacks_lp_mask = 0x7;
66+
6267
/* do we want to call sched_yield() if nothing happened */
6368
bool opal_progress_yield_when_idle = false;
6469

@@ -89,6 +94,9 @@ static int debug_output = -1;
8994
*/
9095
static int fake_cb(void) { return 0; }
9196

97+
static int _opal_progress_unregister (opal_progress_callback_t cb, volatile opal_progress_callback_t *callback_array,
98+
size_t *callback_array_len);
99+
92100
/* init the progress engine - called from orte_init */
93101
int
94102
opal_progress_init(void)
@@ -105,6 +113,30 @@ opal_progress_init(void)
105113
}
106114
#endif
107115

116+
117+
callbacks_lp_mask = opal_progress_lp_call_ratio - 1;
118+
119+
callbacks_size = callbacks_lp_size = 8;
120+
121+
callbacks = malloc (callbacks_size * sizeof (callbacks[0]));
122+
callbacks_lp = malloc (callbacks_lp_size * sizeof (callbacks_lp[0]));
123+
124+
if (NULL == callbacks || NULL == callbacks_lp) {
125+
free ((void *) callbacks);
126+
free ((void *) callbacks_lp);
127+
callbacks_size = callbacks_lp_size = 0;
128+
callbacks = callbacks_lp = NULL;
129+
return OPAL_ERR_OUT_OF_RESOURCE;
130+
}
131+
132+
for (size_t i = 0 ; i < callbacks_size ; ++i) {
133+
callbacks[i] = fake_cb;
134+
}
135+
136+
for (size_t i = 0 ; i < callbacks_lp_size ; ++i) {
137+
callbacks_lp[i] = fake_cb;
138+
}
139+
108140
OPAL_OUTPUT((debug_output, "progress: initialized event flag to: %x",
109141
opal_progress_event_flag));
110142
OPAL_OUTPUT((debug_output, "progress: initialized yield_when_idle to: %s",
@@ -126,10 +158,13 @@ opal_progress_finalize(void)
126158

127159
callbacks_len = 0;
128160
callbacks_size = 0;
129-
if (NULL != callbacks) {
130-
free(callbacks);
131-
callbacks = NULL;
132-
}
161+
free ((void *) callbacks);
162+
callbacks = NULL;
163+
164+
callbacks_lp_len = 0;
165+
callbacks_lp_size = 0;
166+
free ((void *) callbacks_lp);
167+
callbacks_lp = NULL;
133168

134169
opal_atomic_unlock(&progress_lock);
135170

@@ -151,6 +186,7 @@ opal_progress_finalize(void)
151186
void
152187
opal_progress(void)
153188
{
189+
static volatile uint64_t num_calls = 0;
154190
size_t i;
155191
int events = 0;
156192

@@ -189,6 +225,13 @@ opal_progress(void)
189225
events += (callbacks[i])();
190226
}
191227

228+
if ((OPAL_THREAD_ADD64((volatile int64_t *) &num_calls, 1) & callbacks_lp_mask) == 0) {
229+
/* run low priority callbacks once every opal_progress_lp_call_ratio calls to opal_progress() (8 by default) */
230+
for (i = 0 ; i < callbacks_lp_len ; ++i) {
231+
events += (callbacks_lp[i])();
232+
}
233+
}
234+
192235
#if OPAL_HAVE_SCHED_YIELD
193236
if (opal_progress_yield_when_idle && events <= 0) {
194237
/* If there is nothing to do - yield the processor - otherwise
@@ -310,71 +353,130 @@ opal_progress_set_event_poll_rate(int polltime)
310353
#endif
311354
}
312355

356+
static int opal_progress_find_cb (opal_progress_callback_t cb, volatile opal_progress_callback_t *cbs,
357+
size_t cbs_len)
358+
{
359+
for (size_t i = 0 ; i < cbs_len ; ++i) {
360+
if (cbs[i] == cb) {
361+
return (int) i;
362+
}
363+
}
313364

314-
int
315-
opal_progress_register(opal_progress_callback_t cb)
365+
return OPAL_ERR_NOT_FOUND;
366+
}
367+
368+
static int _opal_progress_register (opal_progress_callback_t cb, volatile opal_progress_callback_t **cbs,
369+
size_t *cbs_size, size_t *cbs_len)
316370
{
317371
int ret = OPAL_SUCCESS;
318-
size_t index;
319372

320-
opal_atomic_lock(&progress_lock);
373+
if (OPAL_ERR_NOT_FOUND != opal_progress_find_cb (cb, *cbs, *cbs_len)) {
374+
return OPAL_SUCCESS;
375+
}
321376

322377
/* see if we need to allocate more space */
323-
if (callbacks_len + 1 > callbacks_size) {
324-
opal_progress_callback_t *tmp;
325-
tmp = (opal_progress_callback_t*)realloc(callbacks, sizeof(opal_progress_callback_t) * (callbacks_size + 4));
378+
if (*cbs_len + 1 > *cbs_size) {
379+
opal_progress_callback_t *tmp, *old;
380+
381+
tmp = (opal_progress_callback_t *) malloc (sizeof (tmp[0]) * 2 * *cbs_size);
326382
if (tmp == NULL) {
327-
ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
328-
goto cleanup;
383+
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
384+
}
385+
386+
if (*cbs) {
387+
/* copy old callbacks */
388+
memcpy (tmp, (void *) *cbs, sizeof(tmp[0]) * *cbs_size);
329389
}
330-
/* registering fake callbacks to fill callbacks[] */
331-
for( index = callbacks_len + 1 ; index < callbacks_size + 4 ; index++) {
332-
tmp[index] = &fake_cb;
390+
391+
for (size_t i = *cbs_len ; i < 2 * *cbs_size ; ++i) {
392+
tmp[i] = fake_cb;
333393
}
334394

335-
callbacks = tmp;
336-
callbacks_size += 4;
395+
opal_atomic_wmb ();
396+
397+
/* swap out callback array */
398+
old = opal_atomic_swap_ptr (cbs, tmp);
399+
400+
opal_atomic_wmb ();
401+
402+
free (old);
403+
*cbs_size *= 2;
337404
}
338405

339-
callbacks[callbacks_len++] = cb;
406+
cbs[0][*cbs_len] = cb;
407+
++*cbs_len;
340408

341-
cleanup:
409+
opal_atomic_wmb ();
410+
411+
return ret;
412+
}
413+
414+
int opal_progress_register (opal_progress_callback_t cb)
415+
{
416+
int ret;
417+
418+
opal_atomic_lock(&progress_lock);
419+
420+
(void) _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len);
421+
422+
ret = _opal_progress_register (cb, &callbacks, &callbacks_size, &callbacks_len);
342423

343424
opal_atomic_unlock(&progress_lock);
344425

345426
return ret;
346427
}
347428

348-
int
349-
opal_progress_unregister(opal_progress_callback_t cb)
429+
int opal_progress_register_lp (opal_progress_callback_t cb)
350430
{
351-
size_t i;
352-
int ret = OPAL_ERR_NOT_FOUND;
431+
int ret;
353432

354433
opal_atomic_lock(&progress_lock);
355434

356-
for (i = 0 ; i < callbacks_len ; ++i) {
357-
if (cb == callbacks[i]) {
358-
callbacks[i] = &fake_cb;
359-
ret = OPAL_SUCCESS;
360-
break;
361-
}
435+
(void) _opal_progress_unregister (cb, callbacks, &callbacks_len);
436+
437+
ret = _opal_progress_register (cb, &callbacks_lp, &callbacks_lp_size, &callbacks_lp_len);
438+
439+
opal_atomic_unlock(&progress_lock);
440+
441+
return ret;
442+
}
443+
444+
static int _opal_progress_unregister (opal_progress_callback_t cb, volatile opal_progress_callback_t *callback_array,
445+
size_t *callback_array_len)
446+
{
447+
int ret = opal_progress_find_cb (cb, callback_array, *callback_array_len);
448+
if (OPAL_ERR_NOT_FOUND == ret) {
449+
return ret;
362450
}
363451

364452
/* If we found the function we're unregistering: If callbacks_len
365453
is 0, we're not going to do anything interesting anyway, so
366454
skip. If callbacks_len is 1, it will soon be 0, so no need to
367-
do any repacking. size_t can be unsigned, so 0 - 1 is bad for
368-
a loop condition :). */
369-
if (OPAL_SUCCESS == ret) {
370-
if (callbacks_len > 1 ) {
371-
/* now tightly pack the array */
372-
for ( ; i < callbacks_len - 1 ; ++i) {
373-
callbacks[i] = callbacks[i + 1];
374-
}
375-
}
376-
callbacks[callbacks_len - 1] = &fake_cb;
377-
callbacks_len--;
455+
do any repacking. */
456+
for (size_t i = (size_t) ret ; i < *callback_array_len - 1 ; ++i) {
457+
/* copy callbacks atomically since another thread may be in
458+
* opal_progress(). */
459+
(void) opal_atomic_swap_ptr (callback_array + i, callback_array[i+1]);
460+
}
461+
462+
callback_array[*callback_array_len] = fake_cb;
463+
--*callback_array_len;
464+
465+
return OPAL_SUCCESS;
466+
}
467+
468+
int opal_progress_unregister (opal_progress_callback_t cb)
469+
{
470+
int ret;
471+
472+
opal_atomic_lock(&progress_lock);
473+
474+
ret = _opal_progress_unregister (cb, callbacks, &callbacks_len);
475+
476+
if (OPAL_SUCCESS != ret) {
477+
/* if not in the high-priority array try to remove from the lp array.
478+
* a callback will never be in both. */
479+
ret = _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len);
378480
}
379481

380482
opal_atomic_unlock(&progress_lock);

0 commit comments

Comments
 (0)