Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion ompi/runtime/ompi_mpi_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2012-2013 Inria. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* Copyright (c) 2014-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
*
Expand Down Expand Up @@ -284,6 +284,7 @@ opal_list_t ompi_registered_datareps = {{0}};

/* Whether timing is enabled; off by default. */
bool ompi_enable_timing = false;
/* MPI-layer idle/progress tunables, defined in ompi_mpi_params.c. */
extern bool ompi_mpi_yield_when_idle;
/* Must be declared int: the variable is defined as a signed int
 * (initialized to -1), is registered as MCA_BASE_VAR_TYPE_INT, and is
 * passed to opal_progress_set_sleep_when_idle_threshold(int).  Declaring
 * it with an unsigned type would conflict with the definition and lose
 * the negative "never sleep" sentinel. */
extern int ompi_mpi_sleep_when_idle_threshold;
extern int ompi_mpi_event_tick_rate;

/**
Expand Down Expand Up @@ -971,6 +972,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* see if yield_when_idle was specified - if so, use it */
opal_progress_set_yield_when_idle(ompi_mpi_yield_when_idle);

/* set the threshold to start sleeping when idle */
opal_progress_set_sleep_when_idle_threshold(ompi_mpi_sleep_when_idle_threshold);

/* negative value means use default - just don't do anything */
if (ompi_mpi_event_tick_rate >= 0) {
opal_progress_set_event_poll_rate(ompi_mpi_event_tick_rate);
Expand Down
15 changes: 12 additions & 3 deletions ompi/runtime/ompi_mpi_params.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2016 Research Organization for Information Science
* Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -63,6 +63,7 @@ bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);

/* Handed to opal_progress_set_yield_when_idle() during ompi_mpi_init(). */
bool ompi_mpi_yield_when_idle = false;
/* Handed to opal_progress_set_sleep_when_idle_threshold() during
 * ompi_mpi_init(); -1 means opal_progress() never sleeps when idle. */
int ompi_mpi_sleep_when_idle_threshold = -1;
/* A negative value means "use the default": ompi_mpi_init() only calls
 * opal_progress_set_event_poll_rate() when this is >= 0. */
int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL;
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
Expand Down Expand Up @@ -109,12 +110,20 @@ int ompi_mpi_register_params(void)
exactly/under-subscribed, or 1 when oversubscribed */
ompi_mpi_yield_when_idle = false;
(void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
"Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
"Yield the processor when waiting for communication (for MPI processes, will default to 1 when oversubscribing nodes)",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You still mention MPI in here, and also mention oversubscription, but the code doesn't check for oversubscription. (Per comments, we're still discussing what the default should be -- I just want to make sure that we don't forget to update the help message when a decision is made.)

MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mpi_yield_when_idle);

ompi_mpi_sleep_when_idle_threshold = -1;
(void) mca_base_var_register("ompi", "mpi", NULL, "sleep_when_idle_threshold",
"Sleep after waiting for communication too long",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mpi_sleep_when_idle_threshold);

ompi_mpi_event_tick_rate = -1;
(void) mca_base_var_register("ompi", "mpi", NULL, "event_tick_rate",
"How often to progress TCP communications (0 = never, otherwise specified in microseconds)",
Expand Down
49 changes: 40 additions & 9 deletions opal/runtime/opal_progress.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* $COPYRIGHT$
Expand All @@ -27,6 +27,11 @@
#ifdef HAVE_SCHED_H
#include <sched.h>
#endif
#if HAVE_POLL_H
#include <poll.h>
#elif HAVE_SYS_POLL_H
#include <sys/poll.h>
#endif

#include "opal/runtime/opal_progress.h"
#include "opal/mca/event/event.h"
Expand Down Expand Up @@ -66,6 +71,9 @@ static size_t callbacks_lp_size = 0;

/* do we want to call sched_yield() if nothing happened */
bool opal_progress_yield_when_idle = false;
/* do we want to sleep if nothing happened for a while: a negative value
 * means never sleep, 0 means sleep on every idle pass, and a positive
 * value is the number of idle passes tolerated before sleeping starts */
int opal_progress_sleep_when_idle_threshold = -1;
/* number of consecutive opal_progress() passes that progressed no events;
 * despite the name it is incremented whether or not sched_yield() is
 * actually called, and it is reset to 0 as soon as a pass progresses
 * events */
static int yield_count = 0;

#if OPAL_PROGRESS_USE_TIMERS
static opal_timer_t event_progress_last_time = 0;
Expand Down Expand Up @@ -229,16 +237,25 @@ opal_progress(void)
}
}

if (OPAL_UNLIKELY(opal_progress_yield_when_idle)) {
if (events <= 0) {
if (opal_progress_sleep_when_idle_threshold < 0 || yield_count < opal_progress_sleep_when_idle_threshold) {
yield_count++;
#if OPAL_HAVE_SCHED_YIELD
if (opal_progress_yield_when_idle && events <= 0) {
/* If there is nothing to do - yield the processor - otherwise
* we could consume the processor for the entire time slice. If
* the processor is oversubscribed - this will result in a best-case
* latency equivalent to the time-slice.
*/
sched_yield();
}
/* If there is nothing to do - yield the processor - otherwise
* we could consume the processor for the entire time slice. If
* the processor is oversubscribed - this will result in a best-case
* latency equivalent to the time-slice.
*/
sched_yield();
#endif /* defined(HAVE_SCHED_YIELD) */
} else {
poll(NULL, 0, 1);
}
} else {
yield_count = 0;
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If, per the comment above, we end up squashing both mechanisms into the same MCA variable, you should probably squash these two blocks together (i.e., the yield() and poll() blocks) so that we only have to have a single if statement (probably should be with an OPAL_UNLIKELY) gating entrance into that block.

}


Expand Down Expand Up @@ -310,6 +327,20 @@ opal_progress_set_yield_when_idle(bool yieldopt)
}


/*
 * Install a new sleep-when-idle threshold and hand back the value that
 * was in effect before the call.
 */
int
opal_progress_set_sleep_when_idle_threshold(int thresholdopt)
{
    const int previous = opal_progress_sleep_when_idle_threshold;

    OPAL_OUTPUT((debug_output, "progress: progress_set_sleep_threshold to %d",
                 thresholdopt));

    opal_progress_sleep_when_idle_threshold = thresholdopt;
    return previous;
}


void
opal_progress_set_event_poll_rate(int polltime)
{
Expand Down
28 changes: 23 additions & 5 deletions opal/runtime/opal_progress.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
* All rights reserved.
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -113,19 +115,35 @@ OPAL_DECLSPEC void opal_progress_event_users_decrement(void);
/**
* Set whether opal_progress() should yield when idle
*
* Set whether opal_progress() should yield the processor (either by
* sched_yield() or SwitchToThread()) if no events were progressed
* during the progress loop. The return value of the callback
* functions is used to determine whether or not yielding is required.
* Set whether opal_progress() should yield the processor by calling
* sched_yield() if no events were progressed during the progress loop.
* The return value of the callback functions is used to determine
* whether or not yielding is required.
* By default, the event loop will yield when the progress function is
* idle.
* idle only when oversubscribed.
*
* @param yieldopt Whether to yield when idle.
* @return Previous value of the yield_when_idle option.
*/
OPAL_DECLSPEC bool opal_progress_set_yield_when_idle(bool yieldopt);


/**
 * Set the threshold after which an idle opal_progress() starts sleeping
 *
 * opal_progress() counts consecutive passes in which no events were
 * progressed; the return value of the callback functions is used to
 * determine whether a pass was idle. While that count is below the
 * threshold the pass does not sleep. Once the count reaches the
 * threshold, every further idle pass sleeps briefly (a short poll()
 * timeout in opal_progress.c) until a pass progresses events again,
 * which resets the count.
 *
 * A negative value (the default is -1) means never sleep; 0 means
 * sleep on every idle pass.
 *
 * @param thresholdopt Number of consecutive idle passes tolerated
 *                     before sleeping starts (negative: never sleep).
 * @return Previous value of the sleep_when_idle_threshold option.
 */
OPAL_DECLSPEC int opal_progress_set_sleep_when_idle_threshold(int thresholdopt);


/**
* Set time between calls into the event library
*
Expand Down