Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion opal/mca/btl/usnic/btl_usnic.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ extern uint64_t opal_btl_usnic_ticks;
extern opal_recursive_mutex_t btl_usnic_lock;

static inline uint64_t
get_nsec(void)
get_ticks(void)
{
return opal_btl_usnic_ticks;
}
Expand Down Expand Up @@ -206,6 +206,14 @@ typedef struct opal_btl_usnic_component_t {
/** retrans characteristics */
int retrans_timeout;

/** max number of messages re-sent during a single progress
iteration */
int max_resends_per_iteration;

/** minimum number of times through component progress before
checking to see if standalone ACKs need to be sent */
int ack_iteration_delay;

/** transport header length for all usNIC devices on this server
(it is guaranteed that all usNIC devices on a single server
will have the same underlying transport, and therefore the
Expand Down
25 changes: 14 additions & 11 deletions opal/mca/btl/usnic/btl_usnic_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,

static void usnic_clock_callback(int fd, short flags, void *timeout)
{
/* 1ms == 1,000,000 ns */
opal_btl_usnic_ticks += 1000000;
/* Increase by so many ticks that we will definitely force sending
any ACKs that are pending */
opal_btl_usnic_ticks += 1000;

/* run progress to make sure time change gets noticed */
usnic_component_progress();
Expand Down Expand Up @@ -1132,7 +1133,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
*/
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
static int usnic_component_progress_2(void);
static int usnic_component_progress_2(bool check_priority);
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
opal_btl_usnic_channel_t *channel, int cq_ret);

Expand All @@ -1145,9 +1146,7 @@ static int usnic_component_progress(void)
struct fi_cq_entry completion;
opal_btl_usnic_channel_t *channel;
static bool fastpath_ok = true;

/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
bool check_priority = true;

count = 0;
if (fastpath_ok) {
Expand Down Expand Up @@ -1180,10 +1179,11 @@ static int usnic_component_progress(void)
usnic_handle_cq_error(module, channel, ret);
}
}
check_priority = false;
}

fastpath_ok = true;
return count + usnic_component_progress_2();
return count + usnic_component_progress_2(check_priority);
}

static int usnic_handle_completion(
Expand Down Expand Up @@ -1304,7 +1304,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
}
}

static int usnic_component_progress_2(void)
static int usnic_component_progress_2(bool check_priority)
{
int i, j, count = 0, num_events, ret;
opal_btl_usnic_module_t* module;
Expand All @@ -1313,15 +1313,18 @@ static int usnic_component_progress_2(void)
int rc;
int c;

/* update our simulated clock */
opal_btl_usnic_ticks += 5000;
opal_btl_usnic_ticks += 1;

/* If we need to check priority, start with the priority channel.
Otherwise, just check the data channel. */
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;

/* Poll for completions */
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
module = mca_btl_usnic_component.usnic_active_modules[i];

/* poll each channel */
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
channel = &module->mod_channels[c];

if (channel->chan_deferred_recv != NULL) {
Expand Down
8 changes: 8 additions & 0 deletions opal/mca/btl/usnic/btl_usnic_mca.c
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,14 @@ int opal_btl_usnic_component_register(void)
5000, &mca_btl_usnic_component.retrans_timeout,
REGINT_GE_ONE, OPAL_INFO_LVL_5));

CHECK(reg_int("max_resends_per_iteration", "Maximum number of frames to resend in a single iteration through usNIC component progress",
16, &mca_btl_usnic_component.max_resends_per_iteration,
REGINT_GE_ONE, OPAL_INFO_LVL_5));

CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
4, &mca_btl_usnic_component.ack_iteration_delay,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));

CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
0, &max_tiny_msg_size,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
Expand Down
28 changes: 14 additions & 14 deletions opal/mca/btl/usnic/btl_usnic_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -963,11 +963,12 @@ usnic_do_resends(
opal_btl_usnic_send_segment_t *sseg;
opal_btl_usnic_endpoint_t *endpoint;
struct opal_btl_usnic_channel_t *data_channel;
int ret;
int ret, count;

data_channel = &module->mod_channels[USNIC_DATA_CHANNEL];

while ((get_send_credits(data_channel) > 1) &&
count = mca_btl_usnic_component.max_resends_per_iteration;
while (count > 0 && (get_send_credits(data_channel) > 1) &&
!opal_list_is_empty(&module->pending_resend_segs)) {

/*
Expand Down Expand Up @@ -1009,6 +1010,8 @@ usnic_do_resends(
BTL_ERROR(("hotel checkin failed\n"));
abort(); /* should not be possible */
}

--count;
}
}

Expand Down Expand Up @@ -1236,7 +1239,7 @@ opal_btl_usnic_module_progress_sends(

/* Is it time to send ACK? */
if (endpoint->endpoint_acktime == 0 ||
endpoint->endpoint_acktime <= get_nsec()) {
endpoint->endpoint_acktime <= get_ticks()) {
if (OPAL_LIKELY(opal_btl_usnic_ack_send(module, endpoint) == OPAL_SUCCESS)) {
opal_btl_usnic_remove_from_endpoints_needing_ack(endpoint);
} else {
Expand Down Expand Up @@ -2366,14 +2369,14 @@ static void init_freelists(opal_btl_usnic_module_t *module)
uint32_t segsize;

segsize = (module->local_modex.max_msg_size +
opal_cache_line_size - 1) &
mca_btl_usnic_component.prefix_send_offset +
opal_cache_line_size - 1) &
~(opal_cache_line_size - 1);

/* Send frags freelists */
OBJ_CONSTRUCT(&module->small_send_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->small_send_frags,
sizeof(opal_btl_usnic_small_send_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_small_send_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_small_send_frag_t),
segsize,
Expand All @@ -2390,8 +2393,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)

OBJ_CONSTRUCT(&module->large_send_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->large_send_frags,
sizeof(opal_btl_usnic_large_send_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_large_send_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_large_send_frag_t),
0, /* payload size */
Expand All @@ -2408,8 +2410,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)

OBJ_CONSTRUCT(&module->put_dest_frags, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->put_dest_frags,
sizeof(opal_btl_usnic_put_dest_frag_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_put_dest_frag_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_put_dest_frag_t),
0, /* payload size */
Expand All @@ -2427,8 +2428,7 @@ static void init_freelists(opal_btl_usnic_module_t *module)
/* list of segments to use for sending */
OBJ_CONSTRUCT(&module->chunk_segs, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->chunk_segs,
sizeof(opal_btl_usnic_chunk_segment_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_chunk_segment_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_chunk_segment_t),
segsize,
Expand All @@ -2446,11 +2446,11 @@ static void init_freelists(opal_btl_usnic_module_t *module)
/* ACK segments freelist */
uint32_t ack_segment_len;
ack_segment_len = (sizeof(opal_btl_usnic_btl_header_t) +
mca_btl_usnic_component.prefix_send_offset +
opal_cache_line_size - 1) & ~(opal_cache_line_size - 1);
OBJ_CONSTRUCT(&module->ack_segs, opal_free_list_t);
rc = usnic_compat_free_list_init(&module->ack_segs,
sizeof(opal_btl_usnic_ack_segment_t) +
mca_btl_usnic_component.prefix_send_offset,
sizeof(opal_btl_usnic_ack_segment_t),
opal_cache_line_size,
OBJ_CLASS(opal_btl_usnic_ack_segment_t),
ack_segment_len,
Expand Down
7 changes: 5 additions & 2 deletions opal/mca/btl/usnic/btl_usnic_recv.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
}

/* give this process a chance to send something before ACKing */
/* A heuristic: set to send this ACK after we have checked our
incoming DATA_CHANNEL component.ack_iteration_delay times
(i.e., so we can piggyback an ACK on an outgoing send) */
if (0 == endpoint->endpoint_acktime) {
endpoint->endpoint_acktime = get_nsec() + 50000; /* 50 usec */
endpoint->endpoint_acktime =
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
}

/* Save this incoming segment in the received segments array on the
Expand Down