Skip to content

Commit 968b1a5

Browse files
committed
btl/usnic: clarifications and fixes regarding ACKs
New MCA parameter: btl_usnic_ack_iteration_delay. Set this to the number of times through the usNIC component progress function before sending a standalone ACK (vs. piggy-backing the ACK on any other send going to the target peer). Use "ticks" language to clarify that we're really counting the number of times through the usNIC component DATA_CHANNEL completion check (to check for incoming messages) -- it has no relation to wall clock time whatsoever. Also slightly change the channel-checking scheme in usNIC component progress: only check the PRIORITY channel once (vs. checking it once, not finding anything, and then falling through the progress_2() where we check PRIORITY again and then check the DATA channel). As before, if our "progress" libevent fires, increment the tick counter enough to guarantee that all endpoints that need an ACK will get triggered to send standalone ACKs the next time through progress, if necessary. Signed-off-by: Jeff Squyres <[email protected]>
1 parent ce2910a commit 968b1a5

File tree

4 files changed

+27
-13
lines changed

4 files changed

+27
-13
lines changed

opal/mca/btl/usnic/btl_usnic.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ typedef struct opal_btl_usnic_component_t {
190190
/** retrans characteristics */
191191
int retrans_timeout;
192192

193+
/** minimum number of times through component progress before
194+
checking to see if standalone ACKs need to be sent */
195+
int ack_iteration_delay;
196+
193197
/** transport header length for all usNIC devices on this server
194198
(it is guaranteed that all usNIC devices on a single server
195199
will have the same underlying transport, and therefore the

opal/mca/btl/usnic/btl_usnic_component.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -380,8 +380,9 @@ static int check_usnic_config(opal_btl_usnic_module_t *module,
380380

381381
static void usnic_clock_callback(int fd, short flags, void *timeout)
382382
{
383-
/* 1ms == 1,000,000 ns */
384-
opal_btl_usnic_ticks += 1000000;
383+
/* Increase by so many ticks that we will definitely force sending
384+
any ACKs that are pending */
385+
opal_btl_usnic_ticks += 1000;
385386

386387
/* run progress to make sure time change gets noticed */
387388
usnic_component_progress();
@@ -1128,7 +1129,7 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
11281129
*/
11291130
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
11301131
opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
1131-
static int usnic_component_progress_2(void);
1132+
static int usnic_component_progress_2(bool check_priority);
11321133
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
11331134
opal_btl_usnic_channel_t *channel, int cq_ret);
11341135

@@ -1141,9 +1142,7 @@ static int usnic_component_progress(void)
11411142
struct fi_cq_entry completion;
11421143
opal_btl_usnic_channel_t *channel;
11431144
static bool fastpath_ok = true;
1144-
1145-
/* update our simulated clock */
1146-
opal_btl_usnic_ticks += 5000;
1145+
bool check_priority = true;
11471146

11481147
count = 0;
11491148
if (fastpath_ok) {
@@ -1176,10 +1175,11 @@ static int usnic_component_progress(void)
11761175
usnic_handle_cq_error(module, channel, ret);
11771176
}
11781177
}
1178+
check_priority = false;
11791179
}
11801180

11811181
fastpath_ok = true;
1182-
return count + usnic_component_progress_2();
1182+
return count + usnic_component_progress_2(check_priority);
11831183
}
11841184

11851185
static int usnic_handle_completion(
@@ -1300,7 +1300,7 @@ usnic_handle_cq_error(opal_btl_usnic_module_t* module,
13001300
}
13011301
}
13021302

1303-
static int usnic_component_progress_2(void)
1303+
static int usnic_component_progress_2(bool check_priority)
13041304
{
13051305
int i, j, count = 0, num_events, ret;
13061306
opal_btl_usnic_module_t* module;
@@ -1309,15 +1309,18 @@ static int usnic_component_progress_2(void)
13091309
int rc;
13101310
int c;
13111311

1312-
/* update our simulated clock */
1313-
opal_btl_usnic_ticks += 5000;
1312+
opal_btl_usnic_ticks += 1;
1313+
1314+
/* If we need to check priority, start with the priority channel.
1315+
Otherwise, just check the data channel. */
1316+
int c_start = check_priority ? USNIC_PRIORITY_CHANNEL : USNIC_DATA_CHANNEL;
13141317

13151318
/* Poll for completions */
13161319
for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
13171320
module = mca_btl_usnic_component.usnic_active_modules[i];
13181321

13191322
/* poll each channel */
1320-
for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
1323+
for (c=c_start; c<USNIC_NUM_CHANNELS; ++c) {
13211324
channel = &module->mod_channels[c];
13221325

13231326
if (channel->chan_deferred_recv != NULL) {

opal/mca/btl/usnic/btl_usnic_mca.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,10 @@ int opal_btl_usnic_component_register(void)
249249
5000, &mca_btl_usnic_component.retrans_timeout,
250250
REGINT_GE_ONE, OPAL_INFO_LVL_5));
251251

252+
CHECK(reg_int("ack_iteration_delay", "Minimum number of times through usNIC \"progress\" function before checking to see if standalone ACKs need to be sent",
253+
0, &mca_btl_usnic_component.ack_iteration_delay,
254+
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
255+
252256
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
253257
0, &max_tiny_msg_size,
254258
REGINT_GE_ZERO, OPAL_INFO_LVL_5));

opal/mca/btl/usnic/btl_usnic_recv.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,12 @@ opal_btl_usnic_update_window(
112112
opal_btl_usnic_add_to_endpoints_needing_ack(endpoint);
113113
}
114114

115-
/* give this process a chance to send something before ACKing */
115+
/* A hueristic: set to send this ACK after we have checked our
116+
incoming DATA_CHANNEL component.act_iteration_delay times
117+
(i.e., so we can piggyback an ACK on an outgoing send) */
116118
if (0 == endpoint->endpoint_acktime) {
117-
endpoint->endpoint_acktime = get_ticks() + 50000;
119+
endpoint->endpoint_acktime =
120+
get_ticks() + mca_btl_usnic_component.ack_iteration_delay;
118121
}
119122

120123
/* Save this incoming segment in the received segmentss array on the

0 commit comments

Comments
 (0)