Skip to content

Commit 5b1d4fd

Browse files
authored
Merge pull request #2692 from jsquyres/pr/master/usnic-queue-fixes
master: usnic queue fixes
2 parents d8ed7b5 + b980e33 commit 5b1d4fd

12 files changed

+208
-66
lines changed

opal/mca/btl/usnic/btl_usnic_ack.c

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
2+
* Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved.
33
* $COPYRIGHT$
44
*
55
* Additional copyrights may follow
@@ -26,18 +26,24 @@
2626
#include "btl_usnic_connectivity.h"
2727

2828
/*
29-
* Force a retrans of a segment
29+
* Special case: we know exactly which segment is missing at the
30+
* receiver; explicitly force retrans of that segment.
3031
*/
3132
static void
32-
opal_btl_usnic_force_retrans(
33+
opal_btl_usnic_fast_retrans(
3334
opal_btl_usnic_endpoint_t *endpoint,
3435
opal_btl_usnic_seq_t ack_seq)
3536
{
3637
opal_btl_usnic_send_segment_t *sseg;
3738
int is;
3839

39-
is = WINDOW_SIZE_MOD(ack_seq+1);
40+
is = WINDOW_SIZE_MOD(ack_seq + 1);
4041
sseg = endpoint->endpoint_sent_segs[is];
42+
43+
// If the sseg is NULL, then there's nothing to retransmit. If
44+
// the hotel room is -1, the segment has already been queued up
45+
// for retransmit and there's nothing additional we need to do
46+
// here.
4147
if (sseg == NULL || sseg->ss_hotel_room == -1) {
4248
return;
4349
}
@@ -79,12 +85,14 @@ opal_btl_usnic_handle_ack(
7985
#endif
8086
++module->stats.num_old_dup_acks;
8187
return;
88+
}
8289

83-
/* A duplicate ACK means next seg was lost */
84-
} else if (ack_seq == endpoint->endpoint_ack_seq_rcvd) {
90+
/* A duplicate ACK means the ACK's sender did not receive the next
91+
seg that we sent */
92+
else if (ack_seq == endpoint->endpoint_ack_seq_rcvd) {
8593
++module->stats.num_dup_acks;
8694

87-
opal_btl_usnic_force_retrans(endpoint, ack_seq);
95+
opal_btl_usnic_fast_retrans(endpoint, ack_seq);
8896
return;
8997
}
9098

@@ -114,12 +122,11 @@ opal_btl_usnic_handle_ack(
114122
already been evicted and queued for resend.
115123
If it's not in the hotel, don't check it out! */
116124
if (OPAL_LIKELY(sseg->ss_hotel_room != -1)) {
117-
118125
opal_hotel_checkout(&endpoint->endpoint_hotel, sseg->ss_hotel_room);
119126
sseg->ss_hotel_room = -1;
120-
127+
}
121128
/* hotel_room == -1 means queued for resend, remove it */
122-
} else {
129+
else {
123130
opal_list_remove_item((&module->pending_resend_segs),
124131
&sseg->ss_base.us_list.super);
125132
}
@@ -191,19 +198,27 @@ opal_btl_usnic_handle_ack(
191198
/*
192199
* Send an ACK
193200
*/
194-
void
201+
int
195202
opal_btl_usnic_ack_send(
196203
opal_btl_usnic_module_t *module,
197204
opal_btl_usnic_endpoint_t *endpoint)
198205
{
199206
opal_btl_usnic_ack_segment_t *ack;
200207

208+
/* If we don't have any send credits in the priority channel,
209+
don't send it */
210+
if (module->mod_channels[USNIC_PRIORITY_CHANNEL].credits < 1) {
211+
return OPAL_ERR_OUT_OF_RESOURCE;
212+
}
213+
201214
/* Get an ACK frag. If we don't get one, just discard this ACK. */
202215
ack = opal_btl_usnic_ack_segment_alloc(module);
203216
if (OPAL_UNLIKELY(NULL == ack)) {
204-
return;
217+
return OPAL_ERR_OUT_OF_RESOURCE;
205218
}
206219

220+
--module->mod_channels[USNIC_PRIORITY_CHANNEL].credits;
221+
207222
/* send the seq of the lowest item in the window that
208223
we've received */
209224
ack->ss_base.us_btl_header->ack_seq =
@@ -239,7 +254,7 @@ opal_btl_usnic_ack_send(
239254
/* Stats */
240255
++module->stats.num_ack_sends;
241256

242-
return;
257+
return OPAL_SUCCESS;
243258
}
244259

245260
/*
@@ -249,6 +264,7 @@ void
249264
opal_btl_usnic_ack_complete(opal_btl_usnic_module_t *module,
250265
opal_btl_usnic_ack_segment_t *ack)
251266
{
267+
++module->mod_channels[USNIC_PRIORITY_CHANNEL].credits;
252268
opal_btl_usnic_ack_segment_return(module, ack);
253269
++module->mod_channels[ack->ss_channel].credits;
254270
}
@@ -291,4 +307,3 @@ opal_btl_usnic_ack_timeout(
291307
/* Stats */
292308
++module->stats.num_timeout_retrans;
293309
}
294-

opal/mca/btl/usnic/btl_usnic_ack.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
2+
* Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved.
33
* $COPYRIGHT$
44
*
55
* Additional copyrights may follow
@@ -67,7 +67,7 @@ void opal_btl_usnic_ack_complete(opal_btl_usnic_module_t *module,
6767
/*
6868
* Send an ACK
6969
*/
70-
void opal_btl_usnic_ack_send(opal_btl_usnic_module_t *module,
70+
int opal_btl_usnic_ack_send(opal_btl_usnic_module_t *module,
7171
opal_btl_usnic_endpoint_t *endpoint);
7272

7373
/*

opal/mca/btl/usnic/btl_usnic_component.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* All rights reserved.
1313
* Copyright (c) 2006 Sandia National Laboratories. All rights
1414
* reserved.
15-
* Copyright (c) 2008-2016 Cisco Systems, Inc. All rights reserved.
15+
* Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved.
1616
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
1717
* reserved.
1818
* Copyright (c) 2014 Intel, Inc. All rights reserved.
@@ -1159,6 +1159,8 @@ static int usnic_component_progress(void)
11591159
if (OPAL_LIKELY(OPAL_BTL_USNIC_SEG_RECV ==
11601160
rseg->rs_base.us_type)) {
11611161
opal_btl_usnic_recv_fast(module, rseg, channel);
1162+
++module->stats.num_seg_total_completions;
1163+
++module->stats.num_seg_recv_completions;
11621164
fastpath_ok = false; /* prevent starvation */
11631165
return 1;
11641166
} else {
@@ -1188,6 +1190,8 @@ static int usnic_handle_completion(
11881190
seg = (opal_btl_usnic_segment_t*)completion->op_context;
11891191
rseg = (opal_btl_usnic_recv_segment_t*)seg;
11901192

1193+
++module->stats.num_seg_total_completions;
1194+
11911195
/* Make the completion be Valgrind-defined */
11921196
opal_memchecker_base_mem_defined(seg, sizeof(*seg));
11931197

@@ -1198,24 +1202,30 @@ static int usnic_handle_completion(
11981202

11991203
/**** Send ACK completions ****/
12001204
case OPAL_BTL_USNIC_SEG_ACK:
1205+
++module->stats.num_seg_ack_completions;
12011206
opal_btl_usnic_ack_complete(module,
12021207
(opal_btl_usnic_ack_segment_t *)seg);
12031208
break;
12041209

1205-
/**** Send of frag segment completion ****/
1210+
/**** Send of frag segment completion (i.e., the MPI message's
1211+
one-and-only segment has completed sending) ****/
12061212
case OPAL_BTL_USNIC_SEG_FRAG:
1213+
++module->stats.num_seg_frag_completions;
12071214
opal_btl_usnic_frag_send_complete(module,
12081215
(opal_btl_usnic_frag_segment_t*)seg);
12091216
break;
12101217

1211-
/**** Send of chunk segment completion ****/
1218+
/**** Send of chunk segment completion (i.e., part of a large MPI
1219+
message is done sending) ****/
12121220
case OPAL_BTL_USNIC_SEG_CHUNK:
1221+
++module->stats.num_seg_chunk_completions;
12131222
opal_btl_usnic_chunk_send_complete(module,
12141223
(opal_btl_usnic_chunk_segment_t*)seg);
12151224
break;
12161225

12171226
/**** Receive completions ****/
12181227
case OPAL_BTL_USNIC_SEG_RECV:
1228+
++module->stats.num_seg_recv_completions;
12191229
opal_btl_usnic_recv(module, rseg, channel);
12201230
break;
12211231

opal/mca/btl/usnic/btl_usnic_endpoint.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* All rights reserved.
1212
* Copyright (c) 2006 Sandia National Laboratories. All rights
1313
* reserved.
14-
* Copyright (c) 2013-2016 Cisco Systems, Inc. All rights reserved.
14+
* Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved.
1515
* $COPYRIGHT$
1616
*
1717
* Additional copyrights may follow
@@ -160,6 +160,8 @@ typedef struct mca_btl_base_endpoint_t {
160160
opal_btl_usnic_seq_t endpoint_next_seq_to_send; /* n_t */
161161
opal_btl_usnic_seq_t endpoint_ack_seq_rcvd; /* n_a */
162162

163+
/* Table where sent segments sit while waiting for their ACKs.
164+
When a segment is ACKed, it is removed from this table. */
163165
struct opal_btl_usnic_send_segment_t *endpoint_sent_segs[WINDOW_SIZE];
164166

165167
/* Values for the current proc to receive from this endpoint on

opal/mca/btl/usnic/btl_usnic_frag.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* All rights reserved.
1212
* Copyright (c) 2006 Sandia National Laboratories. All rights
1313
* reserved.
14-
* Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved.
14+
* Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved.
1515
* $COPYRIGHT$
1616
*
1717
* Additional copyrights may follow
@@ -50,15 +50,15 @@ common_send_seg_helper(opal_btl_usnic_send_segment_t *seg)
5050

5151
static void
5252
chunk_seg_constructor(
53-
opal_btl_usnic_send_segment_t *seg)
53+
opal_btl_usnic_chunk_segment_t *cseg)
5454
{
5555
opal_btl_usnic_segment_t *bseg;
5656

57-
bseg = &seg->ss_base;
57+
bseg = &cseg->ss_base;
5858
bseg->us_type = OPAL_BTL_USNIC_SEG_CHUNK;
5959

6060
/* some more common initialization */
61-
common_send_seg_helper(seg);
61+
common_send_seg_helper(cseg);
6262

6363
/* payload starts next byte beyond BTL chunk header */
6464
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_chunk_header + 1);
@@ -68,15 +68,15 @@ chunk_seg_constructor(
6868

6969
static void
7070
frag_seg_constructor(
71-
opal_btl_usnic_send_segment_t *seg)
71+
opal_btl_usnic_frag_segment_t *fseg)
7272
{
7373
opal_btl_usnic_segment_t *bseg;
7474

75-
bseg = &seg->ss_base;
75+
bseg = &fseg->ss_base;
7676
bseg->us_type = OPAL_BTL_USNIC_SEG_FRAG;
7777

7878
/* some more common initialization */
79-
common_send_seg_helper(seg);
79+
common_send_seg_helper(fseg);
8080

8181
/* payload starts next byte beyond BTL header */
8282
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_header + 1);
@@ -86,7 +86,7 @@ frag_seg_constructor(
8686

8787
static void
8888
ack_seg_constructor(
89-
opal_btl_usnic_send_segment_t *ack)
89+
opal_btl_usnic_ack_segment_t *ack)
9090
{
9191
opal_btl_usnic_segment_t *bseg;
9292

opal/mca/btl/usnic/btl_usnic_frag.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* All rights reserved.
1212
* Copyright (c) 2006 Sandia National Laboratories. All rights
1313
* reserved.
14-
* Copyright (c) 2013-2015 Cisco Systems, Inc. All rights reserved.
14+
* Copyright (c) 2013-2017 Cisco Systems, Inc. All rights reserved.
1515
* $COPYRIGHT$
1616
*
1717
* Additional copyrights may follow
@@ -370,6 +370,7 @@ opal_btl_usnic_small_send_frag_alloc(opal_btl_usnic_module_t *module)
370370

371371
/* this belongs in constructor... */
372372
frag->ssf_base.sf_base.uf_freelist = &(module->small_send_frags);
373+
frag->ssf_segment.ss_send_posted = 0;
373374

374375
assert(frag);
375376
assert(OPAL_BTL_USNIC_FRAG_SMALL_SEND == frag->ssf_base.sf_base.uf_type);
@@ -480,6 +481,14 @@ opal_btl_usnic_frag_return(
480481
}
481482
}
482483

484+
/* Reset the "send_posted" flag on the embedded segment for small
485+
fragments */
486+
else if (frag->uf_type == OPAL_BTL_USNIC_FRAG_SMALL_SEND) {
487+
opal_btl_usnic_small_send_frag_t *sfrag;
488+
sfrag = (opal_btl_usnic_small_send_frag_t *) frag;
489+
sfrag->ssf_segment.ss_send_posted = 0;
490+
}
491+
483492
USNIC_COMPAT_FREE_LIST_RETURN(frag->uf_freelist, &(frag->uf_base.super));
484493
}
485494

0 commit comments

Comments
 (0)