Skip to content

Commit 6445eee

Browse files
committed
fix(GRO): fix per-datagram parsing and loss accounting
Parse coalesced GRO payloads using the negotiated blksize stride and account loss/jitter per datagram. Avoids inflated “loss” when kernel GRO hints are unreliable. No change to GSO send behavior.
1 parent 36b0a72 commit 6445eee

File tree

1 file changed

+66
-133
lines changed

1 file changed

+66
-133
lines changed

src/iperf_udp.c

Lines changed: 66 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -89,22 +89,10 @@ iperf_udp_recv(struct iperf_stream *sp)
8989
dgram_sz = sp->settings->blksize;
9090

9191
if (sp->test->settings->gro) {
92-
size = sp->test->settings->gro_bf_size;
93-
r = Nread_gro(sp->socket, sp->buffer, size, Pudp, &dgram_sz);
94-
if (dgram_sz == -1) {
95-
/*
96-
* For corner case where the socket configuration is
97-
* successful but the kernel network layer doesn't provide
98-
* GRO-format data or ancillary info.
99-
*/
100-
dgram_sz = sp->settings->blksize;
101-
}
102-
/* Validate dgram_sz against reasonable bounds */
103-
if (dgram_sz <= 0 || dgram_sz < min_pkt_size || dgram_sz > sp->test->settings->gro_bf_size) {
104-
if (test->debug_level >= DEBUG_LEVEL_INFO)
105-
printf("Invalid GRO dgram_sz %d, falling back to blksize %d\n", dgram_sz, sp->settings->blksize);
106-
dgram_sz = sp->settings->blksize;
107-
}
92+
size = sp->test->settings->gro_bf_size;
93+
r = Nread_gro(sp->socket, sp->buffer, size, Pudp, &dgram_sz);
94+
/* Use negotiated block size for GRO segment stride to ensure correct parsing. */
95+
dgram_sz = sp->settings->blksize;
10896
} else {
10997
/* GRO available but disabled - use normal UDP receive and single packet size */
11098
r = Nrecv_no_select(sp->socket, sp->buffer, size, Pudp, sock_opt);
@@ -146,49 +134,66 @@ iperf_udp_recv(struct iperf_stream *sp)
146134
dgram_buf_end = sp->buffer + r;
147135
tmp_r = r;
148136

149-
/* Ensure we process complete datagrams only */
150-
while (tmp_r >= dgram_sz && dgram_buf + dgram_sz <= dgram_buf_end) {
151-
cnt++;
152-
if (sp->test->debug)
153-
printf("%d (%d) remaining %d\n", cnt, dgram_sz, tmp_r);
137+
/* Ensure we process complete datagrams only */
138+
while (tmp_r >= dgram_sz && dgram_buf + dgram_sz <= dgram_buf_end) {
139+
cnt++;
140+
141+
/* Ensure we have enough bytes for the packet header */
142+
if (tmp_r < min_pkt_size)
143+
break;
144+
145+
if (sp->test->udp_counters_64bit) {
146+
/* Verify we have enough space for 64-bit counter */
147+
if (tmp_r < sizeof(uint32_t) * 2 + sizeof(uint64_t))
148+
break;
149+
memcpy(&sec, dgram_buf, sizeof(sec));
150+
memcpy(&usec, dgram_buf+4, sizeof(usec));
151+
memcpy(&pcount, dgram_buf+8, sizeof(pcount));
152+
sec = ntohl(sec);
153+
usec = ntohl(usec);
154+
pcount = be64toh(pcount);
155+
sent_time.secs = sec;
156+
sent_time.usecs = usec;
157+
} else {
158+
uint32_t pc;
159+
memcpy(&sec, dgram_buf, sizeof(sec));
160+
memcpy(&usec, dgram_buf+4, sizeof(usec));
161+
memcpy(&pc, dgram_buf+8, sizeof(pc));
162+
sec = ntohl(sec);
163+
usec = ntohl(usec);
164+
pcount = ntohl(pc);
165+
sent_time.secs = sec;
166+
sent_time.usecs = usec;
167+
}
154168

155-
/* Ensure we have enough bytes for the packet header */
156-
if (tmp_r < min_pkt_size) {
157-
if (test->debug_level >= DEBUG_LEVEL_INFO)
158-
printf("Incomplete packet header: %d bytes remaining\n", tmp_r);
159-
break;
160-
}
169+
/* Per-datagram loss/out-of-order accounting */
170+
if (pcount >= sp->packet_count + 1) {
171+
if (pcount > sp->packet_count + 1) {
172+
sp->cnt_error += (pcount - 1) - sp->packet_count;
173+
}
174+
sp->packet_count = pcount;
175+
} else {
176+
sp->outoforder_packets++;
177+
if (sp->cnt_error > 0)
178+
sp->cnt_error--;
179+
}
161180

162-
if (sp->test->udp_counters_64bit) {
163-
/* Verify we have enough space for 64-bit counter */
164-
if (tmp_r < sizeof(uint32_t) * 2 + sizeof(uint64_t)) {
165-
if (test->debug_level >= DEBUG_LEVEL_INFO)
166-
printf("Incomplete 64-bit packet: %d bytes remaining\n", tmp_r);
167-
break;
168-
}
169-
memcpy(&sec, dgram_buf, sizeof(sec));
170-
memcpy(&usec, dgram_buf+4, sizeof(usec));
171-
memcpy(&pcount, dgram_buf+8, sizeof(pcount));
172-
sec = ntohl(sec);
173-
usec = ntohl(usec);
174-
pcount = be64toh(pcount);
175-
sent_time.secs = sec;
176-
sent_time.usecs = usec;
177-
}
178-
else {
179-
uint32_t pc;
180-
memcpy(&sec, dgram_buf, sizeof(sec));
181-
memcpy(&usec, dgram_buf+4, sizeof(usec));
182-
memcpy(&pc, dgram_buf+8, sizeof(pc));
183-
sec = ntohl(sec);
184-
usec = ntohl(usec);
185-
pcount = ntohl(pc);
186-
sent_time.secs = sec;
187-
sent_time.usecs = usec;
188-
}
189-
dgram_buf += dgram_sz;
190-
tmp_r -= dgram_sz;
191-
} // end while loop
181+
/* Per-datagram jitter computation */
182+
iperf_time_now(&arrival_time);
183+
iperf_time_diff(&arrival_time, &sent_time, &temp_time);
184+
transit = iperf_time_in_secs(&temp_time);
185+
if (first_packet)
186+
sp->prev_transit = transit;
187+
d = transit - sp->prev_transit;
188+
if (d < 0)
189+
d = -d;
190+
sp->prev_transit = transit;
191+
sp->jitter += (d - sp->jitter) / 16.0;
192+
first_packet = 0;
193+
194+
dgram_buf += dgram_sz;
195+
tmp_r -= dgram_sz;
196+
} // end while loop
192197
} else {
193198
/* GRO disabled - process as single normal UDP packet */
194199
/* Dig the various counters out of the incoming UDP packet */
@@ -239,80 +244,7 @@ iperf_udp_recv(struct iperf_stream *sp)
239244
}
240245
#endif /* HAVE_UDP_GRO */
241246

242-
if (test->debug_level >= DEBUG_LEVEL_DEBUG)
243-
fprintf(stderr, "pcount %" PRIu64 " packet_count %" PRIu64 "\n", pcount, sp->packet_count);
244-
245-
/*
246-
* Try to handle out of order packets. The way we do this
247-
* uses a constant amount of storage but might not be
248-
* correct in all cases. In particular we seem to have the
249-
* assumption that packets can't be duplicated in the network,
250-
* because duplicate packets will possibly cause some problems here.
251-
*
252-
* First figure out if the sequence numbers are going forward.
253-
* Note that pcount is the sequence number read from the packet,
254-
* and sp->packet_count is the highest sequence number seen so
255-
* far (so we're expecting to see the packet with sequence number
256-
* sp->packet_count + 1 arrive next).
257-
*/
258-
if (pcount >= sp->packet_count + 1) {
259-
260-
/* Forward, but is there a gap in sequence numbers? */
261-
if (pcount > sp->packet_count + 1) {
262-
/* There's a gap so count that as a loss. */
263-
sp->cnt_error += (pcount - 1) - sp->packet_count;
264-
if (test->debug_level >= DEBUG_LEVEL_INFO)
265-
fprintf(stderr, "LOST %" PRIu64 " PACKETS - received packet %" PRIu64 " but expected sequence %" PRIu64 " on stream %d\n", (pcount - sp->packet_count + 1), pcount, sp->packet_count + 1, sp->socket);
266-
}
267-
/* Update the highest sequence number seen so far. */
268-
sp->packet_count = pcount;
269-
} else {
270-
271-
/*
272-
* Sequence number went backward (or was stationary?!?).
273-
* This counts as an out-of-order packet.
274-
*/
275-
sp->outoforder_packets++;
276-
277-
/*
278-
* If we have lost packets, then the fact that we are now
279-
* seeing an out-of-order packet offsets a prior sequence
280-
* number gap that was counted as a loss. So we can take
281-
* away a loss.
282-
*/
283-
if (sp->cnt_error > 0)
284-
sp->cnt_error--;
285-
286-
/* Log the out-of-order packet */
287-
if (test->debug_level >= DEBUG_LEVEL_INFO)
288-
fprintf(stderr, "OUT OF ORDER - received packet %" PRIu64 " but expected sequence %" PRIu64 " on stream %d\n", pcount, sp->packet_count + 1, sp->socket);
289-
}
290-
291-
/*
292-
* jitter measurement
293-
*
294-
* This computation is based on RFC 1889 (specifically
295-
* sections 6.3.1 and A.8).
296-
*
297-
* Note that synchronized clocks are not required since
298-
* the source packet delta times are known. Also this
299-
* computation does not require knowing the round-trip
300-
* time.
301-
*/
302-
iperf_time_now(&arrival_time);
303-
304-
iperf_time_diff(&arrival_time, &sent_time, &temp_time);
305-
transit = iperf_time_in_secs(&temp_time);
306-
307-
/* Hack to handle the first packet by initializing prev_transit. */
308-
if (first_packet)
309-
sp->prev_transit = transit;
310-
311-
d = transit - sp->prev_transit;
312-
if (d < 0)
313-
d = -d;
314-
sp->prev_transit = transit;
315-
sp->jitter += (d - sp->jitter) / 16.0;
247+
/* For GRO case, loss and jitter were handled per datagram inside the loop. */
316248
}
317249
else {
318250
if (test->debug_level >= DEBUG_LEVEL_INFO)
@@ -343,8 +275,9 @@ iperf_udp_send(struct iperf_stream *sp)
343275
const int min_pkt_size = sizeof(uint32_t) * 3; /* sec + usec + pcount (32-bit) */
344276

345277
if (sp->test->settings->gso) {
346-
dgram_sz = sp->test->settings->gso_dg_size;
347-
buf_sz = sp->test->settings->gso_bf_size;
278+
dgram_sz = sp->test->settings->gso_dg_size;
279+
/* Use full GSO buffer to pack multiple datagrams, as originally. */
280+
buf_sz = sp->test->settings->gso_bf_size;
348281
/* Validate GSO parameters */
349282
if (dgram_sz <= 0 || dgram_sz < min_pkt_size || dgram_sz > buf_sz) {
350283
if (sp->test->debug_level >= DEBUG_LEVEL_INFO)

0 commit comments

Comments
 (0)