diff --git a/configure b/configure index f4d488697..82ede8a5e 100755 --- a/configure +++ b/configure @@ -17399,6 +17399,78 @@ printf "%s\n" "#define HAVE_IPPROTO_MPTCP 1" >>confdefs.h fi +# Check for UDP_SEGMENT sockopt +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking UDP_SEGMENT socket option" >&5 +printf %s "checking UDP_SEGMENT socket option... " >&6; } +if test ${iperf3_cv_header_udp_segment+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +int foo = UDP_SEGMENT; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + iperf3_cv_header_udp_segment=yes +else case e in #( + e) iperf3_cv_header_udp_segment=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $iperf3_cv_header_udp_segment" >&5 +printf "%s\n" "$iperf3_cv_header_udp_segment" >&6; } +if test "x$iperf3_cv_header_udp_segment" = "xyes"; then + +printf "%s\n" "#define HAVE_UDP_SEGMENT 1" >>confdefs.h + +fi + +# Check for UDP_GRO sockopt +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking UDP_GRO socket option" >&5 +printf %s "checking UDP_GRO socket option... " >&6; } +if test ${iperf3_cv_header_udp_gro+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +int +main (void) +{ +int foo = UDP_GRO; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + iperf3_cv_header_udp_gro=yes +else case e in #( + e) iperf3_cv_header_udp_gro=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $iperf3_cv_header_udp_gro" >&5 +printf "%s\n" "$iperf3_cv_header_udp_gro" >&6; } +if test "x$iperf3_cv_header_udp_gro" = "xyes"; then + +printf "%s\n" "#define HAVE_UDP_GRO 1" >>confdefs.h + +fi + # Check if we need -lrt for clock_gettime { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5 printf %s "checking for library containing clock_gettime... " >&6; } diff --git a/configure.ac b/configure.ac index 11f458690..27ce742f5 100644 --- a/configure.ac +++ b/configure.ac @@ -375,6 +375,30 @@ if test "x$iperf3_cv_header_ipproto_mptcp" = "xyes"; then AC_DEFINE([HAVE_IPPROTO_MPTCP], [1], [Have MPTCP protocol.]) fi +# Check for UDP_SEGMENT sockopt +AC_CACHE_CHECK([UDP_SEGMENT socket option], +[iperf3_cv_header_udp_segment], +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[int foo = UDP_SEGMENT;]])], + iperf3_cv_header_udp_segment=yes, + iperf3_cv_header_udp_segment=no)) +if test "x$iperf3_cv_header_udp_segment" = "xyes"; then + AC_DEFINE([HAVE_UDP_SEGMENT], [1], [Have UDP_SEGMENT sockopt.]) +fi + +# Check for UDP_GRO sockopt +AC_CACHE_CHECK([UDP_GRO socket option], +[iperf3_cv_header_udp_gro], +AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[int foo = UDP_GRO;]])], + iperf3_cv_header_udp_gro=yes, + iperf3_cv_header_udp_gro=no)) +if test "x$iperf3_cv_header_udp_gro" = "xyes"; then + AC_DEFINE([HAVE_UDP_GRO], [1], [Have UDP_GRO sockopt.]) +fi + # Check if we need -lrt for clock_gettime AC_SEARCH_LIBS(clock_gettime, [rt posix4]) # Check for clock_gettime support diff --git a/src/iperf.h b/src/iperf.h index 1371abdf0..67e96e87c 100644 --- a/src/iperf.h +++ 
b/src/iperf.h @@ -191,6 +191,15 @@ struct iperf_settings int cntl_ka_keepidle; /* Control TCP connection Keepalive idle time (TCP_KEEPIDLE) */ int cntl_ka_interval; /* Control TCP connection Keepalive interval between retries (TCP_KEEPINTV) */ int cntl_ka_count; /* Control TCP connection Keepalive number of retries (TCP_KEEPCNT) */ +#ifdef HAVE_UDP_SEGMENT + int gso; + int gso_dg_size; + int gso_bf_size; +#endif +#ifdef HAVE_UDP_GRO + int gro; + int gro_bf_size; +#endif }; struct iperf_test; @@ -486,4 +495,7 @@ extern int gerror; /* error value from getaddrinfo(3), for use in internal error /* In Reverse mode, maximum number of packets to wait for "accept" response - to handle out of order packets */ #define MAX_REVERSE_OUT_OF_ORDER_PACKETS 2 +#define GSO_BF_MAX_SIZE MAX_UDP_BLOCKSIZE +#define GRO_BF_MAX_SIZE MAX_UDP_BLOCKSIZE + #endif /* !__IPERF_H */ diff --git a/src/iperf_api.c b/src/iperf_api.c index 0e7e58917..92dc6ce3d 100644 --- a/src/iperf_api.c +++ b/src/iperf_api.c @@ -1188,6 +1188,9 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) #endif /* HAVE_TCP_KEEPALIVE */ #if defined(HAVE_IPPROTO_MPTCP) {"mptcp", no_argument, NULL, 'm'}, +#endif +#if defined(HAVE_UDP_SEGMENT) || defined(HAVE_UDP_GRO) + {"no-gsro", no_argument, NULL, OPT_NO_GSRO}, #endif {"debug", optional_argument, NULL, 'd'}, {"help", no_argument, NULL, 'h'}, @@ -1781,6 +1784,17 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) set_protocol(test, Ptcp); test->mptcp = 1; break; +#endif +#if defined(HAVE_UDP_SEGMENT) || defined(HAVE_UDP_GRO) + case OPT_NO_GSRO: + /* Disable GSO/GRO which would otherwise be enabled by default */ +#ifdef HAVE_UDP_SEGMENT + test->settings->gso = 0; +#endif +#ifdef HAVE_UDP_GRO + test->settings->gro = 0; +#endif + break; #endif case 'h': usage_long(stdout); @@ -1802,6 +1816,8 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) return -1; } +/* GSO/GRO are enabled by default when available, disabled 
only via --no-gsro */ + #if defined(HAVE_SSL) if (test->role == 's' && (client_username || client_rsa_public_key)){ @@ -1906,6 +1922,20 @@ iperf_parse_arguments(struct iperf_test *test, int argc, char **argv) i_errno = IEUDPBLOCKSIZE; return -1; } + +#ifdef HAVE_UDP_SEGMENT + if (test->protocol->id == Pudp && test->settings->gso) { + test->settings->gso_dg_size = blksize; + /* use the multiple of datagram size for the best efficiency. */ + if (test->settings->gso_dg_size > 0) { + test->settings->gso_bf_size = (test->settings->gso_bf_size / test->settings->gso_dg_size) * test->settings->gso_dg_size; + } else { + /* If gso_dg_size is 0 (unlimited bandwidth), use default UDP datagram size */ + test->settings->gso_dg_size = DEFAULT_UDP_BLKSIZE; + } + } +#endif + test->settings->blksize = blksize; if (!rate_flag) @@ -2449,6 +2479,22 @@ send_parameters(struct iperf_test *test) cJSON_AddNumberToObject(j, "pacing_timer", test->settings->pacing_timer); if (test->settings->burst) cJSON_AddNumberToObject(j, "burst", test->settings->burst); + +#ifdef HAVE_UDP_SEGMENT + /* Send UDP GSO settings from client to server */ + if (test->protocol->id == Pudp) { + cJSON_AddNumberToObject(j, "gso", test->settings->gso); + cJSON_AddNumberToObject(j, "gso_dg_size", test->settings->gso_dg_size); + cJSON_AddNumberToObject(j, "gso_bf_size", test->settings->gso_bf_size); + } +#endif +#ifdef HAVE_UDP_GRO + /* Send UDP GRO settings from client to server */ + if (test->protocol->id == Pudp) { + cJSON_AddNumberToObject(j, "gro", test->settings->gro); + cJSON_AddNumberToObject(j, "gro_bf_size", test->settings->gro_bf_size); + } +#endif if (test->settings->tos) cJSON_AddNumberToObject(j, "TOS", test->settings->tos); if (test->settings->flowlabel) @@ -2572,6 +2618,33 @@ get_parameters(struct iperf_test *test) test->settings->socket_bufsize = j_p->valueint; if ((j_p = iperf_cJSON_GetObjectItemType(j, "len", cJSON_Number)) != NULL) test->settings->blksize = j_p->valueint; + +#ifdef HAVE_UDP_SEGMENT + 
/* Accept UDP GSO settings provided by the client */ + if ((j_p = iperf_cJSON_GetObjectItemType(j, "gso", cJSON_Number)) != NULL) + test->settings->gso = j_p->valueint; + if ((j_p = iperf_cJSON_GetObjectItemType(j, "gso_dg_size", cJSON_Number)) != NULL) + test->settings->gso_dg_size = j_p->valueint; + if ((j_p = iperf_cJSON_GetObjectItemType(j, "gso_bf_size", cJSON_Number)) != NULL) + test->settings->gso_bf_size = j_p->valueint; + + /* Backward-compatibility: If client didn't send GSO params, derive from blksize. */ + if (test->protocol->id == Pudp && test->settings->gso == 1 && test->settings->gso_dg_size == 0) { + test->settings->gso_dg_size = test->settings->blksize; + if (test->settings->gso_dg_size > 0) { + test->settings->gso_bf_size = (test->settings->gso_bf_size / test->settings->gso_dg_size) * test->settings->gso_dg_size; + } else { + test->settings->gso_dg_size = DEFAULT_UDP_BLKSIZE; + } + } +#endif +#ifdef HAVE_UDP_GRO + /* Accept UDP GRO settings provided by the client */ + if ((j_p = iperf_cJSON_GetObjectItemType(j, "gro", cJSON_Number)) != NULL) + test->settings->gro = j_p->valueint; + if ((j_p = iperf_cJSON_GetObjectItemType(j, "gro_bf_size", cJSON_Number)) != NULL) + test->settings->gro_bf_size = j_p->valueint; +#endif if ((j_p = iperf_cJSON_GetObjectItemType(j, "bandwidth", cJSON_Number)) != NULL) test->settings->rate = j_p->valueint; if ((j_p = iperf_cJSON_GetObjectItemType(j, "fqrate", cJSON_Number)) != NULL) @@ -3231,6 +3304,15 @@ iperf_defaults(struct iperf_test *testp) testp->settings->fqrate = 0; testp->settings->pacing_timer = DEFAULT_PACING_TIMER; testp->settings->burst = 0; +#ifdef HAVE_UDP_SEGMENT + testp->settings->gso = 1; /* Enable GSO by default */ + testp->settings->gso_dg_size = 0; + testp->settings->gso_bf_size = GSO_BF_MAX_SIZE; +#endif +#ifdef HAVE_UDP_GRO + testp->settings->gro = 1; /* Enable GRO by default */ + testp->settings->gro_bf_size = GRO_BF_MAX_SIZE; +#endif testp->settings->mss = 0; testp->settings->bytes = 0; 
testp->settings->blocks = 0; @@ -3544,6 +3626,13 @@ iperf_reset_test(struct iperf_test *test) test->settings->burst = 0; test->settings->mss = 0; test->settings->tos = 0; +#ifdef HAVE_UDP_SEGMENT + test->settings->gso_dg_size = 0; + test->settings->gso_bf_size = GSO_BF_MAX_SIZE; +#endif +#ifdef HAVE_UDP_GRO + test->settings->gro_bf_size = GRO_BF_MAX_SIZE; +#endif test->settings->dont_fragment = 0; test->zerocopy = 0; test->settings->skip_rx_copy = 0; @@ -4708,6 +4797,7 @@ iperf_new_stream(struct iperf_test *test, int s, int sender) { struct iperf_stream *sp; int ret = 0; + int size; char template[1024]; if (test->tmp_template) { @@ -4766,13 +4856,24 @@ iperf_new_stream(struct iperf_test *test, int s, int sender) free(sp); return NULL; } - if (ftruncate(sp->buffer_fd, test->settings->blksize) < 0) { + size = test->settings->blksize; +#ifdef HAVE_UDP_SEGMENT + if (test->protocol->id == Pudp && test->settings->gso && (size < test->settings->gso_bf_size)) + size = test->settings->gso_bf_size; +#endif +#ifdef HAVE_UDP_GRO + if (test->protocol->id == Pudp && test->settings->gro && (size < test->settings->gro_bf_size)) + size = test->settings->gro_bf_size; +#endif + if (sp->test->debug) + printf("Buffer %d bytes\n", size); + if (ftruncate(sp->buffer_fd, size) < 0) { i_errno = IECREATESTREAM; free(sp->result); free(sp); return NULL; } - sp->buffer = (char *) mmap(NULL, test->settings->blksize, PROT_READ|PROT_WRITE, MAP_SHARED, sp->buffer_fd, 0); + sp->buffer = (char *) mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, sp->buffer_fd, 0); if (sp->buffer == MAP_FAILED) { i_errno = IECREATESTREAM; free(sp->result); diff --git a/src/iperf_api.h b/src/iperf_api.h index 9d783a316..ce7098a90 100644 --- a/src/iperf_api.h +++ b/src/iperf_api.h @@ -106,6 +106,7 @@ typedef atomic_uint_fast64_t atomic_iperf_size_t; #define OPT_SKIP_RX_COPY 32 #define OPT_JSON_STREAM_FULL_OUTPUT 33 #define OPT_SERVER_MAX_DURATION 34 +#define OPT_NO_GSRO 35 /* states */ #define TEST_START 1 diff --git 
a/src/iperf_client_api.c b/src/iperf_client_api.c index 37d741f87..b4280ecc0 100644 --- a/src/iperf_client_api.c +++ b/src/iperf_client_api.c @@ -505,29 +505,41 @@ iperf_connect(struct iperf_test *test) * the user always has the option to override. */ if (test->protocol->id == Pudp) { - if (test->settings->blksize == 0) { - if (test->ctrl_sck_mss) { - test->settings->blksize = test->ctrl_sck_mss; - } - else { - test->settings->blksize = DEFAULT_UDP_BLKSIZE; - } - if (test->verbose) { - printf("Setting UDP block size to %d\n", test->settings->blksize); - } - } + if (test->settings->blksize == 0) { + if (test->ctrl_sck_mss) { + test->settings->blksize = test->ctrl_sck_mss; + } + else { + test->settings->blksize = DEFAULT_UDP_BLKSIZE; + } + if (test->verbose) { + printf("Setting UDP block size to %d\n", test->settings->blksize); + } + } +#ifdef HAVE_UDP_SEGMENT + if (test->settings->gso) { + test->settings->gso_dg_size = test->settings->blksize; + /* use the multiple of datagram size for the best efficiency. */ + if (test->settings->gso_dg_size > 0) { + test->settings->gso_bf_size = (test->settings->gso_bf_size / test->settings->gso_dg_size) * test->settings->gso_dg_size; + } else { + /* If gso_dg_size is 0 (unlimited bandwidth), use default UDP datagram size */ + test->settings->gso_dg_size = DEFAULT_UDP_BLKSIZE; + } + } +#endif - /* - * Regardless of whether explicitly or implicitly set, if the - * block size is larger than the MSS, print a warning. - */ - if (test->ctrl_sck_mss > 0 && - test->settings->blksize > test->ctrl_sck_mss) { - char str[WARN_STR_LEN]; - snprintf(str, sizeof(str), - "UDP block size %d exceeds TCP MSS %d, may result in fragmentation / drops", test->settings->blksize, test->ctrl_sck_mss); - warning(str); - } + /* + * Regardless of whether explicitly or implicitly set, if the + * block size is larger than the MSS, print a warning. 
+ */ + if (test->ctrl_sck_mss > 0 && + test->settings->blksize > test->ctrl_sck_mss) { + char str[WARN_STR_LEN]; + snprintf(str, sizeof(str), + "UDP block size %d exceeds TCP MSS %d, may result in fragmentation / drops", test->settings->blksize, test->ctrl_sck_mss); + warning(str); + } } return 0; diff --git a/src/iperf_config.h.in b/src/iperf_config.h.in index 393dcdfa2..fa22a17e0 100644 --- a/src/iperf_config.h.in +++ b/src/iperf_config.h.in @@ -132,6 +132,12 @@ /* Have TCP_USER_TIMEOUT sockopt. */ #undef HAVE_TCP_USER_TIMEOUT +/* Have UDP_GRO sockopt. */ +#undef HAVE_UDP_GRO + +/* Have UDP_SEGMENT sockopt. */ +#undef HAVE_UDP_SEGMENT + /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H diff --git a/src/iperf_locale.c b/src/iperf_locale.c index 78df64d73..40c75e592 100644 --- a/src/iperf_locale.c +++ b/src/iperf_locale.c @@ -219,6 +219,9 @@ const char usage_longstr[] = "Usage: iperf3 [-s|-c host] [options]\n" " --extra-data str data string to include in client and server JSON\n" " --get-server-output get results from server\n" " --udp-counters-64bit use 64-bit counters in UDP test packets\n" +#if defined(HAVE_UDP_SEGMENT) || defined(HAVE_UDP_GRO) + " --no-gsro disable UDP GSO/GRO (Generic Segmentation/Receive Offload)\n" +#endif " --repeating-payload use repeating pattern in payload, instead of\n" " randomized payload (like in iperf2)\n" #if defined(HAVE_DONT_FRAGMENT) diff --git a/src/iperf_udp.c b/src/iperf_udp.c index c8835e6d7..cfe95ab13 100644 --- a/src/iperf_udp.c +++ b/src/iperf_udp.c @@ -24,6 +24,8 @@ * This code is distributed under a BSD style license, see the LICENSE * file for complete information. 
*/ +#include "iperf_config.h" + #include #include #include @@ -38,6 +40,9 @@ #include #include #include +#if defined(HAVE_UDP_SEGMENT) || defined(HAVE_UDP_GRO) +#include +#endif #include "iperf.h" #include "iperf_api.h" @@ -72,7 +77,30 @@ iperf_udp_recv(struct iperf_stream *sp) } #endif /* HAVE_MSG_TRUNC */ +#ifdef HAVE_UDP_GRO + int tmp_r; + int dgram_sz; + int cnt = 0; + char *dgram_buf; + char *dgram_buf_end; + const int min_pkt_size = sizeof(uint32_t) * 3; /* sec + usec + pcount (32-bit) */ + + /* Initialize dgram_sz for both GRO enabled and disabled cases */ + dgram_sz = sp->settings->blksize; + + if (sp->test->settings->gro) { + size = sp->test->settings->gro_bf_size; + r = Nread_gro(sp->socket, sp->buffer, size, Pudp, &dgram_sz); + /* Use negotiated block size for GRO segment stride to ensure correct parsing. */ + dgram_sz = sp->settings->blksize; + } else { + /* GRO available but disabled - use normal UDP receive and single packet size */ + r = Nrecv_no_select(sp->socket, sp->buffer, size, Pudp, sock_opt); + dgram_sz = sp->settings->blksize; + } +#else r = Nrecv_no_select(sp->socket, sp->buffer, size, Pudp, sock_opt); +#endif /* * If we got an error in the read, or if we didn't read anything @@ -96,6 +124,102 @@ iperf_udp_recv(struct iperf_stream *sp) sp->result->bytes_received += r; sp->result->bytes_received_this_interval += r; + if (sp->test->debug) + printf("received %d bytes of %d, total %" PRIu64 "\n", r, size, sp->result->bytes_received); + +#ifdef HAVE_UDP_GRO + if (sp->test->settings->gro) { + /* GRO enabled - process multiple datagrams */ + dgram_buf = sp->buffer; + dgram_buf_end = sp->buffer + r; + tmp_r = r; + + /* Ensure we process complete datagrams only */ + while (tmp_r >= dgram_sz && dgram_buf + dgram_sz <= dgram_buf_end) { + cnt++; + + /* Ensure we have enough bytes for the packet header */ + if (tmp_r < min_pkt_size) + break; + + if (sp->test->udp_counters_64bit) { + /* Verify we have enough space for 64-bit counter */ + if (tmp_r < 
sizeof(uint32_t) * 2 + sizeof(uint64_t)) + break; + memcpy(&sec, dgram_buf, sizeof(sec)); + memcpy(&usec, dgram_buf+4, sizeof(usec)); + memcpy(&pcount, dgram_buf+8, sizeof(pcount)); + sec = ntohl(sec); + usec = ntohl(usec); + pcount = be64toh(pcount); + sent_time.secs = sec; + sent_time.usecs = usec; + } else { + uint32_t pc; + memcpy(&sec, dgram_buf, sizeof(sec)); + memcpy(&usec, dgram_buf+4, sizeof(usec)); + memcpy(&pc, dgram_buf+8, sizeof(pc)); + sec = ntohl(sec); + usec = ntohl(usec); + pcount = ntohl(pc); + sent_time.secs = sec; + sent_time.usecs = usec; + } + + /* Per-datagram loss/out-of-order accounting */ + if (pcount >= sp->packet_count + 1) { + if (pcount > sp->packet_count + 1) { + sp->cnt_error += (pcount - 1) - sp->packet_count; + } + sp->packet_count = pcount; + } else { + sp->outoforder_packets++; + if (sp->cnt_error > 0) + sp->cnt_error--; + } + + /* Per-datagram jitter computation */ + iperf_time_now(&arrival_time); + iperf_time_diff(&arrival_time, &sent_time, &temp_time); + transit = iperf_time_in_secs(&temp_time); + if (first_packet) + sp->prev_transit = transit; + d = transit - sp->prev_transit; + if (d < 0) + d = -d; + sp->prev_transit = transit; + sp->jitter += (d - sp->jitter) / 16.0; + first_packet = 0; + + dgram_buf += dgram_sz; + tmp_r -= dgram_sz; + } // end while loop + } else { + /* GRO disabled - process as single normal UDP packet */ + /* Dig the various counters out of the incoming UDP packet */ + if (test->udp_counters_64bit) { + memcpy(&sec, sp->buffer, sizeof(sec)); + memcpy(&usec, sp->buffer+4, sizeof(usec)); + memcpy(&pcount, sp->buffer+8, sizeof(pcount)); + sec = ntohl(sec); + usec = ntohl(usec); + pcount = be64toh(pcount); + sent_time.secs = sec; + sent_time.usecs = usec; + } + else { + uint32_t pc; + memcpy(&sec, sp->buffer, sizeof(sec)); + memcpy(&usec, sp->buffer+4, sizeof(usec)); + memcpy(&pc, sp->buffer+8, sizeof(pc)); + sec = ntohl(sec); + usec = ntohl(usec); + pcount = ntohl(pc); + sent_time.secs = sec; + 
sent_time.usecs = usec; + } + } +#else /* Dig the various counters out of the incoming UDP packet */ if (test->udp_counters_64bit) { memcpy(&sec, sp->buffer, sizeof(sec)); @@ -118,81 +242,9 @@ iperf_udp_recv(struct iperf_stream *sp) sent_time.secs = sec; sent_time.usecs = usec; } +#endif /* HAVE_UDP_GRO */ - if (test->debug_level >= DEBUG_LEVEL_DEBUG) - fprintf(stderr, "pcount %" PRIu64 " packet_count %" PRIu64 "\n", pcount, sp->packet_count); - - /* - * Try to handle out of order packets. The way we do this - * uses a constant amount of storage but might not be - * correct in all cases. In particular we seem to have the - * assumption that packets can't be duplicated in the network, - * because duplicate packets will possibly cause some problems here. - * - * First figure out if the sequence numbers are going forward. - * Note that pcount is the sequence number read from the packet, - * and sp->packet_count is the highest sequence number seen so - * far (so we're expecting to see the packet with sequence number - * sp->packet_count + 1 arrive next). - */ - if (pcount >= sp->packet_count + 1) { - - /* Forward, but is there a gap in sequence numbers? */ - if (pcount > sp->packet_count + 1) { - /* There's a gap so count that as a loss. */ - sp->cnt_error += (pcount - 1) - sp->packet_count; - if (test->debug_level >= DEBUG_LEVEL_INFO) - fprintf(stderr, "LOST %" PRIu64 " PACKETS - received packet %" PRIu64 " but expected sequence %" PRIu64 " on stream %d\n", (pcount - sp->packet_count + 1), pcount, sp->packet_count + 1, sp->socket); - } - /* Update the highest sequence number seen so far. */ - sp->packet_count = pcount; - } else { - - /* - * Sequence number went backward (or was stationary?!?). - * This counts as an out-of-order packet. - */ - sp->outoforder_packets++; - - /* - * If we have lost packets, then the fact that we are now - * seeing an out-of-order packet offsets a prior sequence - * number gap that was counted as a loss. So we can take - * away a loss. 
- */ - if (sp->cnt_error > 0) - sp->cnt_error--; - - /* Log the out-of-order packet */ - if (test->debug_level >= DEBUG_LEVEL_INFO) - fprintf(stderr, "OUT OF ORDER - received packet %" PRIu64 " but expected sequence %" PRIu64 " on stream %d\n", pcount, sp->packet_count + 1, sp->socket); - } - - /* - * jitter measurement - * - * This computation is based on RFC 1889 (specifically - * sections 6.3.1 and A.8). - * - * Note that synchronized clocks are not required since - * the source packet delta times are known. Also this - * computation does not require knowing the round-trip - * time. - */ - iperf_time_now(&arrival_time); - - iperf_time_diff(&arrival_time, &sent_time, &temp_time); - transit = iperf_time_in_secs(&temp_time); - - /* Hack to handle the first packet by initializing prev_transit. */ - if (first_packet) - sp->prev_transit = transit; - - d = transit - sp->prev_transit; - if (d < 0) - d = -d; - sp->prev_transit = transit; - sp->jitter += (d - sp->jitter) / 16.0; + /* For GRO case, loss and jitter were handled per datagram inside the loop. */ } else { if (test->debug_level >= DEBUG_LEVEL_INFO) @@ -214,6 +266,84 @@ iperf_udp_send(struct iperf_stream *sp) int size = sp->settings->blksize; struct iperf_time before; +#ifdef HAVE_UDP_SEGMENT + int dgram_sz; + int buf_sz; + int cnt = 0; + char *dgram_buf; + char *dgram_buf_end; + const int min_pkt_size = sizeof(uint32_t) * 3; /* sec + usec + pcount (32-bit) */ + + if (sp->test->settings->gso) { + dgram_sz = sp->test->settings->gso_dg_size; + /* Use full GSO buffer to pack multiple datagrams, as originally. 
*/ + buf_sz = sp->test->settings->gso_bf_size; + /* Validate GSO parameters */ + if (dgram_sz <= 0 || dgram_sz < min_pkt_size || dgram_sz > buf_sz) { + if (sp->test->debug_level >= DEBUG_LEVEL_INFO) + printf("Invalid GSO dgram_sz %d for buf_sz %d, disabling GSO\n", dgram_sz, buf_sz); + dgram_sz = buf_sz = size; + sp->test->settings->gso = 0; /* Disable GSO for safety */ + } + } else { + dgram_sz = buf_sz = size; + } + + dgram_buf = sp->buffer; + dgram_buf_end = sp->buffer + buf_sz; + + while (buf_sz > 0 && dgram_buf + dgram_sz <= dgram_buf_end) { + cnt++; + + if (sp->test->debug) + printf("%d (%d) remaining %d\n", cnt, dgram_sz, buf_sz); + + /* Prevent buffer underflow */ + if (buf_sz < dgram_sz) { + if (sp->test->debug_level >= DEBUG_LEVEL_INFO) + printf("Buffer underflow protection: buf_sz %d < dgram_sz %d\n", buf_sz, dgram_sz); + break; + } + + iperf_time_now(&before); + ++sp->packet_count; + + if (sp->test->udp_counters_64bit) { + + uint32_t sec, usec; + uint64_t pcount; + + sec = htonl(before.secs); + usec = htonl(before.usecs); + pcount = htobe64(sp->packet_count); + + memcpy(dgram_buf, &sec, sizeof(sec)); + memcpy(dgram_buf+4, &usec, sizeof(usec)); + memcpy(dgram_buf+8, &pcount, sizeof(pcount)); + + } + else { + + uint32_t sec, usec, pcount; + + sec = htonl(before.secs); + usec = htonl(before.usecs); + pcount = htonl(sp->packet_count); + + memcpy(dgram_buf, &sec, sizeof(sec)); + memcpy(dgram_buf+4, &usec, sizeof(usec)); + memcpy(dgram_buf+8, &pcount, sizeof(pcount)); + + } + dgram_buf += dgram_sz; + buf_sz -= dgram_sz; + } + + /* Warn if we didn't process all the buffer due to size mismatch */ + if (buf_sz > 0 && sp->test->debug_level >= DEBUG_LEVEL_INFO) { + printf("GSO: %d bytes remaining unprocessed\n", buf_sz); + } +#else iperf_time_now(&before); ++sp->packet_count; @@ -245,7 +375,14 @@ iperf_udp_send(struct iperf_stream *sp) memcpy(sp->buffer+8, &pcount, sizeof(pcount)); } +#endif /* HAVE_UDP_SEGMENT */ +#ifdef HAVE_UDP_SEGMENT + if 
(sp->test->settings->gso) {
+        size = sp->test->settings->gso_bf_size;
+        r = Nwrite_gso(sp->socket, sp->buffer, size, Pudp, sp->test->settings->gso_dg_size);
+    } else
+#endif
     r = Nwrite(sp->socket, sp->buffer, size, Pudp);
 
     if (r <= 0) {
@@ -262,7 +399,7 @@ iperf_udp_send(struct iperf_stream *sp)
     sp->result->bytes_sent_this_interval += r;
 
     if (sp->test->debug_level >= DEBUG_LEVEL_DEBUG)
-        printf("sent %d bytes of %d, total %" PRIu64 "\n", r, sp->settings->blksize, sp->result->bytes_sent);
+        printf("sent %d bytes of %d, total %" PRIu64 "\n", r, size, sp->result->bytes_sent);
 
     return r;
 }
@@ -372,6 +509,42 @@ iperf_udp_buffercheck(struct iperf_test *test, int s)
     return rc;
 }
 
+#ifdef HAVE_UDP_SEGMENT
+/* Set UDP_SEGMENT on socket s to settings->gso_dg_size; clears settings->gso on failure. */
+int
+iperf_udp_gso(struct iperf_test *test, int s)
+{
+    int seg_size = test->settings->gso_dg_size;
+    int status = setsockopt(s, IPPROTO_UDP, UDP_SEGMENT, &seg_size, sizeof(seg_size));
+
+    if (status == 0) {
+        iperf_printf(test, "GSO (%d)\n", seg_size);
+    } else {
+        iperf_printf(test, "No GSO (%d)\n", status);
+        test->settings->gso = 0;
+    }
+    return status;
+}
+#endif
+
+#ifdef HAVE_UDP_GRO
+/* Set UDP_GRO on socket s; clears settings->gro on failure. Returns the setsockopt result. */
+int
+iperf_udp_gro(struct iperf_test *test, int s)
+{
+    int enable = 1;
+    int status = setsockopt(s, IPPROTO_UDP, UDP_GRO, &enable, sizeof(enable));
+
+    if (status == 0) {
+        iperf_printf(test, "GRO\n");
+    } else {
+        iperf_printf(test, "No GRO (%d)\n", status);
+        test->settings->gro = 0;
+    }
+    return status;
+}
+#endif
+
 /*
  * iperf_udp_accept
  *
@@ -431,6 +604,15 @@ iperf_udp_accept(struct iperf_test *test)
         }
     }
 
+#ifdef HAVE_UDP_SEGMENT
+    if (test->settings->gso)
+        iperf_udp_gso(test, s);
+#endif
+#ifdef HAVE_UDP_GRO
+    if (test->settings->gro)
+        iperf_udp_gro(test, s);
+#endif
+
 #if defined(HAVE_SO_MAX_PACING_RATE)
     /* If socket pacing is specified, try it. 
*/
     if (test->settings->fqrate) {
@@ -530,6 +712,16 @@ iperf_udp_connect(struct iperf_test *test)
     if (rc < 0)
         /* error */
         return rc;
+
+#ifdef HAVE_UDP_SEGMENT
+    if (test->settings->gso)
+        iperf_udp_gso(test, s);
+#endif
+#ifdef HAVE_UDP_GRO
+    if (test->settings->gro)
+        iperf_udp_gro(test, s);
+#endif
+
     /*
      * If the socket buffer was too small, but it was the default
      * size, then try explicitly setting it to something larger.
diff --git a/src/net.c b/src/net.c
index fe9c5877e..6495b0a83 100644
--- a/src/net.c
+++ b/src/net.c
@@ -38,6 +38,11 @@
 #include
 #include
 #include
+#if defined(HAVE_UDP_SEGMENT) || defined(HAVE_UDP_GRO)
+#include
+#endif
+
+#include "iperf.h"
 
 #ifdef HAVE_SENDFILE
 #ifdef linux
@@ -520,6 +525,114 @@ Nrecv_no_select(int fd, char *buf, size_t count, int prot, int sock_opt)
     return count - nleft;
 }
 
+#ifdef HAVE_UDP_GRO
+/*
+ * Receive one (possibly GRO-coalesced) UDP buffer without blocking.
+ *
+ * Returns what recvmsg(2) returned. *gso_size is set to the segment size
+ * carried in the UDP_GRO control message, or to -1 when no usable
+ * control message came with the data.
+ */
+static int recv_msg_gro(int fd, char *buf, int len, int *gso_size)
+{
+    /*
+     * The Linux kernel passes the UDP_GRO segment size as an int (see the
+     * reference udpgso_bench_rx selftest), so size the control buffer for
+     * an int rather than a uint16_t.
+     */
+    char control[CMSG_SPACE(sizeof(int))] = {0};
+    struct msghdr msg = {0};
+    struct iovec iov = {0};
+    struct cmsghdr *cmsg;
+    int ret;
+
+    /* Input validation */
+    if (!buf || len <= 0 || !gso_size) {
+        return -1;
+    }
+
+    iov.iov_base = buf;
+    iov.iov_len = len;
+
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+
+    msg.msg_control = control;
+    msg.msg_controllen = sizeof(control);
+
+    *gso_size = -1;
+    ret = recvmsg(fd, &msg, MSG_DONTWAIT);
+    if (ret <= 0) {
+        return ret;
+    }
+
+    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+        if (cmsg->cmsg_level != IPPROTO_UDP || cmsg->cmsg_type != UDP_GRO) {
+            continue;
+        }
+
+        /*
+         * Read the payload at the width that was actually delivered.
+         * Reading only 16 bits of an int payload yields 0 on big-endian hosts.
+         */
+        if (cmsg->cmsg_len >= CMSG_LEN(sizeof(int))) {
+            *gso_size = *(int *) CMSG_DATA(cmsg);
+        } else if (cmsg->cmsg_len >= CMSG_LEN(sizeof(uint16_t))) {
+            *gso_size = *(uint16_t *) CMSG_DATA(cmsg);
+        }
+
+        /* Sanity check: a segment can be neither empty nor larger than the buffer */
+        if (*gso_size <= 0 || *gso_size > len) {
+            *gso_size = -1;
+        }
+        break;
+    }
+
+    return ret;
+}
+
+/*
+ * Nread_gro: read one UDP buffer from a GRO-enabled socket.
+ *
+ * buf/count describe the receive buffer; count is capped at
+ * MAX_UDP_BLOCKSIZE. prot is accepted but not used. On return *dgram_sz
+ * holds the reported segment size, or -1 if none was reported.
+ *
+ * Returns the number of bytes received, 0 if the call was interrupted or
+ * no data was available, or NET_HARDERROR on any other failure.
+ */
+int
+Nread_gro(int fd, char *buf, size_t count, int prot, int *dgram_sz)
+{
+    int r;
+
+    /* Input validation; count is unsigned, so only zero needs rejecting */
+    if (!buf || count == 0 || !dgram_sz) {
+        return NET_HARDERROR;
+    }
+
+    /* Limit maximum buffer size to prevent excessive memory usage */
+    if (count > MAX_UDP_BLOCKSIZE) {
+        count = MAX_UDP_BLOCKSIZE;
+    }
+
+    r = recv_msg_gro(fd, buf, (int) count, dgram_sz);
+    if (r < 0) {
+        if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
+            return 0;
+        }
+        printf("\nUnexpected error (%d)\n", errno);
+        return NET_HARDERROR;
+    }
+
+    /* A segment cannot be larger than the data actually received */
+    if (r > 0 && *dgram_sz > r) {
+        *dgram_sz = r;
+    }
+
+    return r;
+}
+#endif /* HAVE_UDP_GRO */
 
 /*
  *                      N W R I T E
@@ -559,6 +672,95 @@ Nwrite(int fd, const char *buf, size_t count, int prot)
     return count;
 }
 
+#ifdef HAVE_UDP_SEGMENT
+/* Fill in control message cm so that it requests UDP_SEGMENT with segments of gso_size bytes. */
+static void udp_msg_gso(struct cmsghdr *cm, uint16_t gso_size)
+{
+    uint16_t *valp;
+
+    cm->cmsg_level = IPPROTO_UDP;
+    cm->cmsg_type = UDP_SEGMENT;
+    cm->cmsg_len = CMSG_LEN(sizeof(gso_size));
+    valp = (void *) CMSG_DATA(cm);
+    *valp = gso_size;
+}
+
+/*
+ * Send count bytes from buf in a single sendmsg(2) call, attaching a
+ * UDP_SEGMENT control message that carries gso_size.
+ *
+ * Returns what sendmsg(2) returned: the number of bytes accepted, or -1
+ * with errno set.
+ */
+static int udp_sendmsg_gso(int fd, const char *buf, size_t count, uint16_t gso_size)
+{
+    char control[CMSG_SPACE(sizeof(gso_size))] = {0};
+    struct msghdr msg = {0};
+    struct iovec iov = {0};
+    ssize_t ret;
+
+    iov.iov_base = (void *) buf;
+    iov.iov_len = count;
+
+    msg.msg_iov = &iov;
+    msg.msg_iovlen = 1;
+
+    msg.msg_control = control;
+    msg.msg_controllen = sizeof(control);
+
+    udp_msg_gso(CMSG_FIRSTHDR(&msg), gso_size);
+
+    ret = sendmsg(fd, &msg, 0);
+
+    /*
+     * Report short writes only. On failure (-1) nothing is printed here, so
+     * errno is still intact when the caller inspects it.
+     */
+    if (ret >= 0 && (size_t) ret != count) {
+        printf("msg: %zd != %zu\n", ret, count);
+    }
+
+    return (int) ret;
+}
+
+/*
+ * Nwrite_gso: write count bytes to a UDP socket using GSO.
+ *
+ * gso_size is the segment size handed to the kernel in the UDP_SEGMENT
+ * control message. prot is accepted but not used.
+ *
+ * Returns the number of bytes sent, 0 if the call was interrupted or would
+ * block, NET_SOFTERROR on ENOBUFS, or NET_HARDERROR on any other failure.
+ */
+int
+Nwrite_gso(int fd, const char *buf, size_t count, int prot, uint16_t gso_size)
+{
+    int r;
+
+    r = udp_sendmsg_gso(fd, buf, count, gso_size);
+    if (r >= 0) {
+        return r;
+    }
+
+    switch (errno) {
+    case EINTR:
+    case EAGAIN:
+#if (EAGAIN != EWOULDBLOCK)
+    case EWOULDBLOCK:
+#endif
+        /* Transient condition, reported as "nothing sent": do not print */
+        return 0;
+
+    case ENOBUFS:
+        printf("\nUnexpected error ENOBUFS (%d)\n", ENOBUFS);
+        return NET_SOFTERROR;
+
+    default:
+        printf("\nUnexpected error (%d)\n", errno);
+        return NET_HARDERROR;
+    }
+}
+#endif /* HAVE_UDP_SEGMENT */
 
 int
 has_sendfile(void)
diff --git a/src/net.h b/src/net.h
index 026dfd030..de2554fc8 100644
--- a/src/net.h
+++ b/src/net.h
@@ -41,6 +41,12 @@ int Nsendfile(int fromfd, int tofd, const char *buf, size_t count) /* __attribut
 int setnonblocking(int fd, int nonblocking);
 int getsockdomain(int sock);
 int parse_qos(const char *tos);
+#ifdef HAVE_UDP_GRO
+int Nread_gro(int fd, char *buf, size_t count, int prot, int *dgram_sz);
+#endif
+#ifdef HAVE_UDP_SEGMENT
+int Nwrite_gso(int fd, const char *buf, size_t count, int prot, uint16_t gso_size);
+#endif
 
 #define NET_SOFTERROR -1
 #define NET_HARDERROR -2