Skip to content

Commit af7e2cc

Browse files
author
Ralph Castain
authored
Merge pull request #3004 from jjhursey/topic/oob-tcp-timeout
oob/tcp: Adjust TCP keepalive default values
2 parents 26c366a + df0f8e9 commit af7e2cc

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

orte/mca/oob/tcp/oob_tcp_component.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
1919
* Copyright (c) 2015-2017 Research Organization for Information Science
2020
* and Technology (RIST). All rights reserved.
21+
* Copyright (c) 2017 IBM Corporation. All rights reserved.
2122
* $COPYRIGHT$
2223
*
2324
* Additional copyrights may follow
@@ -417,27 +418,26 @@ static int tcp_component_register(void)
417418
&mca_oob_tcp_component.disable_ipv6_family);
418419
#endif // OPAL_ENABLE_IPV6
419420

420-
// Default to keepalives every 60 seconds
421-
mca_oob_tcp_component.keepalive_time = 60;
421+
// Idle time, in seconds, to wait before sending the first keepalive probe
422+
mca_oob_tcp_component.keepalive_time = 300;
422423
(void)mca_base_component_var_register(component, "keepalive_time",
423424
"Idle time in seconds before starting to send keepalives (keepalive_time <= 0 disables keepalive functionality)",
424425
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
425426
OPAL_INFO_LVL_5,
426427
MCA_BASE_VAR_SCOPE_READONLY,
427428
&mca_oob_tcp_component.keepalive_time);
428429

429-
// Default to keepalive retry interval time of 5 seconds
430-
mca_oob_tcp_component.keepalive_intvl = 5;
430+
// While the peer has not responded, resend the keepalive probe every keepalive_intvl seconds
431+
mca_oob_tcp_component.keepalive_intvl = 20;
431432
(void)mca_base_component_var_register(component, "keepalive_intvl",
432433
"Time between successive keepalive pings when peer has not responded, in seconds (ignored if keepalive_time <= 0)",
433434
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
434435
OPAL_INFO_LVL_5,
435436
MCA_BASE_VAR_SCOPE_READONLY,
436437
&mca_oob_tcp_component.keepalive_intvl);
437438

438-
// Default to retrying a keepalive 3 times before declaring the
439-
// peer kaput
440-
mca_oob_tcp_component.keepalive_probes = 3;
439+
// After keepalive_probes unanswered probes, sent keepalive_intvl seconds apart, consider the connection dead
440+
mca_oob_tcp_component.keepalive_probes = 9;
441441
(void)mca_base_component_var_register(component, "keepalive_probes",
442442
"Number of keepalives that can be missed before declaring error (ignored if keepalive_time <= 0)",
443443
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,

0 commit comments

Comments
 (0)