|
18 | 18 | * Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
|
19 | 19 | * Copyright (c) 2015-2017 Research Organization for Information Science
|
20 | 20 | * and Technology (RIST). All rights reserved.
|
| 21 | + * Copyright (c) 2017 IBM Corporation. All rights reserved. |
21 | 22 | * $COPYRIGHT$
|
22 | 23 | *
|
23 | 24 | * Additional copyrights may follow
|
@@ -417,27 +418,26 @@ static int tcp_component_register(void)
|
417 | 418 | &mca_oob_tcp_component.disable_ipv6_family);
|
418 | 419 | #endif // OPAL_ENABLE_IPV6
|
419 | 420 |
|
420 |
| - // Default to keepalives every 60 seconds |
421 |
| - mca_oob_tcp_component.keepalive_time = 60; |
| 421 | + // Wait for this amount of time before sending the first keepalive probe |
| 422 | + mca_oob_tcp_component.keepalive_time = 300; |
422 | 423 | (void)mca_base_component_var_register(component, "keepalive_time",
|
423 | 424 | "Idle time in seconds before starting to send keepalives (keepalive_time <= 0 disables keepalive functionality)",
|
424 | 425 | MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
425 | 426 | OPAL_INFO_LVL_5,
|
426 | 427 | MCA_BASE_VAR_SCOPE_READONLY,
|
427 | 428 | &mca_oob_tcp_component.keepalive_time);
|
428 | 429 |
|
429 |
| - // Default to keepalive retry interval time of 5 seconds |
430 |
| - mca_oob_tcp_component.keepalive_intvl = 5; |
| 430 | + // Resend the keepalive probe every keepalive_intvl seconds while the peer has not responded |
| 431 | + mca_oob_tcp_component.keepalive_intvl = 20; |
431 | 432 | (void)mca_base_component_var_register(component, "keepalive_intvl",
|
432 | 433 | "Time between successive keepalive pings when peer has not responded, in seconds (ignored if keepalive_time <= 0)",
|
433 | 434 | MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
434 | 435 | OPAL_INFO_LVL_5,
|
435 | 436 | MCA_BASE_VAR_SCOPE_READONLY,
|
436 | 437 | &mca_oob_tcp_component.keepalive_intvl);
|
437 | 438 |
|
438 |
| - // Default to retrying a keepalive 3 times before declaring the |
439 |
| - // peer kaput |
440 |
| - mca_oob_tcp_component.keepalive_probes = 3; |
| 439 | + // After keepalive_probes unanswered probes, sent keepalive_intvl seconds apart, consider the connection dead |
| 440 | + mca_oob_tcp_component.keepalive_probes = 9; |
441 | 441 | (void)mca_base_component_var_register(component, "keepalive_probes",
|
442 | 442 | "Number of keepalives that can be missed before declaring error (ignored if keepalive_time <= 0)",
|
443 | 443 | MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
0 commit comments