diff --git a/charts/kms-core/Chart.yaml b/charts/kms-core/Chart.yaml index 5c0402d8a..2edd64019 100644 --- a/charts/kms-core/Chart.yaml +++ b/charts/kms-core/Chart.yaml @@ -1,6 +1,6 @@ name: kms-core description: A helm chart to distribute and deploy the Zama KMS core service. -version: 1.5.0-beta.1 +version: 1.5.0-beta.2 appVersion: 0.13.0 # Minimum kms version to run this chart apiVersion: v2 keywords: diff --git a/charts/kms-core/templates/kms-core-configmap.yaml b/charts/kms-core/templates/kms-core-configmap.yaml index b023c880a..dfd136e24 100644 --- a/charts/kms-core/templates/kms-core-configmap.yaml +++ b/charts/kms-core/templates/kms-core-configmap.yaml @@ -93,6 +93,9 @@ data: listen_port = {{ int .Values.kmsCore.ports.client }} timeout_secs = {{ int .Values.kmsCore.resources.limits.grpcTimeout }} grpc_max_message_size = {{ int .Values.kmsCore.resources.limits.grpcMaxMessageSize }} + http2_keep_alive_interval_secs = {{ int .Values.kmsCore.resources.keepalive.Http2KeepAliveIntervalSecs }} + http2_keep_alive_timeout_secs = {{ int .Values.kmsCore.resources.keepalive.Http2KeepAliveTimeoutSecs }} + tcp_keep_alive_secs = {{ int .Values.kmsCore.resources.keepalive.TcpKeepAliveSecs }} {{ if .Values.kmsCore.thresholdMode.enabled }} [threshold] @@ -123,6 +126,9 @@ data: discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = {{ int .Values.kmsCore.thresholdMode.maxOpenedInactiveSessionsPerParty }} + http2_keep_alive_interval_secs = {{ int .Values.kmsCore.thresholdMode.Http2KeepAliveIntervalSecs }} + http2_keep_alive_timeout_secs = {{ int .Values.kmsCore.thresholdMode.Http2KeepAliveTimeoutSecs }} + tcp_keep_alive_secs = {{ int .Values.kmsCore.thresholdMode.TcpKeepAliveSecs }} {{- if $.Values.kmsCore.thresholdMode.tls.enabled }} {{- if $.Values.kmsCore.nitroEnclave.enabled }} diff --git a/charts/kms-core/values.yaml b/charts/kms-core/values.yaml index b7ccaa6ac..a007c7f37 100644 --- a/charts/kms-core/values.yaml +++ b/charts/kms-core/values.yaml @@ -101,6 +101,9 @@ kmsCore: refreshIntervalMs: 5000 tokioWorkerThreads: 10 rayonNumThreads: 40 + Http2KeepAliveIntervalSecs: 30 + Http2KeepAliveTimeoutSecs: 10 + TcpKeepAliveSecs: 30 # Threshold value is the number of corruptions that the protocol handles. # 1 for 4 parties, 4 for 13 parties thresholdValue: 4 @@ -201,6 +204,10 @@ kmsCore: grpcTimeout: 360 # 100MB grpcMaxMessageSize: 104857600 + keepalive: + Http2KeepAliveIntervalSecs: 30 + Http2KeepAliveTimeoutSecs: 10 + TcpKeepAliveSecs: 30 nodeSelector: affinity: tolerations: diff --git a/core/service/config/compose_1.toml b/core/service/config/compose_1.toml index 2ee96f3ae..f551ebf46 100644 --- a/core/service/config/compose_1.toml +++ b/core/service/config/compose_1.toml @@ -7,6 +7,9 @@ listen_address = "0.0.0.0" listen_port = 50100 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -86,6 +89,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] diff --git a/core/service/config/compose_2.toml b/core/service/config/compose_2.toml index c4ae82d6c..16aa04c09 100644 --- a/core/service/config/compose_2.toml +++ b/core/service/config/compose_2.toml @@ -7,6 +7,9 @@ listen_address = "0.0.0.0" listen_port = 50200 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -86,6 +89,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/compose_3.toml b/core/service/config/compose_3.toml index 72b38f5d5..c2c276ec0 100644 --- a/core/service/config/compose_3.toml +++ b/core/service/config/compose_3.toml @@ -7,6 +7,9 @@ listen_address = "0.0.0.0" listen_port = 50300 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -86,6 +89,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/compose_4.toml b/core/service/config/compose_4.toml index c36b7abe3..eb2af903f 100644 --- a/core/service/config/compose_4.toml +++ b/core/service/config/compose_4.toml @@ -7,6 +7,9 @@ listen_address = "0.0.0.0" listen_port = 50400 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -86,6 +89,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/compose_5.toml b/core/service/config/compose_5.toml index 2f7a94931..498236a61 100644 --- a/core/service/config/compose_5.toml +++ b/core/service/config/compose_5.toml @@ -7,6 +7,9 @@ listen_address = "0.0.0.0" listen_port = 50500 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -86,6 +89,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/compose_6.toml b/core/service/config/compose_6.toml index a13032afb..9f1728f31 100644 --- a/core/service/config/compose_6.toml +++ b/core/service/config/compose_6.toml @@ -7,6 +7,9 @@ listen_address = "0.0.0.0" listen_port = 50600 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -86,6 +89,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/compose_centralized.toml b/core/service/config/compose_centralized.toml index 5f3b4cdd7..a27636d3d 100644 --- a/core/service/config/compose_centralized.toml +++ b/core/service/config/compose_centralized.toml @@ -5,6 +5,9 @@ listen_address = "0.0.0.0" listen_port = 50051 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [telemetry] tracing_service_name = "kms-centralized" diff --git a/core/service/config/default_1.toml b/core/service/config/default_1.toml index e1564954f..568fa1eec 100644 --- a/core/service/config/default_1.toml +++ b/core/service/config/default_1.toml @@ -16,6 +16,10 @@ timeout_secs = 360 # Maximum gRPC message size in bytes. grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 + # Set the AWS region and endpoint used by [public_vault.storage] # and/or [private_vault.storage], if the storage URL starts with "s3://". # If the storage URL is file-based, this configuration is ignored. @@ -193,6 +197,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # Redis instance for preprocessed material (unset by default). # [threshold.preproc_redis] diff --git a/core/service/config/default_2.toml b/core/service/config/default_2.toml index ed1a12c9b..998259b46 100644 --- a/core/service/config/default_2.toml +++ b/core/service/config/default_2.toml @@ -5,6 +5,9 @@ listen_address = "0.0.0.0" listen_port = 50200 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -76,6 +79,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/default_3.toml b/core/service/config/default_3.toml index c5f933df0..0711f5629 100644 --- a/core/service/config/default_3.toml +++ b/core/service/config/default_3.toml @@ -5,6 +5,9 @@ listen_address = "0.0.0.0" listen_port = 50300 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -76,6 +79,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/default_4.toml b/core/service/config/default_4.toml index d3fbe0c27..ca54dae80 100644 --- a/core/service/config/default_4.toml +++ b/core/service/config/default_4.toml @@ -5,6 +5,9 @@ listen_address = "0.0.0.0" listen_port = 50400 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [aws] region = "us-east-1" @@ -76,6 +79,9 @@ session_cleanup_interval_secs = 3600 discard_inactive_sessions_interval = 900 max_waiting_time_for_message_queue = 60 max_opened_inactive_sessions_per_party = 100 +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 # [threshold.preproc_redis] # host = "redis://127.0.0.1" diff --git a/core/service/config/default_centralized.toml b/core/service/config/default_centralized.toml index 130b525cd..fe1af93a0 100644 --- a/core/service/config/default_centralized.toml +++ b/core/service/config/default_centralized.toml @@ -5,6 +5,9 @@ listen_address = "0.0.0.0" listen_port = 50051 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [telemetry] tracing_service_name = "kms-centralized" diff --git a/core/service/config/default_centralized_enclave.toml b/core/service/config/default_centralized_enclave.toml index e12743227..505695617 100644 --- a/core/service/config/default_centralized_enclave.toml +++ b/core/service/config/default_centralized_enclave.toml @@ -5,6 +5,9 @@ listen_address = "0.0.0.0" listen_port = 50051 timeout_secs = 360 grpc_max_message_size = 104857600 # 100 MiB +http2_keep_alive_interval_secs = 30 +http2_keep_alive_timeout_secs = 10 +tcp_keep_alive_secs = 30 [telemetry] tracing_service_name = "kms-centralized" diff --git a/core/service/src/client/test_tools.rs b/core/service/src/client/test_tools.rs index f61a05f0d..b65d0be3b 100644 --- a/core/service/src/client/test_tools.rs +++ b/core/service/src/client/test_tools.rs @@ -107,6 +107,9 @@ pub async fn setup_threshold_no_client< listen_port: service_ports[i - 1], timeout_secs: 60u64, grpc_max_message_size: GRPC_MAX_MESSAGE_SIZE, + http2_keep_alive_interval_secs: Some(30), + http2_keep_alive_timeout_secs: Some(10), + tcp_keep_alive_secs: Some(30), }; let mpc_conf = mpc_confs.clone(); @@ -439,6 +442,9 @@ pub async fn setup_centralized_no_client< listen_port, timeout_secs: 360, grpc_max_message_size: GRPC_MAX_MESSAGE_SIZE, + http2_keep_alive_interval_secs: Some(30), + http2_keep_alive_timeout_secs: Some(10), + tcp_keep_alive_secs: Some(30), }; run_server( diff --git a/core/service/src/conf/mod.rs b/core/service/src/conf/mod.rs index 427350848..ca1b68675 100644 --- a/core/service/src/conf/mod.rs +++ b/core/service/src/conf/mod.rs @@ -76,6 +76,12 @@ pub struct ServiceEndpoint { // maximum gRPC message size in bytes #[validate(range(min = 1, max = 2147483647))] pub grpc_max_message_size: usize, + #[validate(range(min = 1))] + pub http2_keep_alive_interval_secs: Option, + #[validate(range(min = 1))] + pub http2_keep_alive_timeout_secs: Option, + #[validate(range(min = 1))] + pub tcp_keep_alive_secs: Option, } pub trait ConfigTracing { diff --git a/core/service/src/engine/server.rs b/core/service/src/engine/server.rs index 56d8c40d7..730722383 100644 --- a/core/service/src/engine/server.rs +++ b/core/service/src/engine/server.rs @@ -117,6 +117,17 @@ pub async fn run_server< ); let server = Server::builder() .http2_adaptive_window(Some(true)) + .http2_keepalive_interval( + config + .http2_keep_alive_interval_secs + .map(Duration::from_secs), + ) + .http2_keepalive_timeout( + config + .http2_keep_alive_timeout_secs + .map(Duration::from_secs), + ) + .tcp_keepalive(config.tcp_keep_alive_secs.map(Duration::from_secs)) .layer(trace_request) // Make sure we never abort because we spent too much time on the blocking part of the get result // as we mean to do it. diff --git a/core/service/src/engine/threshold/service/kms_impl.rs b/core/service/src/engine/threshold/service/kms_impl.rs index 84a824902..69bd12802 100644 --- a/core/service/src/engine/threshold/service/kms_impl.rs +++ b/core/service/src/engine/threshold/service/kms_impl.rs @@ -28,7 +28,7 @@ use threshold_fhe::{ zk::ceremony::SecureCeremony, }, networking::{ - grpc::{GrpcNetworkingManager, GrpcServer, TlsExtensionGetter}, + grpc::{GrpcNetworkingManager, GrpcServer, OptionConfigWrapper, TlsExtensionGetter}, tls::AttestedVerifier, }, }; @@ -280,8 +280,12 @@ where .write() .await .new_server(TlsExtensionGetter::SslConnectInfo); + let p2p_config: OptionConfigWrapper = config.core_to_core_net.into(); let router = Server::builder() .http2_adaptive_window(Some(true)) + .http2_keepalive_interval(Some(p2p_config.get_http2_keep_alive_interval())) + .http2_keepalive_timeout(Some(p2p_config.get_http2_keep_alive_timeout())) + .tcp_keepalive(Some(p2p_config.get_tcp_keep_alive())) .add_service(networking_server) .add_service(threshold_health_service); diff --git a/core/threshold/src/networking/constants.rs b/core/threshold/src/networking/constants.rs index d0a5812e8..625c435db 100644 --- a/core/threshold/src/networking/constants.rs +++ b/core/threshold/src/networking/constants.rs @@ -25,6 +25,11 @@ pub(crate) const DISCARD_INACTIVE_SESSION_INTERVAL_SECS: u64 = 15 * 60; // The default maximum waiting time we wait for trying to push the message in the queue pub(crate) const MAX_WAITING_TIME_MESSAGE_QUEUE: u64 = 60; + +pub(crate) const HTTP2_KEEP_ALIVE_INTERVAL_SEC: u64 = 30; +pub(crate) const HTTP2_KEEP_ALIVE_TIMEOUT_SEC: u64 = 10; +pub(crate) const TCP_KEEP_ALIVE_SEC: u64 = 30; + lazy_static! { /// The default maximum internal between retries (Cap at 60s intervals) pub static ref MAX_INTERVAL: Duration = Duration::from_secs(60); diff --git a/core/threshold/src/networking/grpc.rs b/core/threshold/src/networking/grpc.rs index c347fe8f7..808b173ea 100644 --- a/core/threshold/src/networking/grpc.rs +++ b/core/threshold/src/networking/grpc.rs @@ -7,11 +7,12 @@ use super::tls::extract_subject_from_cert; use super::NetworkMode; use crate::execution::runtime::party::{MpcIdentity, RoleAssignment, RoleKind, RoleTrait}; use crate::networking::constants::{ - DISCARD_INACTIVE_SESSION_INTERVAL_SECS, INITIAL_INTERVAL_MS, MAX_ELAPSED_TIME, + DISCARD_INACTIVE_SESSION_INTERVAL_SECS, HTTP2_KEEP_ALIVE_INTERVAL_SEC, + HTTP2_KEEP_ALIVE_TIMEOUT_SEC, INITIAL_INTERVAL_MS, MAX_ELAPSED_TIME, MAX_EN_DECODE_MESSAGE_SIZE, MAX_INTERVAL, MAX_OPENED_INACTIVE_SESSIONS_PER_PARTY, MAX_WAITING_TIME_MESSAGE_QUEUE, MESSAGE_LIMIT, MULTIPLIER, NETWORK_TIMEOUT_ASYNC, NETWORK_TIMEOUT_BK, NETWORK_TIMEOUT_BK_SNS, NETWORK_TIMEOUT_LONG, - SESSION_CLEANUP_INTERVAL_SECS, SESSION_STATUS_UPDATE_INTERVAL_SECS, + SESSION_CLEANUP_INTERVAL_SECS, SESSION_STATUS_UPDATE_INTERVAL_SECS, TCP_KEEP_ALIVE_SEC, }; use crate::networking::health_check::HealthCheckSession; use crate::networking::Networking; @@ -55,6 +56,9 @@ pub struct CoreToCoreNetworkConfig { pub max_waiting_time_for_message_queue: Option, /// Maximum number of "Inactive" sessions a party can open before I refuse to open more (default: 100) pub max_opened_inactive_sessions_per_party: Option, + pub http2_keep_alive_interval_secs: Option, + pub http2_keep_alive_timeout_secs: Option, + pub tcp_keep_alive_secs: Option, } #[derive(Debug, Clone, Copy)] @@ -62,6 +66,18 @@ pub struct OptionConfigWrapper { pub conf: Option, } +impl From> for OptionConfigWrapper { + fn from(val: Option) -> Self { + OptionConfigWrapper { conf: val } + } +} + +impl From for OptionConfigWrapper { + fn from(val: CoreToCoreNetworkConfig) -> Self { + OptionConfigWrapper { conf: Some(val) } + } +} + impl OptionConfigWrapper { pub fn get_message_limit(&self) -> usize { if let Some(conf) = self.conf { @@ -191,6 +207,36 @@ impl OptionConfigWrapper { Duration::from_secs(MAX_WAITING_TIME_MESSAGE_QUEUE) // Default to 60 seconds if not specified } } + + pub fn get_http2_keep_alive_interval(&self) -> Duration { + if let Some(conf) = self.conf { + Duration::from_secs( + conf.http2_keep_alive_interval_secs + .unwrap_or(HTTP2_KEEP_ALIVE_INTERVAL_SEC), + ) + } else { + Duration::from_secs(HTTP2_KEEP_ALIVE_INTERVAL_SEC) + } + } + + pub fn get_http2_keep_alive_timeout(&self) -> Duration { + if let Some(conf) = self.conf { + Duration::from_secs( + conf.http2_keep_alive_timeout_secs + .unwrap_or(HTTP2_KEEP_ALIVE_TIMEOUT_SEC), + ) + } else { + Duration::from_secs(HTTP2_KEEP_ALIVE_TIMEOUT_SEC) + } + } + + pub fn get_tcp_keep_alive(&self) -> Duration { + if let Some(conf) = self.conf { + Duration::from_secs(conf.tcp_keep_alive_secs.unwrap_or(TCP_KEEP_ALIVE_SEC)) + } else { + Duration::from_secs(TCP_KEEP_ALIVE_SEC) + } + } } //TODO: Most likely need this to create NetworkStack instead of GrpcNetworking diff --git a/core/threshold/src/networking/sending_service.rs b/core/threshold/src/networking/sending_service.rs index 0ec51743f..880934ac7 100644 --- a/core/threshold/src/networking/sending_service.rs +++ b/core/threshold/src/networking/sending_service.rs @@ -175,6 +175,10 @@ impl GrpcSendingService { // then this should be changed let endpoint = Channel::builder(endpoint) .http2_adaptive_window(true) + .keep_alive_while_idle(true) + .http2_keep_alive_interval(self.config.get_http2_keep_alive_interval()) + .keep_alive_timeout(self.config.get_http2_keep_alive_timeout()) + .tcp_keepalive(Some(self.config.get_tcp_keep_alive())) .tcp_nodelay(true); // we have to pass a custom TLS connector to // tonic::transport::Channel to be able to use a custom rustls @@ -195,6 +199,10 @@ impl GrpcSendingService { // then this should be changed Channel::builder(endpoint) .http2_adaptive_window(true) + .keep_alive_while_idle(true) + .http2_keep_alive_interval(self.config.get_http2_keep_alive_interval()) + .keep_alive_timeout(self.config.get_http2_keep_alive_timeout()) + .tcp_keepalive(Some(self.config.get_tcp_keep_alive())) .tcp_nodelay(true) .connect_lazy() } diff --git a/docker/core/service/init_enclave.sh b/docker/core/service/init_enclave.sh index ad2f00b6e..e82e5999d 100755 --- a/docker/core/service/init_enclave.sh +++ b/docker/core/service/init_enclave.sh @@ -8,6 +8,9 @@ CONFIG_PORT=4000 TOKEN_PORT=4100 KMS_SERVER_CONFIG_FILE="config.toml" AWS_WEB_IDENTITY_TOKEN_FILE="token" +KEEPIDLE=30 +KEEPINTVL=10 +KEEPCNT=3 export AWS_WEB_IDENTITY_TOKEN_FILE logger() { @@ -45,9 +48,9 @@ start_tcp_proxy_out() { local NAME="$1" local PORT="$2" log "starting enclave-side $NAME proxy" - socat -T60 \ - TCP-LISTEN:"$PORT",fork,nodelay,reuseaddr \ - VSOCK-CONNECT:$PARENT_CID:"$PORT" \ + socat \ + TCP-LISTEN:"$PORT",fork,nodelay,reuseaddr,keepalive,keepidle="$KEEPIDLE",keepintvl="$KEEPINTVL",keepcnt="$KEEPCNT" \ + VSOCK-CONNECT:$PARENT_CID:"$PORT",keepalive \ |& logger & } @@ -55,9 +58,9 @@ start_tcp_proxy_in() { local NAME="$1" local PORT="$2" log "starting enclave-side $NAME proxy" - socat -T60 \ - VSOCK-LISTEN:"$PORT",fork,reuseaddr \ - TCP:127.0.0.1:"$PORT",nodelay \ + socat \ + VSOCK-LISTEN:"$PORT",fork,reuseaddr,keepalive \ + TCP:127.0.0.1:"$PORT",nodelay,keepalive,keepidle="$KEEPIDLE",keepintvl="$KEEPINTVL",keepcnt="$KEEPCNT" \ |& logger & } diff --git a/docker/core/service/start_parent_proxies.sh b/docker/core/service/start_parent_proxies.sh index e4c6124e8..cf45f7a2c 100644 --- a/docker/core/service/start_parent_proxies.sh +++ b/docker/core/service/start_parent_proxies.sh @@ -14,6 +14,9 @@ ENCLAVE_CID="$1" ENCLAVE_LOG_PORT="$2" ENCLAVE_CONFIG_PORT="$3" KMS_SERVER_CONFIG_FILE="$4" +KEEPIDLE=30 +KEEPINTVL=10 +KEEPCNT=3 get_configured_host_and_port() { local SERVICE_NAME="$1" @@ -39,14 +42,14 @@ start_tcp_proxy_out() { local VSOCK_PORT="$2" local TCP_DST="$3" echo "start_proxies: starting parent-side $NAME proxy" - socat -T60 VSOCK-LISTEN:"$VSOCK_PORT",fork,reuseaddr TCP:"$TCP_DST",nodelay & + socat VSOCK-LISTEN:"$VSOCK_PORT",fork,reuseaddr,keepalive TCP:"$TCP_DST",nodelay,keepalive,keepidle="$KEEPIDLE",keepintvl="$KEEPINTVL",keepcnt="$KEEPCNT" & } start_tcp_proxy_in() { local NAME="$1" local PORT="$2" echo "start_proxies: starting parent-side $NAME proxy" - socat -T60 TCP-LISTEN:"$PORT",fork,nodelay,reuseaddr VSOCK-CONNECT:"$ENCLAVE_CID":"$PORT" + socat TCP-LISTEN:"$PORT",fork,nodelay,reuseaddr,keepalive,keepidle="$KEEPIDLE",keepintvl="$KEEPINTVL",keepcnt="$KEEPCNT" VSOCK-CONNECT:"$ENCLAVE_CID":"$PORT",keepalive } # start the log stream for the enclave