Skip to content

Commit bef539e

Browse files
jluebbe authored and Bastian-Krause committed
remote: rework grpc timeout configuration
It's not really clear how keepalive_timeout_ms and the ping_timeout_ms experiment should interact, so we set them both.

Signed-off-by: Jan Luebbe <[email protected]>
1 parent fc7b802 commit bef539e

File tree

3 files changed

+43
-12
lines changed

3 files changed

+43
-12
lines changed

labgrid/remote/client.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,21 @@ def __attrs_post_init__(self):
8686
"""Actions which are executed if a connection is successfully opened."""
8787
self.stopping = asyncio.Event()
8888

89-
self.channel = grpc.aio.insecure_channel(self.address)
89+
# It seems since https://github.com/grpc/grpc/pull/34647, the
90+
# ping_timeout_ms default of 60 seconds overrides keepalive_timeout_ms,
91+
# so set it as well.
92+
# Use GRPC_VERBOSITY=DEBUG GRPC_TRACE=http_keepalive for debugging.
93+
channel_options = [
94+
("grpc.keepalive_time_ms", 7500), # 7.5 seconds
95+
("grpc.keepalive_timeout_ms", 10000), # 10 seconds
96+
("grpc.http2.ping_timeout_ms", 10000), # 10 seconds
97+
("grpc.http2.max_pings_without_data", 0), # no limit
98+
]
99+
100+
self.channel = grpc.aio.insecure_channel(
101+
target=self.address,
102+
options=channel_options,
103+
)
90104
self.stub = labgrid_coordinator_pb2_grpc.CoordinatorStub(self.channel)
91105

92106
self.out_queue = asyncio.Queue()

labgrid/remote/coordinator.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -959,17 +959,20 @@ async def GetReservations(self, request: labgrid_coordinator_pb2.GetReservations
959959

960960

961961
async def serve(listen, cleanup) -> None:
962+
# It seems since https://github.com/grpc/grpc/pull/34647, the
963+
# ping_timeout_ms default of 60 seconds overrides keepalive_timeout_ms,
964+
# so set it as well.
965+
# Use GRPC_VERBOSITY=DEBUG GRPC_TRACE=http_keepalive for debugging.
966+
channel_options = [
967+
("grpc.keepalive_time_ms", 10000), # 10 seconds
968+
("grpc.keepalive_timeout_ms", 10000), # 10 seconds
969+
("grpc.http2.ping_timeout_ms", 15000), # 15 seconds
970+
("grpc.http2.min_ping_interval_without_data_ms", 5000),
971+
("grpc.http2.max_pings_without_data", 0), # no limit
972+
("grpc.keepalive_permit_without_calls", 1), # allow keepalive pings even when there are no calls
973+
]
962974
server = grpc.aio.server(
963-
options=[
964-
("grpc.keepalive_time_ms", 30000), # Send keepalive ping every 30 seconds
965-
(
966-
"grpc.keepalive_timeout_ms",
967-
10000,
968-
), # Wait 10 seconds for ping ack before considering the connection dead
969-
("grpc.http2.min_time_between_pings_ms", 15000), # Minimum amount of time between pings
970-
("grpc.http2.max_pings_without_data", 0), # Allow pings even without active streams
971-
("grpc.keepalive_permit_without_calls", 1), # Allow keepalive pings even when there are no calls
972-
],
975+
options=channel_options,
973976
)
974977
coordinator = Coordinator()
975978
labgrid_coordinator_pb2_grpc.add_CoordinatorServicer_to_server(coordinator, server)

labgrid/remote/exporter.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -783,11 +783,25 @@ def __init__(self, config) -> None:
783783
self.hostname = config["hostname"]
784784
self.isolated = config["isolated"]
785785

786+
# It seems since https://github.com/grpc/grpc/pull/34647, the
787+
# ping_timeout_ms default of 60 seconds overrides keepalive_timeout_ms,
788+
# so set it as well.
789+
# Use GRPC_VERBOSITY=DEBUG GRPC_TRACE=http_keepalive for debugging.
790+
channel_options = [
791+
("grpc.keepalive_time_ms", 7500), # 7.5 seconds
792+
("grpc.keepalive_timeout_ms", 10000), # 10 seconds
793+
("grpc.http2.ping_timeout_ms", 10000), # 10 seconds
794+
("grpc.http2.max_pings_without_data", 0), # no limit
795+
]
796+
786797
# default to port 20408 if not specified
787798
if urlsplit(f"//{config['coordinator']}").port is None:
788799
config["coordinator"] += ":20408"
789800

790-
self.channel = grpc.aio.insecure_channel(config["coordinator"])
801+
self.channel = grpc.aio.insecure_channel(
802+
target=config["coordinator"],
803+
options=channel_options,
804+
)
791805
self.stub = labgrid_coordinator_pb2_grpc.CoordinatorStub(self.channel)
792806
self.out_queue = asyncio.Queue()
793807
self.pump_task = None

0 commit comments

Comments
 (0)