Skip to content

Commit 5b1647c

Browse files
RobPasMue, pyansys-ci-bot, pre-commit-ci[bot]
authored
feat: adapt health check timeout algorithm (#1559)
Co-authored-by: pyansys-ci-bot <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 7ab4a01 commit 5b1647c

File tree

2 files changed

+25
-9
lines changed

2 files changed

+25
-9
lines changed

doc/changelog.d/1559.added.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
adapt health check timeout algorithm

src/ansys/geometry/core/connection/client.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,15 @@ def wait_until_healthy(channel: grpc.Channel, timeout: float):
7373
channel : ~grpc.Channel
7474
Channel that must be established and healthy.
7575
timeout : float
76-
Timeout in seconds. An attempt is made every 100 milliseconds
77-
until the timeout is exceeded.
76+
Timeout in seconds. Attempts are made with the following backoff strategy:
77+
78+
* Starts with 0.1 seconds.
79+
* If the attempt fails, double the timeout.
80+
* This is repeated until the next timeout exceeds the
81+
value for the remaining time. In that case, a final attempt
82+
is made with the remaining time.
83+
* If the total elapsed time exceeds the value for the ``timeout`` parameter,
84+
a ``TimeoutError`` is raised.
7885
7986
Raises
8087
------
@@ -84,12 +91,21 @@ def wait_until_healthy(channel: grpc.Channel, timeout: float):
8491
t_max = time.time() + timeout
8592
health_stub = health_pb2_grpc.HealthStub(channel)
8693
request = health_pb2.HealthCheckRequest(service="")
94+
95+
t_out = 0.1
8796
while time.time() < t_max:
8897
try:
89-
out = health_stub.Check(request, timeout=0.1)
98+
out = health_stub.Check(request, timeout=t_out)
9099
if out.status is health_pb2.HealthCheckResponse.SERVING:
91100
break
92101
except _InactiveRpcError:
102+
# Double the timeout and try again
103+
t_now = time.time()
104+
t_out *= 2
105+
# If we have time to try again, continue; but if we don't,
106+
# just try for the remaining time
107+
if t_now + t_out > t_max:
108+
t_out = t_max - t_now
93109
continue
94110
else:
95111
target_str = channel._channel.target().decode()
@@ -171,7 +187,8 @@ def __init__(
171187
)
172188

173189
# do not finish initialization until channel is healthy
174-
wait_until_healthy(self._channel, timeout)
190+
self._grpc_health_timeout = timeout
191+
wait_until_healthy(self._channel, self._grpc_health_timeout)
175192

176193
# once connection with the client is established, create a logger
177194
self._log = LOG.add_instance_logger(
@@ -275,12 +292,10 @@ def healthy(self) -> bool:
275292
"""Flag indicating whether the client channel is healthy."""
276293
if self._closed:
277294
return False
278-
health_stub = health_pb2_grpc.HealthStub(self._channel)
279-
request = health_pb2.HealthCheckRequest(service="")
280295
try:
281-
out = health_stub.Check(request, timeout=0.1)
282-
return out.status is health_pb2.HealthCheckResponse.SERVING
283-
except _InactiveRpcError: # pragma: no cover
296+
wait_until_healthy(self._channel, self._grpc_health_timeout)
297+
return True
298+
except TimeoutError: # pragma: no cover
284299
return False
285300

286301
def __repr__(self) -> str:

0 commit comments

Comments
 (0)