Skip to content

Commit fe3ea41

Browse files
GitHKAndrei Neagu
andauthored
Fix resource manger alive keys always have a TTL (#1848)
* alive key will always expire * adjusted tests and added new test for alive key * fixed wrong number of arguments * making sure tests pass Co-authored-by: Andrei Neagu <[email protected]>
1 parent 0ef11c4 commit fe3ea41

File tree

3 files changed

+46
-16
lines changed

3 files changed

+46
-16
lines changed

services/web/server/src/simcore_service_webserver/resource_manager/registry.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@
3030

3131
@attr.s(auto_attribs=True)
3232
class RedisResourceRegistry:
33-
""" Keeps a record of connected sockets per user
33+
"""Keeps a record of connected sockets per user
3434
35-
redis structure is following
36-
Redis Hash: key=user_id:client_session_id values={server_id socket_id project_id}
35+
redis structure is following
36+
Redis Hash: key=user_id:client_session_id values={server_id socket_id project_id}
3737
"""
3838

3939
app: web.Application
@@ -92,12 +92,12 @@ async def find_keys(self, resource: Tuple[str, str]) -> List[Dict[str, str]]:
9292
keys.append(self._decode_hash_key(hash_key))
9393
return keys
9494

95-
async def set_key_alive(
96-
self, key: Dict[str, str], alive: bool, timeout: int = 0
97-
) -> None:
95+
async def set_key_alive(self, key: Dict[str, str], timeout: int) -> None:
96+
# setting the timeout to always expire, timeout > 0
97+
timeout = int(max(1, timeout))
9898
client = get_redis_client(self.app)
9999
hash_key = f"{self._hash_key(key)}:{ALIVE_SUFFIX}"
100-
await client.set(hash_key, 1, expire=0 if alive else timeout)
100+
await client.set(hash_key, 1, expire=timeout)
101101

102102
async def is_key_alive(self, key: Dict[str, str]) -> bool:
103103
client = get_redis_client(self.app)

services/web/server/src/simcore_service_webserver/resource_manager/websocket_manager.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ async def set_socket_id(self, socket_id: str) -> None:
5454
)
5555
registry = get_registry(self.app)
5656
await registry.set_resource(self._resource_key(), (SOCKET_ID_KEY, socket_id))
57-
await registry.set_key_alive(self._resource_key(), True)
57+
# hearthbeat is not emulated in tests, make sure that with very small GC intervals
58+
# the resources do not result as timeout; this value is usually in the order of minutes
59+
timeout = max(3, get_service_deletion_timeout(self.app))
60+
await registry.set_key_alive(self._resource_key(), timeout)
5861

5962
async def get_socket_id(self) -> Optional[str]:
6063
log.debug(
@@ -70,7 +73,7 @@ async def user_pressed_disconnect(self) -> None:
7073
"""When the user disconnects expire as soon as possible the alive key
7174
to ensure garbage collection will trigger in the next 2 cycles."""
7275
registry = get_registry(self.app)
73-
await registry.set_key_alive(self._resource_key(), False, 1)
76+
await registry.set_key_alive(self._resource_key(), 1)
7477

7578
async def remove_socket_id(self) -> None:
7679
log.debug(
@@ -81,14 +84,14 @@ async def remove_socket_id(self) -> None:
8184
registry = get_registry(self.app)
8285
await registry.remove_resource(self._resource_key(), SOCKET_ID_KEY)
8386
await registry.set_key_alive(
84-
self._resource_key(), False, get_service_deletion_timeout(self.app)
87+
self._resource_key(), get_service_deletion_timeout(self.app)
8588
)
8689

8790
async def set_heartbeat(self) -> None:
8891
"""Refreshes heartbeat """
8992
registry = get_registry(self.app)
9093
await registry.set_key_alive(
91-
self._resource_key(), False, get_service_deletion_timeout(self.app)
94+
self._resource_key(), get_service_deletion_timeout(self.app)
9295
)
9396

9497
async def find_socket_ids(self) -> List[str]:

services/web/server/tests/unit/with_dbs/fast/test_redis_registry.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,17 +110,21 @@ async def test_redis_registry(loop, redis_registry):
110110
assert all(x in [key, second_key] for x in found_keys)
111111
assert not await redis_registry.find_keys(invalid_resource)
112112

113-
# create alive key
114-
await redis_registry.set_key_alive(key, True)
113+
DEAD_KEY_TIMEOUT = 1
114+
STILL_ALIVE_KEY_TIMEOUT = DEAD_KEY_TIMEOUT + 1
115+
116+
# create a key which will be alive when testing
117+
await redis_registry.set_key_alive(key, STILL_ALIVE_KEY_TIMEOUT)
115118
assert await redis_registry.is_key_alive(key) == True
116119
# create soon to be dead key
117-
TIMEOUT = 3
118-
await redis_registry.set_key_alive(second_key, False, TIMEOUT)
120+
await redis_registry.set_key_alive(second_key, DEAD_KEY_TIMEOUT)
119121
alive_keys, dead_keys = await redis_registry.get_all_resource_keys()
120122
assert not dead_keys
121123
assert all(x in alive_keys for x in [key, second_key])
122124
assert all(x in [key, second_key] for x in alive_keys)
123-
time.sleep(TIMEOUT)
125+
126+
time.sleep(DEAD_KEY_TIMEOUT)
127+
124128
assert await redis_registry.is_key_alive(second_key) == False
125129
alive_keys, dead_keys = await redis_registry.get_all_resource_keys()
126130
assert alive_keys == [key]
@@ -137,6 +141,29 @@ async def test_redis_registry(loop, redis_registry):
137141
)
138142

139143

144+
async def test_redis_registry_key_will_always_expire(loop, redis_registry):
145+
get_random_int = lambda: randint(1, 10)
146+
first_key = {f"key_{x}": f"value_{x}" for x in range(get_random_int())}
147+
second_key = {f"sec_key_{x}": f"sec_value_{x}" for x in range(get_random_int())}
148+
149+
resources = [(f"res_key{x}", f"res_value{x}") for x in range(get_random_int())]
150+
for resource in resources:
151+
await redis_registry.set_resource(first_key, resource)
152+
await redis_registry.set_resource(second_key, resource)
153+
154+
await redis_registry.set_key_alive(first_key, 0)
155+
await redis_registry.set_key_alive(second_key, -3000)
156+
157+
time.sleep(1) # minimum amount of sleep
158+
159+
assert await redis_registry.is_key_alive(second_key) == False
160+
assert await redis_registry.is_key_alive(first_key) == False
161+
162+
alive_keys, dead_keys = await redis_registry.get_all_resource_keys()
163+
assert len(alive_keys) == 0
164+
assert len(dead_keys) == 2
165+
166+
140167
async def test_websocket_manager(loop, redis_enabled_app, redis_registry, user_ids):
141168

142169
# create some user ids and socket ids

0 commit comments

Comments
 (0)