Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 54c012c

Browse files
Mathieu Velten and squahtx
authored
Make handle_new_client_event throw PartialStateConflictError (#14665)
Then adapt the calling code to retry when needed, so that clients do not receive a 500 error. Signed-off-by: Mathieu Velten <[email protected]> Co-authored-by: Sean Quah <[email protected]>
1 parent 046320b commit 54c012c

File tree

7 files changed

+360
-239
lines changed

7 files changed

+360
-239
lines changed

changelog.d/14665.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Change `handle_new_client_event` signature so that a 429 does not reach clients on `PartialStateConflictError`, and internally retry when needed instead.

synapse/handlers/federation.py

Lines changed: 78 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,32 +1343,53 @@ async def exchange_third_party_invite(
13431343
)
13441344

13451345
EventValidator().validate_builder(builder)
1346-
event, context = await self.event_creation_handler.create_new_client_event(
1347-
builder=builder
1348-
)
13491346

1350-
event, context = await self.add_display_name_to_third_party_invite(
1351-
room_version_obj, event_dict, event, context
1352-
)
1347+
# Try several times, it could fail with PartialStateConflictError
1348+
# in send_membership_event, cf comment in except block.
1349+
max_retries = 5
1350+
for i in range(max_retries):
1351+
try:
1352+
(
1353+
event,
1354+
context,
1355+
) = await self.event_creation_handler.create_new_client_event(
1356+
builder=builder
1357+
)
13531358

1354-
EventValidator().validate_new(event, self.config)
1359+
event, context = await self.add_display_name_to_third_party_invite(
1360+
room_version_obj, event_dict, event, context
1361+
)
13551362

1356-
# We need to tell the transaction queue to send this out, even
1357-
# though the sender isn't a local user.
1358-
event.internal_metadata.send_on_behalf_of = self.hs.hostname
1363+
EventValidator().validate_new(event, self.config)
13591364

1360-
try:
1361-
validate_event_for_room_version(event)
1362-
await self._event_auth_handler.check_auth_rules_from_context(event)
1363-
except AuthError as e:
1364-
logger.warning("Denying new third party invite %r because %s", event, e)
1365-
raise e
1365+
# We need to tell the transaction queue to send this out, even
1366+
# though the sender isn't a local user.
1367+
event.internal_metadata.send_on_behalf_of = self.hs.hostname
13661368

1367-
await self._check_signature(event, context)
1369+
try:
1370+
validate_event_for_room_version(event)
1371+
await self._event_auth_handler.check_auth_rules_from_context(
1372+
event
1373+
)
1374+
except AuthError as e:
1375+
logger.warning(
1376+
"Denying new third party invite %r because %s", event, e
1377+
)
1378+
raise e
13681379

1369-
# We retrieve the room member handler here as to not cause a cyclic dependency
1370-
member_handler = self.hs.get_room_member_handler()
1371-
await member_handler.send_membership_event(None, event, context)
1380+
await self._check_signature(event, context)
1381+
1382+
# We retrieve the room member handler here as to not cause a cyclic dependency
1383+
member_handler = self.hs.get_room_member_handler()
1384+
await member_handler.send_membership_event(None, event, context)
1385+
1386+
break
1387+
except PartialStateConflictError as e:
1388+
# Persisting couldn't happen because the room got un-partial stated
1389+
# in the meantime and context needs to be recomputed, so let's do so.
1390+
if i == max_retries - 1:
1391+
raise e
1392+
pass
13721393
else:
13731394
destinations = {x.split(":", 1)[-1] for x in (sender_user_id, room_id)}
13741395

@@ -1400,28 +1421,46 @@ async def on_exchange_third_party_invite_request(
14001421
room_version_obj, event_dict
14011422
)
14021423

1403-
event, context = await self.event_creation_handler.create_new_client_event(
1404-
builder=builder
1405-
)
1406-
event, context = await self.add_display_name_to_third_party_invite(
1407-
room_version_obj, event_dict, event, context
1408-
)
1424+
# Try several times, it could fail with PartialStateConflictError
1425+
# in send_membership_event, cf comment in except block.
1426+
max_retries = 5
1427+
for i in range(max_retries):
1428+
try:
1429+
(
1430+
event,
1431+
context,
1432+
) = await self.event_creation_handler.create_new_client_event(
1433+
builder=builder
1434+
)
1435+
event, context = await self.add_display_name_to_third_party_invite(
1436+
room_version_obj, event_dict, event, context
1437+
)
14091438

1410-
try:
1411-
validate_event_for_room_version(event)
1412-
await self._event_auth_handler.check_auth_rules_from_context(event)
1413-
except AuthError as e:
1414-
logger.warning("Denying third party invite %r because %s", event, e)
1415-
raise e
1416-
await self._check_signature(event, context)
1439+
try:
1440+
validate_event_for_room_version(event)
1441+
await self._event_auth_handler.check_auth_rules_from_context(event)
1442+
except AuthError as e:
1443+
logger.warning("Denying third party invite %r because %s", event, e)
1444+
raise e
1445+
await self._check_signature(event, context)
1446+
1447+
# We need to tell the transaction queue to send this out, even
1448+
# though the sender isn't a local user.
1449+
event.internal_metadata.send_on_behalf_of = get_domain_from_id(
1450+
event.sender
1451+
)
14171452

1418-
# We need to tell the transaction queue to send this out, even
1419-
# though the sender isn't a local user.
1420-
event.internal_metadata.send_on_behalf_of = get_domain_from_id(event.sender)
1453+
# We retrieve the room member handler here as to not cause a cyclic dependency
1454+
member_handler = self.hs.get_room_member_handler()
1455+
await member_handler.send_membership_event(None, event, context)
14211456

1422-
# We retrieve the room member handler here as to not cause a cyclic dependency
1423-
member_handler = self.hs.get_room_member_handler()
1424-
await member_handler.send_membership_event(None, event, context)
1457+
break
1458+
except PartialStateConflictError as e:
1459+
# Persisting couldn't happen because the room got un-partial stated
1460+
# in the meantime and context needs to be recomputed, so let's do so.
1461+
if i == max_retries - 1:
1462+
raise e
1463+
pass
14251464

14261465
async def add_display_name_to_third_party_invite(
14271466
self,

synapse/handlers/message.py

Lines changed: 108 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
AuthError,
3838
Codes,
3939
ConsentNotGivenError,
40-
LimitExceededError,
4140
NotFoundError,
4241
ShadowBanError,
4342
SynapseError,
@@ -999,60 +998,73 @@ async def create_and_send_nonmember_event(
999998
event.internal_metadata.stream_ordering,
1000999
)
10011000

1002-
event, context = await self.create_event(
1003-
requester,
1004-
event_dict,
1005-
txn_id=txn_id,
1006-
allow_no_prev_events=allow_no_prev_events,
1007-
prev_event_ids=prev_event_ids,
1008-
state_event_ids=state_event_ids,
1009-
outlier=outlier,
1010-
historical=historical,
1011-
depth=depth,
1012-
)
1001+
# Try several times, it could fail with PartialStateConflictError
1002+
# in handle_new_client_event, cf comment in except block.
1003+
max_retries = 5
1004+
for i in range(max_retries):
1005+
try:
1006+
event, context = await self.create_event(
1007+
requester,
1008+
event_dict,
1009+
txn_id=txn_id,
1010+
allow_no_prev_events=allow_no_prev_events,
1011+
prev_event_ids=prev_event_ids,
1012+
state_event_ids=state_event_ids,
1013+
outlier=outlier,
1014+
historical=historical,
1015+
depth=depth,
1016+
)
10131017

1014-
assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % (
1015-
event.sender,
1016-
)
1018+
assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % (
1019+
event.sender,
1020+
)
10171021

1018-
spam_check_result = await self.spam_checker.check_event_for_spam(event)
1019-
if spam_check_result != self.spam_checker.NOT_SPAM:
1020-
if isinstance(spam_check_result, tuple):
1021-
try:
1022-
[code, dict] = spam_check_result
1023-
raise SynapseError(
1024-
403,
1025-
"This message had been rejected as probable spam",
1026-
code,
1027-
dict,
1028-
)
1029-
except ValueError:
1030-
logger.error(
1031-
"Spam-check module returned invalid error value. Expecting [code, dict], got %s",
1032-
spam_check_result,
1033-
)
1022+
spam_check_result = await self.spam_checker.check_event_for_spam(event)
1023+
if spam_check_result != self.spam_checker.NOT_SPAM:
1024+
if isinstance(spam_check_result, tuple):
1025+
try:
1026+
[code, dict] = spam_check_result
1027+
raise SynapseError(
1028+
403,
1029+
"This message had been rejected as probable spam",
1030+
code,
1031+
dict,
1032+
)
1033+
except ValueError:
1034+
logger.error(
1035+
"Spam-check module returned invalid error value. Expecting [code, dict], got %s",
1036+
spam_check_result,
1037+
)
10341038

1035-
raise SynapseError(
1036-
403,
1037-
"This message has been rejected as probable spam",
1038-
Codes.FORBIDDEN,
1039-
)
1039+
raise SynapseError(
1040+
403,
1041+
"This message has been rejected as probable spam",
1042+
Codes.FORBIDDEN,
1043+
)
10401044

1041-
# Backwards compatibility: if the return value is not an error code, it
1042-
# means the module returned an error message to be included in the
1043-
# SynapseError (which is now deprecated).
1044-
raise SynapseError(
1045-
403,
1046-
spam_check_result,
1047-
Codes.FORBIDDEN,
1045+
# Backwards compatibility: if the return value is not an error code, it
1046+
# means the module returned an error message to be included in the
1047+
# SynapseError (which is now deprecated).
1048+
raise SynapseError(
1049+
403,
1050+
spam_check_result,
1051+
Codes.FORBIDDEN,
1052+
)
1053+
1054+
ev = await self.handle_new_client_event(
1055+
requester=requester,
1056+
events_and_context=[(event, context)],
1057+
ratelimit=ratelimit,
1058+
ignore_shadow_ban=ignore_shadow_ban,
10481059
)
10491060

1050-
ev = await self.handle_new_client_event(
1051-
requester=requester,
1052-
events_and_context=[(event, context)],
1053-
ratelimit=ratelimit,
1054-
ignore_shadow_ban=ignore_shadow_ban,
1055-
)
1061+
break
1062+
except PartialStateConflictError as e:
1063+
# Persisting couldn't happen because the room got un-partial stated
1064+
# in the meantime and context needs to be recomputed, so let's do so.
1065+
if i == max_retries - 1:
1066+
raise e
1067+
pass
10561068

10571069
# we know it was persisted, so must have a stream ordering
10581070
assert ev.internal_metadata.stream_ordering
@@ -1356,7 +1368,7 @@ async def handle_new_client_event(
13561368
13571369
Raises:
13581370
ShadowBanError if the requester has been shadow-banned.
1359-
SynapseError(503) if attempting to persist a partial state event in
1371+
PartialStateConflictError if attempting to persist a partial state event in
13601372
a room that has been un-partial stated.
13611373
"""
13621374
extra_users = extra_users or []
@@ -1418,34 +1430,23 @@ async def handle_new_client_event(
14181430
# We now persist the event (and update the cache in parallel, since we
14191431
# don't want to block on it).
14201432
event, context = events_and_context[0]
1421-
try:
1422-
result, _ = await make_deferred_yieldable(
1423-
gather_results(
1424-
(
1425-
run_in_background(
1426-
self._persist_events,
1427-
requester=requester,
1428-
events_and_context=events_and_context,
1429-
ratelimit=ratelimit,
1430-
extra_users=extra_users,
1431-
),
1432-
run_in_background(
1433-
self.cache_joined_hosts_for_events, events_and_context
1434-
).addErrback(
1435-
log_failure, "cache_joined_hosts_for_event failed"
1436-
),
1433+
result, _ = await make_deferred_yieldable(
1434+
gather_results(
1435+
(
1436+
run_in_background(
1437+
self._persist_events,
1438+
requester=requester,
1439+
events_and_context=events_and_context,
1440+
ratelimit=ratelimit,
1441+
extra_users=extra_users,
14371442
),
1438-
consumeErrors=True,
1439-
)
1440-
).addErrback(unwrapFirstError)
1441-
except PartialStateConflictError as e:
1442-
# The event context needs to be recomputed.
1443-
# Turn the error into a 429, as a hint to the client to try again.
1444-
logger.info(
1445-
"Room %s was un-partial stated while persisting client event.",
1446-
event.room_id,
1443+
run_in_background(
1444+
self.cache_joined_hosts_for_events, events_and_context
1445+
).addErrback(log_failure, "cache_joined_hosts_for_event failed"),
1446+
),
1447+
consumeErrors=True,
14471448
)
1448-
raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0)
1449+
).addErrback(unwrapFirstError)
14491450

14501451
return result
14511452

@@ -2012,26 +2013,39 @@ async def _send_dummy_event_for_room(self, room_id: str) -> bool:
20122013
for user_id in members:
20132014
requester = create_requester(user_id, authenticated_entity=self.server_name)
20142015
try:
2015-
event, context = await self.create_event(
2016-
requester,
2017-
{
2018-
"type": EventTypes.Dummy,
2019-
"content": {},
2020-
"room_id": room_id,
2021-
"sender": user_id,
2022-
},
2023-
)
2016+
# Try several times, it could fail with PartialStateConflictError
2017+
# in handle_new_client_event, cf comment in except block.
2018+
max_retries = 5
2019+
for i in range(max_retries):
2020+
try:
2021+
event, context = await self.create_event(
2022+
requester,
2023+
{
2024+
"type": EventTypes.Dummy,
2025+
"content": {},
2026+
"room_id": room_id,
2027+
"sender": user_id,
2028+
},
2029+
)
20242030

2025-
event.internal_metadata.proactively_send = False
2031+
event.internal_metadata.proactively_send = False
20262032

2027-
# Since this is a dummy-event it is OK if it is sent by a
2028-
# shadow-banned user.
2029-
await self.handle_new_client_event(
2030-
requester,
2031-
events_and_context=[(event, context)],
2032-
ratelimit=False,
2033-
ignore_shadow_ban=True,
2034-
)
2033+
# Since this is a dummy-event it is OK if it is sent by a
2034+
# shadow-banned user.
2035+
await self.handle_new_client_event(
2036+
requester,
2037+
events_and_context=[(event, context)],
2038+
ratelimit=False,
2039+
ignore_shadow_ban=True,
2040+
)
2041+
2042+
break
2043+
except PartialStateConflictError as e:
2044+
# Persisting couldn't happen because the room got un-partial stated
2045+
# in the meantime and context needs to be recomputed, so let's do so.
2046+
if i == max_retries - 1:
2047+
raise e
2048+
pass
20352049
return True
20362050
except AuthError:
20372051
logger.info(

0 commit comments

Comments
 (0)