
Commit 666ae87
Update event push action and receipt tables to support threads. (#13753)
Adds a `thread_id` column to the `event_push_actions`, `event_push_actions_staging`, and `event_push_summary` tables. This will allow notifications to be segmented by thread in a future pull request. The `thread_id` column stores the root event ID or the special value `"main"`. The `thread_id` column for `event_push_actions` and `event_push_summary` is backfilled with `"main"` for all existing rows. New entries into `event_push_actions` and `event_push_actions_staging` will get the proper thread ID. `receipts_linearized` and `receipts_graph` also gain a `thread_id` column, which is similar, except `NULL` is a special value meaning the receipt is "unthreaded". See MSC3771 and MSC3773 for where this data will be useful.
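For illustration, a minimal sketch of the intended semantics (the helpers below are hypothetical, written for this summary; only the stored values come from the commit):

from typing import Optional

MAIN_TIMELINE = "main"

def thread_id_for_push_action(rel_type: Optional[str], parent_id: Optional[str]) -> str:
    # Push-action rows: events in a thread store the thread root's event ID;
    # everything else is bucketed under the special "main" timeline value.
    if rel_type == "m.thread" and parent_id is not None:
        return parent_id
    return MAIN_TIMELINE

def thread_id_for_receipt(thread_root: Optional[str]) -> Optional[str]:
    # Receipt rows differ: None (stored as NULL) marks an unthreaded receipt,
    # i.e. one that applies to the room as a whole rather than to one thread.
    return thread_root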
1 parent f2d12cc commit 666ae87

File tree

11 files changed, +312 -20 lines changed

changelog.d/13753.misc

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+Preparatory work for storing thread IDs for notifications and receipts.

synapse/push/bulk_push_rule_evaluator.py

Lines changed: 15 additions & 14 deletions
@@ -198,15 +198,15 @@ async def _get_power_levels_and_sender_level(
         return pl_event.content if pl_event else {}, sender_level
 
     async def _get_mutual_relations(
-        self, event: EventBase, rules: Iterable[Tuple[PushRule, bool]]
+        self, parent_id: str, rules: Iterable[Tuple[PushRule, bool]]
     ) -> Dict[str, Set[Tuple[str, str]]]:
         """
         Fetch event metadata for events which related to the same event as the given event.
 
         If the given event has no relation information, returns an empty dictionary.
 
         Args:
-            event_id: The event ID which is targeted by relations.
+            parent_id: The event ID which is targeted by relations.
             rules: The push rules which will be processed for this event.
 
         Returns:
@@ -220,12 +220,6 @@ async def _get_mutual_relations(
         if not self._relations_match_enabled:
             return {}
 
-        # If the event does not have a relation, then cannot have any mutual
-        # relations.
-        relation = relation_from_event(event)
-        if not relation:
-            return {}
-
         # Pre-filter to figure out which relation types are interesting.
         rel_types = set()
         for rule, enabled in rules:
@@ -246,9 +240,7 @@ async def _get_mutual_relations(
             return {}
 
         # If any valid rules were found, fetch the mutual relations.
-        return await self.store.get_mutual_event_relations(
-            relation.parent_id, rel_types
-        )
+        return await self.store.get_mutual_event_relations(parent_id, rel_types)
 
     @measure_func("action_for_event_by_user")
     async def action_for_event_by_user(
@@ -281,9 +273,17 @@ async def action_for_event_by_user(
             sender_power_level,
         ) = await self._get_power_levels_and_sender_level(event, context)
 
-        relations = await self._get_mutual_relations(
-            event, itertools.chain(*rules_by_user.values())
-        )
+        relation = relation_from_event(event)
+        # If the event does not have a relation, then cannot have any mutual
+        # relations or thread ID.
+        relations = {}
+        thread_id = "main"
+        if relation:
+            relations = await self._get_mutual_relations(
+                relation.parent_id, itertools.chain(*rules_by_user.values())
+            )
+            if relation.rel_type == RelationTypes.THREAD:
+                thread_id = relation.parent_id
 
         evaluator = PushRuleEvaluatorForEvent(
             event,
@@ -352,6 +352,7 @@ async def action_for_event_by_user(
             event.event_id,
             actions_by_user,
             count_as_unread,
+            thread_id,
         )
 
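The diff above leans on relation_from_event returning the parsed m.relates_to data of an event. As a rough sketch of the shape being consumed (the class and parser here are illustrative, not Synapse's actual implementation; only parent_id and rel_type are used by this commit):

from typing import Optional
import attr

@attr.s(slots=True, frozen=True, auto_attribs=True)
class Relation:
    parent_id: str  # the related-to event ID; for m.thread, the thread root
    rel_type: str   # e.g. RelationTypes.THREAD, i.e. "m.thread"

def parse_relation(event_content: dict) -> Optional[Relation]:
    relates_to = event_content.get("m.relates_to")
    if not isinstance(relates_to, dict):
        return None
    parent_id = relates_to.get("event_id")
    rel_type = relates_to.get("rel_type")
    if not isinstance(parent_id, str) or not isinstance(rel_type, str):
        return None
    return Relation(parent_id=parent_id, rel_type=rel_type)

Hoisting this parse out of _get_mutual_relations lets the caller reuse one parsed relation for both the mutual-relations lookup and the new thread_id computation.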

synapse/storage/databases/main/event_push_actions.py

Lines changed: 118 additions & 3 deletions
@@ -98,6 +98,7 @@
 )
 from synapse.storage.databases.main.receipts import ReceiptsWorkerStore
 from synapse.storage.databases.main.stream import StreamWorkerStore
+from synapse.types import JsonDict
 from synapse.util import json_encoder
 from synapse.util.caches.descriptors import cached
 
@@ -232,6 +233,104 @@ def __init__(
             replaces_index="event_push_summary_user_rm",
         )
 
+        self.db_pool.updates.register_background_index_update(
+            "event_push_summary_unique_index2",
+            index_name="event_push_summary_unique_index2",
+            table="event_push_summary",
+            columns=["user_id", "room_id", "thread_id"],
+            unique=True,
+        )
+
+        self.db_pool.updates.register_background_update_handler(
+            "event_push_backfill_thread_id",
+            self._background_backfill_thread_id,
+        )
+
+    async def _background_backfill_thread_id(
+        self, progress: JsonDict, batch_size: int
+    ) -> int:
+        """
+        Fill in the thread_id field for event_push_actions and event_push_summary.
+
+        This is preparatory so that it can be made non-nullable in the future.
+
+        Because all current (null) data is done in an unthreaded manner this
+        simply assumes it is on the "main" timeline. Since event_push_actions
+        are periodically cleared it is not possible to correctly re-calculate
+        the thread_id.
+        """
+        event_push_actions_done = progress.get("event_push_actions_done", False)
+
+        def add_thread_id_txn(
+            txn: LoggingTransaction, table_name: str, start_stream_ordering: int
+        ) -> int:
+            sql = f"""
+            SELECT stream_ordering
+            FROM {table_name}
+            WHERE
+                thread_id IS NULL
+                AND stream_ordering > ?
+            ORDER BY stream_ordering
+            LIMIT ?
+            """
+            txn.execute(sql, (start_stream_ordering, batch_size))
+
+            # No more rows to process.
+            rows = txn.fetchall()
+            if not rows:
+                progress[f"{table_name}_done"] = True
+                self.db_pool.updates._background_update_progress_txn(
+                    txn, "event_push_backfill_thread_id", progress
+                )
+                return 0
+
+            # Update the thread ID for any of those rows.
+            max_stream_ordering = rows[-1][0]
+
+            sql = f"""
+            UPDATE {table_name}
+            SET thread_id = 'main'
+            WHERE stream_ordering <= ? AND thread_id IS NULL
+            """
+            txn.execute(sql, (max_stream_ordering,))
+
+            # Update progress.
+            processed_rows = txn.rowcount
+            progress[f"max_{table_name}_stream_ordering"] = max_stream_ordering
+            self.db_pool.updates._background_update_progress_txn(
+                txn, "event_push_backfill_thread_id", progress
+            )
+
+            return processed_rows
+
+        # First update the event_push_actions table, then the event_push_summary table.
+        #
+        # Note that the event_push_actions_staging table is ignored since it is
+        # assumed that items in that table will only exist for a short period of
+        # time.
+        if not event_push_actions_done:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_txn,
+                "event_push_actions",
+                progress.get("max_event_push_actions_stream_ordering", 0),
+            )
+        else:
+            result = await self.db_pool.runInteraction(
+                "event_push_backfill_thread_id",
+                add_thread_id_txn,
+                "event_push_summary",
+                progress.get("max_event_push_summary_stream_ordering", 0),
+            )
+
+            # Only done after the event_push_summary table is done.
+            if not result:
+                await self.db_pool.updates._end_background_update(
+                    "event_push_backfill_thread_id"
+                )
+
+        return result
+
     @cached(tree=True, max_entries=5000)
     async def get_unread_event_push_actions_by_room_for_user(
         self,
@@ -670,6 +769,7 @@ async def add_push_actions_to_staging(
         event_id: str,
         user_id_actions: Dict[str, Collection[Union[Mapping, str]]],
         count_as_unread: bool,
+        thread_id: str,
     ) -> None:
         """Add the push actions for the event to the push action staging area.
 
@@ -678,6 +778,7 @@ async def add_push_actions_to_staging(
             user_id_actions: A mapping of user_id to list of push actions, where
                 an action can either be a string or dict.
             count_as_unread: Whether this event should increment unread counts.
+            thread_id: The thread this event is parent of, if applicable.
         """
         if not user_id_actions:
             return
@@ -686,7 +787,7 @@ async def add_push_actions_to_staging(
        # can be used to insert into the `event_push_actions_staging` table.
        def _gen_entry(
            user_id: str, actions: Collection[Union[Mapping, str]]
-        ) -> Tuple[str, str, str, int, int, int]:
+        ) -> Tuple[str, str, str, int, int, int, str]:
            is_highlight = 1 if _action_has_highlight(actions) else 0
            notif = 1 if "notify" in actions else 0
            return (
@@ -696,11 +797,20 @@ def _gen_entry(
                notif,  # notif column
                is_highlight,  # highlight column
                int(count_as_unread),  # unread column
+                thread_id,  # thread_id column
            )
 
        await self.db_pool.simple_insert_many(
            "event_push_actions_staging",
-            keys=("event_id", "user_id", "actions", "notif", "highlight", "unread"),
+            keys=(
+                "event_id",
+                "user_id",
+                "actions",
+                "notif",
+                "highlight",
+                "unread",
+                "thread_id",
+            ),
            values=[
                _gen_entry(user_id, actions)
                for user_id, actions in user_id_actions.items()
@@ -981,6 +1091,8 @@ def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool:
            )
 
            # Replace the previous summary with the new counts.
+            #
+            # TODO(threads): Upsert per-thread instead of setting them all to main.
            self.db_pool.simple_upsert_txn(
                txn,
                table="event_push_summary",
@@ -990,6 +1102,7 @@ def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool:
                    "unread_count": unread_count,
                    "stream_ordering": old_rotate_stream_ordering,
                    "last_receipt_stream_ordering": stream_ordering,
+                    "thread_id": "main",
                },
            )
 
@@ -1138,17 +1251,19 @@ def _rotate_notifs_before_txn(
 
        logger.info("Rotating notifications, handling %d rows", len(summaries))
 
+        # TODO(threads): Update on a per-thread basis.
        self.db_pool.simple_upsert_many_txn(
            txn,
            table="event_push_summary",
            key_names=("user_id", "room_id"),
            key_values=[(user_id, room_id) for user_id, room_id in summaries],
-            value_names=("notif_count", "unread_count", "stream_ordering"),
+            value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"),
            value_values=[
                (
                    summary.notif_count,
                    summary.unread_count,
                    summary.stream_ordering,
+                    "main",
                )
                for summary in summaries.values()
            ],
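The backfill handler above is an instance of a common batched-migration pattern: scan forward from a checkpoint, stamp a bounded batch, record progress, repeat. A self-contained sketch of one batch against plain sqlite3 (assumed simplified schema; Synapse's real version runs through its DB pool and background-update machinery):

import sqlite3

def backfill_thread_id_batch(
    conn: sqlite3.Connection, table: str, start: int, batch_size: int
) -> int:
    # Find the next batch of rows that still lack a thread_id.
    rows = conn.execute(
        f"SELECT stream_ordering FROM {table}"
        " WHERE thread_id IS NULL AND stream_ordering > ?"
        " ORDER BY stream_ordering LIMIT ?",
        (start, batch_size),
    ).fetchall()
    if not rows:
        return 0  # nothing left: the caller can mark this table as done
    # Stamp everything up to this batch's high-water mark in one UPDATE.
    max_stream_ordering = rows[-1][0]
    cur = conn.execute(
        f"UPDATE {table} SET thread_id = 'main'"
        " WHERE stream_ordering <= ? AND thread_id IS NULL",
        (max_stream_ordering,),
    )
    conn.commit()
    return cur.rowcount

Bounding each transaction by batch_size keeps the migration incremental, and persisting the high-water mark (as the real handler does in its progress dict) makes it resumable after a restart.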

synapse/storage/databases/main/events.py

Lines changed: 2 additions & 2 deletions
@@ -2192,9 +2192,9 @@ def _set_push_actions_for_event_and_users_txn(
         sql = """
             INSERT INTO event_push_actions (
                 room_id, event_id, user_id, actions, stream_ordering,
-                topological_ordering, notif, highlight, unread
+                topological_ordering, notif, highlight, unread, thread_id
             )
-            SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread
+            SELECT ?, event_id, user_id, actions, ?, ?, notif, highlight, unread, thread_id
             FROM event_push_actions_staging
             WHERE event_id = ?
         """

synapse/storage/databases/main/receipts.py

Lines changed: 20 additions & 0 deletions
@@ -113,6 +113,24 @@ def __init__(
             prefilled_cache=receipts_stream_prefill,
         )
 
+        self.db_pool.updates.register_background_index_update(
+            "receipts_linearized_unique_index",
+            index_name="receipts_linearized_unique_index",
+            table="receipts_linearized",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
+        )
+
+        self.db_pool.updates.register_background_index_update(
+            "receipts_graph_unique_index",
+            index_name="receipts_graph_unique_index",
+            table="receipts_graph",
+            columns=["room_id", "receipt_type", "user_id"],
+            where_clause="thread_id IS NULL",
+            unique=True,
+        )
+
     def get_max_receipt_stream_id(self) -> int:
         """Get the current max stream ID for receipts stream"""
         return self._receipts_id_gen.get_current_token()
@@ -677,6 +695,7 @@ def _insert_linearized_receipt_txn(
                 "event_id": event_id,
                 "event_stream_ordering": stream_ordering,
                 "data": json_encoder.encode(data),
+                "thread_id": None,
             },
             # receipts_linearized has a unique constraint on
             # (user_id, room_id, receipt_type), so no need to lock
@@ -824,6 +843,7 @@ def _insert_graph_receipt_txn(
             values={
                 "event_ids": json_encoder.encode(event_ids),
                 "data": json_encoder.encode(data),
+                "thread_id": None,
             },
             # receipts_graph has a unique constraint on
             # (user_id, room_id, receipt_type), so no need to lock
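The where_clause above makes these partial unique indexes: uniqueness is enforced only for rows whose thread_id IS NULL, so each (room, receipt type, user) can hold at most one unthreaded receipt while leaving room for per-thread receipts later. A quick self-contained demonstration with in-memory SQLite (simplified columns, illustrative only):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE receipts_linearized ("
    " room_id TEXT, receipt_type TEXT, user_id TEXT,"
    " event_id TEXT, thread_id TEXT)"
)
conn.execute(
    "CREATE UNIQUE INDEX receipts_linearized_unique_index"
    " ON receipts_linearized (room_id, receipt_type, user_id)"
    " WHERE thread_id IS NULL"
)

# One unthreaded receipt per (room, type, user) is allowed...
conn.execute("INSERT INTO receipts_linearized VALUES ('!r:hs', 'm.read', '@u:hs', '$e1', NULL)")

# ...a second unthreaded receipt for the same key is rejected...
try:
    conn.execute("INSERT INTO receipts_linearized VALUES ('!r:hs', 'm.read', '@u:hs', '$e2', NULL)")
except sqlite3.IntegrityError:
    pass  # expected: the partial index only covers NULL-thread rows

# ...while a threaded receipt for the same key bypasses the index entirely.
conn.execute("INSERT INTO receipts_linearized VALUES ('!r:hs', 'm.read', '@u:hs', '$e3', '$root')")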

synapse/storage/schema/__init__.py

Lines changed: 5 additions & 1 deletion
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 72  # remember to update the list below when updating
+SCHEMA_VERSION = 73  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
@@ -77,6 +77,10 @@
     - Tables related to groups are dropped.
     - Unused column application_services_state.last_txn is dropped
     - Cache invalidation stream id sequence now begins at 2 to match code expectation.
+
+Changes in SCHEMA_VERSION = 73:
+    - thread_id column is added to event_push_actions, event_push_actions_staging,
+      event_push_summary, receipts_linearized, and receipts_graph.
 """
 

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+/* Copyright 2022 The Matrix.org Foundation C.I.C
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+-- Add a nullable column for thread ID to the event push actions tables; this
+-- will be filled in with a default value for any previously existing rows.
+--
+-- After migration this can be made non-nullable.
+
+ALTER TABLE event_push_actions_staging ADD COLUMN thread_id TEXT;
+ALTER TABLE event_push_actions ADD COLUMN thread_id TEXT;
+ALTER TABLE event_push_summary ADD COLUMN thread_id TEXT;
+
+-- Update the unique index for `event_push_summary`.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
+  (7006, 'event_push_summary_unique_index2', '{}');
+
+INSERT INTO background_updates (ordering, update_name, progress_json, depends_on) VALUES
+  (7006, 'event_push_backfill_thread_id', '{}', 'event_push_summary_unique_index2');
