Skip to content

Commit 81e2c85

Browse files
committed
Add seqno_creation to be able to skip irrelevant deletions
Add a new column messages.seqno_creation that lets us track a message's initial seqno value. This allows us to skip deletion tombstones for messages that the user doesn't have (i.e. because the message was both created and then deleted after the client's current seqno) which can significantly drop the number of messages we have to deliver when syncing up a room with a lot of deletions.
1 parent 6893d2a commit 81e2c85

File tree

7 files changed

+162
-10
lines changed

7 files changed

+162
-10
lines changed

sogs/migrations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
reactions,
1414
room_accessible,
1515
room_moderators,
16+
seqno_creation,
1617
seqno_etc,
1718
user_permissions,
1819
user_perm_futures,
@@ -41,6 +42,7 @@ def migrate(conn, *, check_only=False):
4142
new_columns,
4243
seqno_etc,
4344
reactions,
45+
seqno_creation,
4446
message_views,
4547
user_perm_futures,
4648
room_accessible,

sogs/migrations/message_views.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ def migrate(conn, *, check_only):
6161
conn.execute(
6262
"""
6363
CREATE VIEW message_metadata AS
64-
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions,
64+
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions, seqno_creation,
6565
filtered, whisper_to, whisper_mods,
6666
length(data) AS data_unpadded, data_size, length(signature) as signature_length
6767
FROM message_details
68-
"""
68+
""" # noqa: E501
6969
)
7070

7171
else: # postgresql
@@ -100,11 +100,11 @@ def migrate(conn, *, check_only):
100100
-- View of `messages` that is useful for manually inspecting table contents by only returning the
101101
-- length (rather than raw bytes) for data/signature.
102102
CREATE VIEW message_metadata AS
103-
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions,
103+
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions, seqno_creation,
104104
filtered, whisper_to, whisper_mods,
105105
length(data) AS data_unpadded, data_size, length(signature) as signature_length
106106
FROM message_details;
107-
"""
107+
""" # noqa: E501
108108
)
109109

110110
return True

sogs/migrations/seqno_creation.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import logging
2+
from .exc import DatabaseUpgradeRequired
3+
4+
5+
def migrate(conn, *, check_only):
6+
"""
7+
Adds a seqno_creation column to track the seqno when a message was created so that we can skip
8+
deleted messages entirely (i.e. omit the tombstone) when polling from a seqno before the message
9+
was created.
10+
"""
11+
12+
from .. import db
13+
14+
if 'seqno_creation' in db.metadata.tables['messages'].c:
15+
return False
16+
17+
if check_only:
18+
raise DatabaseUpgradeRequired("message creation seqno")
19+
20+
logging.warning("Adding messages.seqno_creation column")
21+
if db.engine.name == 'sqlite':
22+
conn.execute("ALTER TABLE messages ADD COLUMN seqno_creation INTEGER NOT NULL DEFAULT 0")
23+
conn.execute("DROP TRIGGER IF EXISTS messages_insert_counter")
24+
conn.execute(
25+
"""
26+
CREATE TRIGGER messages_insert_counter AFTER INSERT ON messages
27+
FOR EACH ROW
28+
BEGIN
29+
UPDATE rooms SET message_sequence = message_sequence + 1 WHERE id = NEW.room;
30+
UPDATE messages SET seqno_data = (SELECT message_sequence FROM rooms WHERE id = NEW.room) WHERE id = NEW.id;
31+
UPDATE messages SET seqno_creation = seqno_data WHERE id = NEW.id;
32+
END
33+
""" # noqa: E501
34+
)
35+
else: # postgresql
36+
conn.execute(
37+
"""
38+
ALTER TABLE messages ADD COLUMN seqno_creation BIGINT NOT NULL DEFAULT 0;
39+
40+
CREATE OR REPLACE FUNCTION trigger_messages_insert_counter()
41+
RETURNS TRIGGER LANGUAGE PLPGSQL AS $$
42+
DECLARE
43+
new_seqno BIGINT := increment_room_sequence(NEW.room);
44+
BEGIN
45+
UPDATE messages SET seqno_data = new_seqno, seqno_creation = new_seqno WHERE id = NEW.id;
46+
RETURN NULL;
47+
END;$$;
48+
DROP TRIGGER IF EXISTS messages_insert_counter ON messages;
49+
CREATE TRIGGER messages_insert_counter AFTER INSERT ON messages
50+
FOR EACH ROW EXECUTE PROCEDURE trigger_messages_insert_counter();
51+
"""
52+
)
53+
54+
# Drop these to be recreated (with the no column) in the message_views migration.
55+
conn.execute("DROP VIEW IF EXISTS message_metadata")
56+
conn.execute("DROP VIEW IF EXISTS message_details")
57+
58+
return True

sogs/model/room.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,15 @@ def get_messages_for(
600600
if after <= max_old_id:
601601
after += offset
602602

603-
# We include deletions only if doing a sequence update request:
604-
not_deleted_clause = '' if sequence is not None else 'AND data IS NOT NULL'
603+
# We include deletions only if doing a sequence update request, but only include deletions
604+
# for messages that were created *before* the given sequence number (the client won't have
605+
# messages created after that, so it is pointless to send them tombstones for messages they
606+
# don't know about).
607+
not_deleted_clause = (
608+
'AND (data IS NOT NULL OR seqno_creation <= :sequence)'
609+
if sequence is not None
610+
else 'AND data IS NOT NULL'
611+
)
605612
message_clause = (
606613
'AND seqno > :sequence AND seqno_data > :sequence'
607614
if sequence is not None and not reaction_updates

sogs/schema.pgsql

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ CREATE TABLE messages (
3939
seqno BIGINT NOT NULL DEFAULT 0, /* set to the room's `message_sequence` counter when any of the individual seqno values are updated */
4040
seqno_data BIGINT NOT NULL DEFAULT 0, /* updated when `data` changes (i.e. edits, deletions) */
4141
seqno_reactions BIGINT NOT NULL DEFAULT 0, /* updated when reactions are added/removed */
42+
seqno_creation BIGINT NOT NULL DEFAULT 0, /* set to the seqno at the time of creation (and not updated afterwards) */
4243
data BYTEA, /* Actual message content, not including trailing padding; set to null to delete a message */
4344
data_size BIGINT, /* The message size, including trailing padding (needed because the signature is over the padded data) */
4445
signature BYTEA, /* Signature of `data` by `public_key`; set to null when deleting a message */
@@ -72,8 +73,11 @@ END;$$;
7273
-- Trigger to increment a room's `message_sequence` counter and assign it to the message's `seqno`
7374
-- field for new messages.
7475
CREATE OR REPLACE FUNCTION trigger_messages_insert_counter()
75-
RETURNS TRIGGER LANGUAGE PLPGSQL AS $$BEGIN
76-
UPDATE messages SET seqno_data = increment_room_sequence(NEW.room) WHERE id = NEW.id;
76+
RETURNS TRIGGER LANGUAGE PLPGSQL AS $$
77+
DECLARE
78+
new_seqno BIGINT := increment_room_sequence(NEW.room);
79+
BEGIN
80+
UPDATE messages SET seqno_data = new_seqno, seqno_creation = new_seqno WHERE id = NEW.id;
7781
RETURN NULL;
7882
END;$$;
7983
CREATE TRIGGER messages_insert_counter AFTER INSERT ON messages
@@ -330,7 +334,7 @@ EXECUTE PROCEDURE trigger_message_details_deleter();
330334
-- View of `messages` that is useful for manually inspecting table contents by only returning the
331335
-- length (rather than raw bytes) for data/signature.
332336
CREATE VIEW message_metadata AS
333-
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions,
337+
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions, seqno_creation,
334338
filtered, whisper_to, whisper_mods,
335339
length(data) AS data_unpadded, data_size, length(signature) as signature_length
336340
FROM message_details;

sogs/schema.sqlite

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ CREATE TABLE messages (
3838
seqno INTEGER NOT NULL DEFAULT 0, /* set to the room's `message_sequence` counter when any of the individual seqno values are updated */
3939
seqno_data INTEGER NOT NULL DEFAULT 0, /* updated when `data` changes (i.e. edits, deletions) */
4040
seqno_reactions INTEGER NOT NULL DEFAULT 0, /* updated when reactions are added/removed */
41+
seqno_creation INTEGER NOT NULL DEFAULT 0, /* set to the seqno at the time of creation (and not updated afterwards) */
4142
data BLOB, /* Actual message content, not including trailing padding; set to null to delete a message */
4243
data_size INTEGER, /* The message size, including trailing padding (needed because the signature is over the padded data) */
4344
signature BLOB, /* Signature of `data` by `public_key`; set to null when deleting a message */
@@ -65,6 +66,7 @@ FOR EACH ROW
6566
BEGIN
6667
UPDATE rooms SET message_sequence = message_sequence + 1 WHERE id = NEW.room;
6768
UPDATE messages SET seqno_data = (SELECT message_sequence FROM rooms WHERE id = NEW.room) WHERE id = NEW.id;
69+
UPDATE messages SET seqno_creation = seqno_data WHERE id = NEW.id;
6870
END;
6971

7072
-- Trigger to do various tasks needed when a message is edited/deleted:
@@ -280,7 +282,7 @@ END;
280282
-- View of `messages` that is useful for manually inspecting table contents by only returning the
281283
-- length (rather than raw bytes) for data/signature.
282284
CREATE VIEW message_metadata AS
283-
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions,
285+
SELECT id, room, "user", session_id, posted, edited, seqno, seqno_data, seqno_reactions, seqno_creation,
284286
filtered, whisper_to, whisper_mods,
285287
length(data) AS data_unpadded, data_size, length(signature) as signature_length
286288
FROM message_details;

tests/test_room_routes.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,85 @@ def test_fetch_since(client, room, user, no_rate_limit):
687687
assert fetches == (sum(counts) + 24) // 25
688688

689689

690+
def test_fetch_since_skip_deletions(client, room, user, no_rate_limit):
691+
# Insert 10 posts; they will have seqno == id (i.e. 1 to 10).
692+
for i in range(1, 11):
693+
room.add_post(user, f"fake data {i}".encode(), pad64(f"fake sig {i}"))
694+
695+
# Delete some:
696+
deleted = (2, 4, 5, 8, 9)
697+
for i in deleted:
698+
r = sogs_delete(client, f'/room/test-room/message/{i}', user)
699+
assert r.status_code == 200
700+
701+
def get_and_clean_since(seqno):
702+
r = sogs_get(client, f"/room/test-room/messages/since/{seqno}", user)
703+
assert r.status_code == 200
704+
res = r.json
705+
for m in res:
706+
for k in ('posted', 'session_id', 'reactions'):
707+
if k in m:
708+
del m[k]
709+
for k in ('data', 'signature', 'edited'):
710+
if k in m and m[k] is not None:
711+
m[k] = True
712+
return res
713+
714+
# If we poll from 1 we should only see the messages (skipping the first one with seqno=1) that
715+
# remain (since our polling seqno is before the deleted messages were created in the first
716+
# place):
717+
assert get_and_clean_since(1) == [
718+
{'id': i, 'seqno': i, 'data': True, 'signature': True} for i in (3, 6, 7, 10)
719+
]
720+
721+
def deleted_entry(id, seqno):
722+
return {'id': id, 'seqno': seqno, 'edited': True, 'deleted': True, 'data': None}
723+
724+
# If we poll from 2 we should get the deletion for 2, but not the higher deletions
725+
assert get_and_clean_since(2) == [
726+
*({'id': i, 'seqno': i, 'data': True, 'signature': True} for i in (3, 6, 7, 10)),
727+
*(deleted_entry(i, s) for i, s in ((2, 11),)),
728+
]
729+
730+
# From 4 we should get deletions 2 and 4
731+
assert get_and_clean_since(4) == [
732+
*({'id': i, 'seqno': i, 'data': True, 'signature': True} for i in (6, 7, 10)),
733+
*(deleted_entry(i, s) for i, s in ((2, 11), (4, 12))),
734+
]
735+
736+
# and so on
737+
assert get_and_clean_since(5) == [
738+
*({'id': i, 'seqno': i, 'data': True, 'signature': True} for i in (6, 7, 10)),
739+
*(deleted_entry(i, s) for i, s in ((2, 11), (4, 12), (5, 13))),
740+
]
741+
assert get_and_clean_since(6) == [
742+
*({'id': i, 'seqno': i, 'data': True, 'signature': True} for i in (7, 10)),
743+
*(deleted_entry(i, s) for i, s in ((2, 11), (4, 12), (5, 13))),
744+
]
745+
assert get_and_clean_since(7) == [
746+
*({'id': i, 'seqno': i, 'data': True, 'signature': True} for i in (10,)),
747+
*(deleted_entry(i, s) for i, s in ((2, 11), (4, 12), (5, 13))),
748+
]
749+
750+
assert get_and_clean_since(9) == [
751+
*({'id': i, 'seqno': i, 'data': True, 'signature': True} for i in (10,)),
752+
*(deleted_entry(i, s) for i, s in ((2, 11), (4, 12), (5, 13), (8, 14), (9, 15))),
753+
]
754+
assert get_and_clean_since(10) == [
755+
*(deleted_entry(i, s) for i, s in ((2, 11), (4, 12), (5, 13), (8, 14), (9, 15))),
756+
]
757+
assert get_and_clean_since(11) == [
758+
*(deleted_entry(i, s) for i, s in ((4, 12), (5, 13), (8, 14), (9, 15))),
759+
]
760+
assert get_and_clean_since(13) == [
761+
*(deleted_entry(i, s) for i, s in ((8, 14), (9, 15))),
762+
]
763+
assert get_and_clean_since(14) == [
764+
*(deleted_entry(i, s) for i, s in ((9, 15),)),
765+
]
766+
assert get_and_clean_since(15) == []
767+
768+
690769
def test_fetch_before(client, room, user, no_rate_limit):
691770
for i in range(1000):
692771
room.add_post(user, f"data-{i}".encode(), pad64(f"fake sig {i}"))

0 commit comments

Comments
 (0)