Skip to content

Commit f48f5ce

Browse files
committed
Use batched deletes rather than passive_deletes
1 parent 2f40bc7 commit f48f5ce

File tree

2 files changed

+26
-23
lines changed

2 files changed

+26
-23
lines changed

inbox/mailsync/backends/gmail.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from datetime import datetime, timedelta
2525
from threading import Semaphore
2626
from typing import TYPE_CHECKING, ClassVar
27+
import itertools
2728

2829
from sqlalchemy.orm import ( # type: ignore[import-untyped]
2930
joinedload,
@@ -124,24 +125,23 @@ def initial_sync_impl(self, crispin_client: "CrispinClient") -> None:
124125
# Prioritize UIDs for messages in the inbox folder.
125126
if len_remote_uids < 1e6:
126127
inbox_uids = set(
127-
crispin_client.search_uids(
128-
["X-GM-LABELS", "inbox"]
129-
)
128+
crispin_client.search_uids([
129+
"X-GM-LABELS",
130+
"inbox",
131+
])
130132
)
131133
else:
132134
# The search above is really slow (times out) on really
133135
# large mailboxes, so bound the search to messages within
134136
# the past month in order to get anywhere.
135137
since = datetime.utcnow() - timedelta(days=30)
136138
inbox_uids = set(
137-
crispin_client.search_uids(
138-
[
139-
"X-GM-LABELS",
140-
"inbox",
141-
"SINCE",
142-
since, # type: ignore[list-item]
143-
]
144-
)
139+
crispin_client.search_uids([
140+
"X-GM-LABELS",
141+
"inbox",
142+
"SINCE",
143+
since, # type: ignore[list-item]
144+
])
145145
)
146146

147147
uids_to_download = sorted(
@@ -188,6 +188,7 @@ def resync_uids_impl(self, chunk_size: int = 1000) -> None:
188188
self.folder_name, lambda *args: True
189189
)
190190
uidvalidity = crispin_client.selected_uidvalidity
191+
remote_uidnext = crispin_client.selected_uidnext
191192
if uidvalidity <= imap_folder_info_entry.uidvalidity:
192193
# if the remote UIDVALIDITY is less than or equal to -
193194
# from my (siro) understanding it should not be less than -
@@ -221,7 +222,7 @@ def resync_uids_impl(self, chunk_size: int = 1000) -> None:
221222
"FORCE INDEX (ix_imapuid_account_id_folder_id_msg_uid_desc)",
222223
)
223224
)
224-
225+
imap_uids_to_delete = []
225226
for entry in imap_uid_entries.yield_per(chunk_size):
226227
if entry.message.g_msgid in mapping:
227228
log.debug(
@@ -233,14 +234,22 @@ def resync_uids_impl(self, chunk_size: int = 1000) -> None:
233234
)
234235
entry.msg_uid = mapping[entry.message.g_msgid]
235236
else:
236-
db_session.delete(entry)
237+
imap_uids_to_delete.append(entry.msg_uid)
237238
log.debug(
238239
"UIDVALIDITY from {} to {}".format( # noqa: G001
239240
imap_folder_info_entry.uidvalidity, uidvalidity
240241
)
241242
)
243+
for uid_batch in itertools.batched(
244+
imap_uids_to_delete, chunk_size
245+
):
246+
for uid_to_delete in db_session.query(ImapUid).filter(
247+
ImapUid.msg_uid.in_(uid_batch)
248+
):
249+
db_session.delete(uid_to_delete)
242250
imap_folder_info_entry.uidvalidity = uidvalidity
243251
imap_folder_info_entry.highestmodseq = None
252+
imap_folder_info_entry.uidnext = remote_uidnext
244253
db_session.commit()
245254

246255
def __deduplicate_message_object_creation( # type: ignore[no-untyped-def]
@@ -469,15 +478,15 @@ def g_msgids(namespace_id, session, in_): # type: ignore[no-untyped-def] # noq
469478
.filter(Message.namespace_id == namespace_id)
470479
.all()
471480
)
472-
return sorted(g_msgid for g_msgid, in query if g_msgid in in_)
481+
return sorted(g_msgid for (g_msgid,) in query if g_msgid in in_)
473482
# But in the normal case that in_ only has a few elements, it's way better
474483
# to not fetch a bunch of values from MySQL only to return a few of them.
475484
query = (
476485
session.query(Message.g_msgid)
477486
.filter(Message.namespace_id == namespace_id, Message.g_msgid.in_(in_))
478487
.all()
479488
)
480-
return {g_msgid for g_msgid, in query}
489+
return {g_msgid for (g_msgid,) in query}
481490

482491

483492
class GmailSyncMonitor(ImapSyncMonitor):

inbox/models/backends/imap.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -505,21 +505,15 @@ class LabelItem(MailSyncBase, UpdatedAtMixin, DeletedAtMixin):
505505
imapuid = relationship(
506506
"ImapUid",
507507
backref=backref(
508-
"labelitems",
509-
collection_class=set,
510-
cascade="all, delete-orphan",
511-
passive_deletes=True,
508+
"labelitems", collection_class=set, cascade="all, delete-orphan"
512509
),
513510
)
514511

515512
label_id = Column(ForeignKey(Label.id, ondelete="CASCADE"), nullable=False)
516513
label = relationship(
517514
Label,
518515
backref=backref(
519-
"labelitems",
520-
cascade="all, delete-orphan",
521-
lazy="dynamic",
522-
passive_deletes=True,
516+
"labelitems", cascade="all, delete-orphan", lazy="dynamic"
523517
),
524518
)
525519

0 commit comments

Comments
 (0)