Use batched deletes rather than passive_deletes

mrhiggi-close · mrhiggi-close · commit f48f5ce2fabb · 2025-08-06T15:39:50.000-04:00
diff --git a/inbox/mailsync/backends/gmail.py b/inbox/mailsync/backends/gmail.py
@@ -24,6 +24,7 @@
 from datetime import datetime, timedelta
 from threading import Semaphore
 from typing import TYPE_CHECKING, ClassVar
+import itertools
 
 from sqlalchemy.orm import (  # type: ignore[import-untyped]
     joinedload,
@@ -124,24 +125,23 @@ def initial_sync_impl(self, crispin_client: "CrispinClient") -> None:
                     # Prioritize UIDs for messages in the inbox folder.
                     if len_remote_uids < 1e6:
                         inbox_uids = set(
-                            crispin_client.search_uids(
-                                ["X-GM-LABELS", "inbox"]
-                            )
+                            crispin_client.search_uids([
+                                "X-GM-LABELS",
+                                "inbox",
+                            ])
                         )
                     else:
                         # The search above is really slow (times out) on really
                         # large mailboxes, so bound the search to messages within
                         # the past month in order to get anywhere.
                         since = datetime.utcnow() - timedelta(days=30)
                         inbox_uids = set(
-                            crispin_client.search_uids(
-                                [
-                                    "X-GM-LABELS",
-                                    "inbox",
-                                    "SINCE",
-                                    since,  # type: ignore[list-item]
-                                ]
-                            )
+                            crispin_client.search_uids([
+                                "X-GM-LABELS",
+                                "inbox",
+                                "SINCE",
+                                since,  # type: ignore[list-item]
+                            ])
                         )
 
                     uids_to_download = sorted(
@@ -188,6 +188,7 @@ def resync_uids_impl(self, chunk_size: int = 1000) -> None:
                     self.folder_name, lambda *args: True
                 )
                 uidvalidity = crispin_client.selected_uidvalidity
+                remote_uidnext = crispin_client.selected_uidnext
                 if uidvalidity <= imap_folder_info_entry.uidvalidity:
                     # if the remote UIDVALIDITY is less than or equal to -
                     # from my (siro) understanding it should not be less than -
@@ -221,7 +222,7 @@ def resync_uids_impl(self, chunk_size: int = 1000) -> None:
                     "FORCE INDEX (ix_imapuid_account_id_folder_id_msg_uid_desc)",
                 )
             )
-
+            imap_uids_to_delete = []
             for entry in imap_uid_entries.yield_per(chunk_size):
                 if entry.message.g_msgid in mapping:
                     log.debug(
@@ -233,14 +234,22 @@ def resync_uids_impl(self, chunk_size: int = 1000) -> None:
                     )
                     entry.msg_uid = mapping[entry.message.g_msgid]
                 else:
-                    db_session.delete(entry)
+                    imap_uids_to_delete.append(entry.msg_uid)
             log.debug(
                 "UIDVALIDITY from {} to {}".format(  # noqa: G001
                     imap_folder_info_entry.uidvalidity, uidvalidity
                 )
             )
+            for uid_batch in itertools.batched(
+                imap_uids_to_delete, chunk_size
+            ):
+                for uid_to_delete in db_session.query(ImapUid).filter(
+                    ImapUid.msg_uid.in_(uid_batch)
+                ):
+                    db_session.delete(uid_to_delete)
             imap_folder_info_entry.uidvalidity = uidvalidity
             imap_folder_info_entry.highestmodseq = None
+            imap_folder_info_entry.uidnext = remote_uidnext
             db_session.commit()
 
     def __deduplicate_message_object_creation(  # type: ignore[no-untyped-def]
@@ -469,15 +478,15 @@ def g_msgids(namespace_id, session, in_):  # type: ignore[no-untyped-def]  # noq
             .filter(Message.namespace_id == namespace_id)
             .all()
         )
-        return sorted(g_msgid for g_msgid, in query if g_msgid in in_)
+        return sorted(g_msgid for (g_msgid,) in query if g_msgid in in_)
     # But in the normal case that in_ only has a few elements, it's way better
     # to not fetch a bunch of values from MySQL only to return a few of them.
     query = (
         session.query(Message.g_msgid)
         .filter(Message.namespace_id == namespace_id, Message.g_msgid.in_(in_))
         .all()
     )
-    return {g_msgid for g_msgid, in query}
+    return {g_msgid for (g_msgid,) in query}
 
 
 class GmailSyncMonitor(ImapSyncMonitor):
diff --git a/inbox/models/backends/imap.py b/inbox/models/backends/imap.py
@@ -505,21 +505,15 @@ class LabelItem(MailSyncBase, UpdatedAtMixin, DeletedAtMixin):
     imapuid = relationship(
         "ImapUid",
         backref=backref(
-            "labelitems",
-            collection_class=set,
-            cascade="all, delete-orphan",
-            passive_deletes=True,
+            "labelitems", collection_class=set, cascade="all, delete-orphan"
         ),
     )
 
     label_id = Column(ForeignKey(Label.id, ondelete="CASCADE"), nullable=False)
     label = relationship(
         Label,
         backref=backref(
-            "labelitems",
-            cascade="all, delete-orphan",
-            lazy="dynamic",
-            passive_deletes=True,
+            "labelitems", cascade="all, delete-orphan", lazy="dynamic"
         ),
     )