webrecorder · ikreymer · Mar 10, 2026
diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py
@@ -286,6 +286,19 @@ async def remove_crawls_from_collection(
         headers: Optional[dict] = None,
     ) -> CollOut:
         """Remove crawls from collection"""
+        num_req_crawls = await self.crawls.count_documents(
+            {
+                "dedupeCollId": coll_id,
+                "requiresCrawls": {"$in": crawl_ids},
+                "collectionIds": coll_id,
+                "_id": {"$nin": crawl_ids},
+            }
+        )
+        # if any of the crawls have other crawls that depend on them, and those
+        # dependents aren't being removed themselves, don't allow the removal
+        if num_req_crawls > 0:
+            raise HTTPException(status_code=400, detail="cant_remove_required_crawls")
+
         await self.crawl_ops.remove_from_collection(crawl_ids, coll_id)
         modified = dt_now()
         result = await self.collections.find_one_and_update(

diff --git a/backend/test_nightly/test_dedupe.py b/backend/test_nightly/test_dedupe.py
@@ -279,6 +279,19 @@ def test_import_into_another_coll(
     assert stats == {**orig_stats, "updateProgress": 1.0}
 
 
+def test_cant_remove_dependency_crawl(
+    default_org_id, dedupe_coll_id, crawler_auth_headers, dedupe_first_crawl
+):
+    r = requests.post(  # attempt to remove just dedupe_first_crawl
+        f"{API_PREFIX}/orgs/{default_org_id}/collections/{dedupe_coll_id}/remove",
+        json={"crawlIds": [dedupe_first_crawl]},
+        headers=crawler_auth_headers,
+    )
+    assert r.status_code == 400  # removal must be rejected
+
+    assert r.json()["detail"] == "cant_remove_required_crawls"  # guard's error code
+
+
 def test_remove_crawl_from_collection(
     default_org_id, dedupe_coll_id, crawler_auth_headers, dedupe_second_crawl
 ):
@@ -373,7 +386,6 @@ def test_cant_delete_while_crawling(
     data = r.json()
     assert data["success"]
 
-
 def test_can_delete_while_indexing(
     default_org_id,
     dedupe_coll_id,