Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions backend/btrixcloud/colls.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
UpdateColl,
DedupeIndexStats,
DedupeIndexFile,
AddRemoveCrawlList,
CollectionAddRemove,
BaseCrawl,
CrawlFileOut,
Organization,
Expand Down Expand Up @@ -1452,13 +1452,20 @@ async def update_collection(
response_model=CollOut,
)
async def add_crawl_to_collection(
    add_remove: CollectionAddRemove,
    coll_id: UUID,
    request: Request,
    org: Organization = Depends(org_crawl_dep),
) -> CollOut:
    """Add crawls to a collection.

    Crawls may be given directly via ``add_remove.crawlIds`` and/or
    indirectly via ``add_remove.crawlconfigIds``, which are expanded into
    all crawl ids belonging to those crawlconfigs. Using a set dedupes a
    crawl that is listed both directly and via its config.

    :param add_remove: crawl ids and/or crawlconfig ids to add
    :param coll_id: collection to add the crawls to
    :param request: incoming request (headers are forwarded)
    :param org: org resolved by the crawl-access dependency
    :returns: the updated collection
    """
    crawl_ids = set(add_remove.crawlIds)
    # Single bulk update on the set, per review suggestion, instead of a
    # per-id loop over the expanded config crawl ids.
    crawl_ids.update(
        await colls.crawl_ops.get_config_crawl_ids(add_remove.crawlconfigIds)
    )

    return await colls.add_crawls_to_collection(
        coll_id, list(crawl_ids), org, headers=dict(request.headers)
    )

@app.post(
Expand All @@ -1467,13 +1474,20 @@ async def add_crawl_to_collection(
response_model=CollOut,
)
async def remove_crawl_from_collection(
    add_remove: CollectionAddRemove,
    coll_id: UUID,
    request: Request,
    org: Organization = Depends(org_crawl_dep),
) -> CollOut:
    """Remove crawls from a collection.

    Crawls may be given directly via ``add_remove.crawlIds`` and/or
    indirectly via ``add_remove.crawlconfigIds``, which are expanded into
    all crawl ids belonging to those crawlconfigs. Using a set gracefully
    handles a crawl listed both directly and via its config.

    :param add_remove: crawl ids and/or crawlconfig ids to remove
    :param coll_id: collection to remove the crawls from
    :param request: incoming request (headers are forwarded)
    :param org: org resolved by the crawl-access dependency
    :returns: the updated collection
    """
    crawl_ids = set(add_remove.crawlIds)
    # Single bulk update on the set, mirroring add_crawl_to_collection,
    # instead of a per-id loop over the expanded config crawl ids.
    crawl_ids.update(
        await colls.crawl_ops.get_config_crawl_ids(add_remove.crawlconfigIds)
    )

    return await colls.remove_crawls_from_collection(
        coll_id, list(crawl_ids), org, headers=dict(request.headers)
    )

@app.delete(
Expand Down
6 changes: 6 additions & 0 deletions backend/btrixcloud/crawls.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,12 @@ async def list_crawls(

return crawls, total

async def get_config_crawl_ids(self, cids: list[UUID]) -> list[str]:
    """Return ids of all crawls belonging to the given crawlconfigs.

    :param cids: crawlconfig ids to look up
    :returns: list of matching crawl ids (empty if none match)
    """
    # Project only _id so full crawl documents aren't fetched.
    cursor = self.crawls.find({"cid": {"$in": cids}}, {"_id": 1})
    # Distinct names for cursor, result list, and loop variable — the
    # original reused `res` for all three, shadowing the cursor.
    docs = await cursor.to_list()
    return [doc["_id"] for doc in docs]

async def get_active_crawls(self, oid: UUID, limit: int) -> list[str]:
"""get list of waiting crawls, sorted from earliest to latest"""
res = (
Expand Down
5 changes: 3 additions & 2 deletions backend/btrixcloud/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,10 +1859,11 @@ class UpdateCollHomeUrl(BaseModel):


# ============================================================================
class AddRemoveCrawlList(BaseModel):
"""Collections to add or remove from collection"""
class CollectionAddRemove(BaseModel):
    """Items to add to or remove from a collection.

    Crawls may be specified directly by id via ``crawlIds``, and/or
    indirectly via ``crawlconfigIds`` — the collection add/remove
    endpoints expand each crawlconfig into all of its crawl ids.
    """

    # crawl ids to add/remove directly
    crawlIds: List[str] = []
    # crawlconfig ids whose crawls are expanded and added/removed
    # (mutable [] defaults are safe here: pydantic copies them per instance)
    crawlconfigIds: List[UUID] = []


# ============================================================================
Expand Down
65 changes: 65 additions & 0 deletions backend/test/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,71 @@ def test_add_remove_crawl_from_collection(
)
assert _coll_id not in r.json()["collectionIds"]


def test_add_remove_config_crawls_from_collection(
crawler_auth_headers,
default_org_id,
crawler_crawl_id,
crawler_config_id,
admin_crawl_id,
admin_config_id,
):
# Add crawls by config and crawl id
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/add",
json={"crawlIds": [admin_crawl_id], "crawlconfigIds": [crawler_config_id]},
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["id"] == _coll_id
assert data["crawlCount"] == 2
assert data["pageCount"] > 0
assert data["uniquePageCount"] > 0
assert data["totalSize"] > 0
assert data["modified"] >= modified
assert data["tags"] == ["wr-test-2", "wr-test-1"]
assert data["dateEarliest"]
assert data["dateLatest"]
assert data["topPageHosts"]

# Remove crawls by crawl and config id, and test that specifying a
# config and also a crawl in that config separately is handled
# gracefully
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/remove",
json={
"crawlIds": [crawler_crawl_id],
"crawlconfigIds": [admin_config_id, crawler_config_id],
},
headers=crawler_auth_headers,
)
assert r.status_code == 200
data = r.json()
assert data["id"] == _coll_id
assert data["crawlCount"] == 0
assert data["pageCount"] == 0
assert data["uniquePageCount"] == 0
assert data["totalSize"] == 0
assert data["modified"] >= modified
assert data.get("tags", []) == []
assert data.get("dateEarliest") is None
assert data.get("dateLatest") is None
assert data["topPageHosts"] == []

# Verify crawls were removed
r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/replay.json",
headers=crawler_auth_headers,
)
assert _coll_id not in r.json()["collectionIds"]

r = requests.get(
f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawler_crawl_id}/replay.json",
headers=crawler_auth_headers,
)
assert _coll_id not in r.json()["collectionIds"]

# Add crawls back for further tests
r = requests.post(
f"{API_PREFIX}/orgs/{default_org_id}/collections/{_coll_id}/add",
Expand Down
Loading