Skip to content

Commit 4a4cde6

Browse files
committed
feat: add MongoBuildCleanupManager and related commands for managing MongoDB builds
1 parent 0cf3a49 commit 4a4cde6

File tree

5 files changed

+107
-6
lines changed

5 files changed

+107
-6
lines changed

biothings/hub/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ def configure_index_manager(self):
701701

702702
def configure_snapshot_manager(self):
703703
assert "index" in self.features, "'snapshot' feature requires 'index'"
704+
from biothings.hub.dataindex.mongo_build_cleanup import MongoBuildCleanupManager
704705
from biothings.hub.dataindex.snapshooter import SnapshotManager
705706

706707
args = self.mixargs("snapshot")
@@ -713,6 +714,7 @@ def configure_snapshot_manager(self):
713714
snapshot_manager.configure(config.SNAPSHOT_CONFIG)
714715
snapshot_manager.poll("snapshot", snapshot_manager.snapshot_a_build)
715716
self.managers["snapshot_manager"] = snapshot_manager
717+
self.managers["mongo_build_cleanup_manager"] = MongoBuildCleanupManager(job_manager=self.managers["job_manager"])
716718

717719
def configure_auto_snapshot_cleaner_manager(self):
718720
assert "snapshot" in self.features, "'auto_snapshot_cleaner' feature requires 'snapshot'"
@@ -1148,6 +1150,9 @@ def configure_commands(self):
11481150
self.commands["list_snapshots"] = self.managers["snapshot_manager"].list_snapshots
11491151
self.commands["delete_snapshots"] = self.managers["snapshot_manager"].delete_snapshots
11501152
self.commands["validate_snapshots"] = self.managers["snapshot_manager"].validate_snapshots
1153+
if self.managers.get("mongo_build_cleanup_manager"):
1154+
self.commands["list_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].list_mongo_builds
1155+
self.commands["delete_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].delete_mongo_builds
11511156
# data release commands
11521157
if self.managers.get("release_manager"):
11531158
self.commands["create_release_note"] = self.managers["release_manager"].create_release_note
@@ -1514,6 +1519,12 @@ def configure_api_endpoints(self):
15141519
)
15151520
if "validate_snapshots" in cmdnames:
15161521
self.api_endpoints["validate_snapshots"] = EndpointDefinition(name="validate_snapshots", method="post")
1522+
if "list_mongo_builds" in cmdnames:
1523+
self.api_endpoints["list_mongo_builds"] = EndpointDefinition(name="list_mongo_builds", method="get")
1524+
if "delete_mongo_builds" in cmdnames:
1525+
self.api_endpoints["delete_mongo_builds"] = EndpointDefinition(
1526+
name="delete_mongo_builds", method="put", force_bodyargs=True
1527+
)
15171528
if "sync" in cmdnames:
15181529
self.api_endpoints["sync"] = EndpointDefinition(name="sync", method="post", force_bodyargs=True)
15191530
if "whatsnew" in cmdnames:
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from functools import partial
2+
3+
from biothings.hub.manager import BaseManager
4+
from biothings.utils.hub_db import get_src_build
5+
from config import logger as logging
6+
7+
8+
class MongoBuildCleaner:
    """List and delete build documents in the collection returned by ``get_src_build``."""

    def __init__(self, job_manager):
        """Store the job manager handed in by the caller."""
        self.job_manager = job_manager

    def list_builds(self, build_config=None, build_name=None):
        """Return build documents grouped by their build configuration id.

        Args:
            build_config: when truthy, keep only builds whose
                ``build_config._id`` equals this value.
            build_name: when truthy, keep only the build whose ``_id``
                equals this value.

        Returns:
            A list of ``{"_id": <group name>, "items": [<build>, ...]}`` dicts.
            Builds are queried sorted by ``started_at`` descending, and groups
            appear in the order their first build is encountered. Builds with
            no usable ``build_config._id`` are grouped under ``"N/A"``.
        """
        collection = get_src_build()

        filters = {}
        if build_config:
            filters["build_config._id"] = build_config
        if build_name:
            filters["_id"] = build_name

        # Only fetch the fields needed for a listing.
        projection = {
            "_id": 1,
            "build_config": 1,
            "started_at": 1,
            "archived": 1,
            "target_name": 1,
        }
        builds = collection.find(filters, projection).sort("started_at", -1)

        grouped = {}
        for build in builds:
            # "build_config" may be missing *or* explicitly stored as None;
            # "or {}" covers both so the chained .get() cannot fail.
            group_name = (build.get("build_config") or {}).get("_id") or "N/A"
            grouped.setdefault(group_name, []).append(build)

        return [{"_id": key, "items": items} for key, items in grouped.items()]

    async def delete_builds(self, build_ids):
        """Delete the build documents whose ``_id`` is in ``build_ids``.

        Args:
            build_ids: an iterable of build ids, or a single id given as a
                string. A falsy value (``None``, ``[]``, ``""``) is a no-op.

        Returns:
            ``{"deleted_count": <int>}`` with the number of removed documents.
        """
        if not build_ids:
            return {"deleted_count": 0}
        if isinstance(build_ids, str):
            # A bare string is one id, not a sequence of characters; "$in"
            # requires an array.
            build_ids = [build_ids]

        from biothings.utils import mongo

        conn = mongo.get_hub_db_async_conn()
        try:
            src_build = mongo.get_src_build_async(conn)
            result = await src_build.delete_many({"_id": {"$in": list(build_ids)}})
            return {"deleted_count": result.deleted_count}
        finally:
            # AsyncMongoClient.close() is a coroutine: without "await" the
            # client is never actually closed and Python emits a
            # "coroutine was never awaited" warning.
            await conn.close()

    def done(self, future):
        """Done-callback that logs the outcome of a ``delete_builds`` job.

        Args:
            future: the completed future/task wrapping ``delete_builds``.
        """
        if future.cancelled():
            # result() would raise CancelledError, which is a BaseException and
            # would escape the "except Exception" below.
            logging.warning("MongoDB build deletion was cancelled", extra={"notify": True})
            return
        try:
            result = future.result()
            logging.info("Deleted %d MongoDB builds", result.get("deleted_count", 0), extra={"notify": True})
        except Exception as exc:
            logging.exception("Failed to delete MongoDB builds: %s", exc, extra={"notify": True})
57+
58+
59+
class MongoBuildCleanupManager(BaseManager):
    """Manager that exposes listing and deletion of MongoDB builds.

    All the actual work is delegated to a ``MongoBuildCleaner`` instance.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base manager, then build the cleaner it delegates to."""
        super().__init__(*args, **kwargs)
        # self.job_manager is presumably set by BaseManager.__init__ - confirm.
        self.cleaner = MongoBuildCleaner(self.job_manager)

    def list_mongo_builds(self, build_config=None, build_name=None):
        """Return the cleaner's grouped build listing, optionally filtered."""
        cleaner = self.cleaner
        return cleaner.list_builds(build_config=build_config, build_name=build_name)

    def delete_mongo_builds(self, build_ids):
        """Submit a job deleting ``build_ids`` and return the submitted job.

        The cleaner's ``done`` callback is attached to the job so the outcome
        gets logged. Any error raised while submitting is logged and re-raised.
        """
        try:
            deletion = partial(self.cleaner.delete_builds, build_ids)
            job = self.job_manager.submit(deletion)
            job.add_done_callback(self.cleaner.done)
        except Exception as err:
            logging.exception("Error while submitting MongoDB build deletion job: %s", err, extra={"notify": True})
            raise
        else:
            return job

biothings/hub/dataindex/snapshooter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@
88
from functools import partial
99

1010
import boto3
11-
from config import logger as logging
1211
from elasticsearch import Elasticsearch
13-
from elasticsearch.exceptions import TransportError, NotFoundError
12+
from elasticsearch.exceptions import NotFoundError, TransportError
1413

1514
from biothings import config as btconfig
1615
from biothings.hub import SNAPSHOOTER_CATEGORY
@@ -22,6 +21,7 @@
2221
from biothings.utils.hub import template_out
2322
from biothings.utils.hub_db import get_src_build
2423
from biothings.utils.loggers import get_logger
24+
from config import logger as logging
2525

2626
from . import snapshot_cleanup as cleaner, snapshot_registrar as registrar
2727
from .snapshot_repo import Repository

biothings/utils/mongo.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import bson
1212
import dateutil.parser as date_parser
13-
from pymongo import DESCENDING, MongoClient
13+
from pymongo import DESCENDING, AsyncMongoClient, MongoClient
1414
from pymongo.client_session import ClientSession
1515
from pymongo.collection import Collection as PymongoCollection
1616
from pymongo.database import Database as PymongoDatabase
@@ -155,6 +155,10 @@ def __getitem__(self, name):
155155
return Database(self, name)
156156

157157

158+
class AsyncDatabaseClient(AsyncMongoClient):
    """Asynchronous MongoDB client; a plain subclass of ``AsyncMongoClient`` that adds no behavior."""
160+
161+
158162
def requires_config(func):
159163
@wraps(func)
160164
def func_wrapper(*args, **kwargs):
@@ -192,6 +196,12 @@ def get_hub_db_conn():
192196
return conn
193197

194198

199+
@requires_config
def get_hub_db_async_conn():
    """Create a new async client for the hub database.

    Every call builds a fresh ``AsyncDatabaseClient`` from
    ``config.HUB_DB_BACKEND["uri"]``; nothing is cached here, so the caller is
    responsible for closing the returned client.
    """
    return AsyncDatabaseClient(config.HUB_DB_BACKEND["uri"])
203+
204+
195205
@requires_config
196206
def get_src_conn():
197207
return get_conn(config.DATA_SRC_SERVER, getattr(config, "DATA_SRC_PORT", 27017))
@@ -221,6 +231,12 @@ def get_src_build(conn=None):
221231
return conn[config.DATA_HUB_DB_DATABASE][config.DATA_SRC_BUILD_COLLECTION]
222232

223233

234+
@requires_config
def get_src_build_async(conn=None):
    """Return the src_build collection reached through an async client.

    Args:
        conn: an existing async client to reuse. When falsy, a new one is
            created with ``get_hub_db_async_conn()``.

    Returns:
        ``conn[config.DATA_HUB_DB_DATABASE][config.DATA_SRC_BUILD_COLLECTION]``.
    """
    if not conn:
        conn = get_hub_db_async_conn()
    database = conn[config.DATA_HUB_DB_DATABASE]
    return database[config.DATA_SRC_BUILD_COLLECTION]
238+
239+
224240
@requires_config
225241
def get_src_build_config(conn=None):
226242
conn = conn or get_hub_db_conn()

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ opensearch = [
8686
]
8787
# minimal requirements for running biothings.hub, e.g. in CLI mode
8888
hubcore = [
89-
"pymongo>=4.1.0,<5.0", # support MongoDB 5.0 since v3.12.0
89+
"pymongo>=4.13.0,<5.0", # AsyncMongoClient stable since 4.13.0
9090
]
9191
# extra requirements to run a full biothings.hub
9292
hub = [
93-
"pymongo>=4.1.0,<5.0",
93+
"pymongo>=4.13.0,<5.0",
9494
"beautifulsoup4", # used in dumper.GoogleDriveDumper
9595
"aiocron==1.8", # setup scheduled jobs
9696
# "aiohttp==3.8.4", # elasticsearch requires aiohttp>=3,<4
@@ -120,7 +120,7 @@ hub = [
120120
]
121121
# minimal requirements to run biothings CLI
122122
cli = [
123-
"pymongo>=4.1.0,<5.0", # support MongoDB 5.0 since v3.12.0
123+
"pymongo>=4.13.0,<5.0", # AsyncMongoClient stable since 4.13.0
124124
"psutil",
125125
"jsonschema>=2.6.0",
126126
"typer>=0.17.0", # required for CLI, also installs rich package

0 commit comments

Comments
 (0)