Skip to content

Commit 7d58821

Browse files
committed
add validate_mongo_builds method and API endpoint
1 parent e42b018 commit 7d58821

File tree

2 files changed

+81
-3
lines changed

2 files changed

+81
-3
lines changed

biothings/hub/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,7 @@ def configure_commands(self):
11531153
if self.managers.get("mongo_build_cleanup_manager"):
11541154
self.commands["list_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].list_mongo_builds
11551155
self.commands["delete_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].delete_mongo_builds
1156+
self.commands["validate_mongo_builds"] = self.managers["mongo_build_cleanup_manager"].validate_mongo_builds
11561157
# data release commands
11571158
if self.managers.get("release_manager"):
11581159
self.commands["create_release_note"] = self.managers["release_manager"].create_release_note
@@ -1525,6 +1526,10 @@ def configure_api_endpoints(self):
15251526
self.api_endpoints["mongo_builds/delete"] = EndpointDefinition(
15261527
name="delete_mongo_builds", method="put", force_bodyargs=True
15271528
)
1529+
if "validate_mongo_builds" in cmdnames:
1530+
self.api_endpoints["mongo_builds/validate"] = EndpointDefinition(
1531+
name="validate_mongo_builds", method="post"
1532+
)
15281533
if "sync" in cmdnames:
15291534
self.api_endpoints["sync"] = EndpointDefinition(name="sync", method="post", force_bodyargs=True)
15301535
if "whatsnew" in cmdnames:

biothings/hub/dataindex/mongo_build_cleanup.py

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,22 @@ class MongoBuildCleaner:
1010
def __init__(self, job_manager):
1111
self.job_manager = job_manager
1212

13-
def list_builds(self, build_config=None, build_name=None):
13+
def list_builds(self, build_config=None, build_name=None, year=None):
1414
collection = get_src_build()
1515

1616
filters = {}
1717
if build_config:
1818
filters["build_config._id"] = build_config
1919
if build_name:
2020
filters["_id"] = build_name
21+
if year:
22+
from datetime import datetime
23+
24+
year = int(year)
25+
filters["started_at"] = {
26+
"$gte": datetime(year, 1, 1),
27+
"$lt": datetime(year + 1, 1, 1),
28+
}
2129

2230
projection = {
2331
"_id": 1,
@@ -81,6 +89,51 @@ async def delete_builds(self, build_ids):
8189
finally:
8290
await conn.close()
8391

92+
async def validate_builds(self):
    """Remove build records whose target collection no longer exists.

    The collection names of the target database
    (``btconfig.DATA_TARGET_DATABASE``) are listed once, then every document
    in ``src_build`` is scanned. A record counts as orphaned when its
    ``target_name`` (falling back to its ``_id`` when that field is missing
    or empty) is not among those collection names. All orphaned records are
    removed with a single ``delete_many`` call.

    The completion message is logged here *without* the ``notify`` flag:
    ``validate_done`` emits the same message with ``notify`` set when it runs
    as the job's done callback, so flagging it here as well would notify
    twice for one run.

    Returns:
        dict: ``builds_removed`` is the ``deleted_count`` reported by the
        delete operation (0 when nothing was orphaned);
        ``builds_removed_names`` is the sorted list of build ids that were
        selected for deletion.

    NOTE(review): a build record whose target collection has not been
    created yet would also be classified as orphaned by this check --
    confirm this cannot race with an in-progress build.
    """
    from biothings.utils import mongo

    logging.info("Starting validation of MongoDB builds...")
    conn = mongo.get_hub_db_async_conn()
    try:
        src_build = mongo.get_src_build_async(conn)
        target_db = conn[btconfig.DATA_TARGET_DATABASE]

        # Snapshot the collection names once so each build record is checked
        # with a set lookup instead of a database round-trip.
        existing_collections = set(await target_db.list_collection_names())

        orphaned_ids = [
            doc["_id"]
            async for doc in src_build.find({}, {"_id": 1, "target_name": 1})
            if (doc.get("target_name") or doc["_id"]) not in existing_collections
        ]

        deleted_count = 0
        if orphaned_ids:
            result = await src_build.delete_many({"_id": {"$in": orphaned_ids}})
            deleted_count = result.deleted_count

        # No extra={"notify": True} here; see the docstring.
        logging.info(
            "Build validation complete: removed %d orphaned build record(s)",
            deleted_count,
        )
        return {
            "builds_removed": deleted_count,
            "builds_removed_names": sorted(orphaned_ids),
        }
    finally:
        # Always release the connection, including when validation fails.
        await conn.close()
136+
84137
def done(self, future):
85138
try:
86139
result = future.result()
@@ -93,14 +146,25 @@ def done(self, future):
93146
except Exception as exc:
94147
logging.exception("Failed to delete MongoDB builds: %s", exc, extra={"notify": True})
95148

149+
def validate_done(self, future):
    """Done callback for a build-validation job: report its outcome.

    Reads the job result from ``future`` and logs, with the ``notify`` flag
    set, how many orphaned build records were removed (0 when the result has
    no ``builds_removed`` key). Any exception raised while retrieving or
    reading the result is logged with its traceback instead of propagating.
    """
    try:
        outcome = future.result()
        removed = outcome.get("builds_removed", 0)
        logging.info(
            "Build validation complete: removed %d orphaned build record(s)",
            removed,
            extra={"notify": True},
        )
    except Exception as err:
        logging.exception("Failed to validate MongoDB builds: %s", err, extra={"notify": True})
159+
96160

97161
class MongoBuildCleanupManager(BaseManager):
98162
def __init__(self, *args, **kwargs):
99163
super().__init__(*args, **kwargs)
100164
self.cleaner = MongoBuildCleaner(self.job_manager)
101165

102-
def list_mongo_builds(self, build_config=None, build_name=None):
103-
return self.cleaner.list_builds(build_config=build_config, build_name=build_name)
166+
def list_mongo_builds(self, build_config=None, build_name=None, year=None):
    """Return the cleaner's build listing for the given optional filters.

    ``build_config``, ``build_name`` and ``year`` are forwarded unchanged, as
    keyword arguments, to ``self.cleaner.list_builds``.
    """
    builds = self.cleaner.list_builds(
        build_config=build_config,
        build_name=build_name,
        year=year,
    )
    return builds
104168

105169
def delete_mongo_builds(self, build_ids):
106170
try:
@@ -110,3 +174,12 @@ def delete_mongo_builds(self, build_ids):
110174
logging.exception("Error while submitting MongoDB build deletion job: %s", ex, extra={"notify": True})
111175
raise
112176
return job
177+
178+
def validate_mongo_builds(self):
    """Submit a build-validation job and return the submitted job.

    ``self.cleaner.validate_builds`` is handed to the job manager, and
    ``self.cleaner.validate_done`` is attached as the job's done callback.
    If submitting the job or attaching the callback fails, the error is
    logged (with the ``notify`` flag) and re-raised.
    """
    try:
        task = self.job_manager.submit(partial(self.cleaner.validate_builds))
        task.add_done_callback(self.cleaner.validate_done)
    except Exception as err:
        logging.exception("Error while submitting MongoDB build validation job: %s", err, extra={"notify": True})
        raise
    else:
        return task

0 commit comments

Comments
 (0)