@@ -10,14 +10,22 @@ class MongoBuildCleaner:
1010 def __init__ (self , job_manager ):
1111 self .job_manager = job_manager
1212
13- def list_builds (self , build_config = None , build_name = None ):
13+ def list_builds (self , build_config = None , build_name = None , year = None ):
1414 collection = get_src_build ()
1515
1616 filters = {}
1717 if build_config :
1818 filters ["build_config._id" ] = build_config
1919 if build_name :
2020 filters ["_id" ] = build_name
21+ if year :
22+ from datetime import datetime
23+
24+ year = int (year )
25+ filters ["started_at" ] = {
26+ "$gte" : datetime (year , 1 , 1 ),
27+ "$lt" : datetime (year + 1 , 1 , 1 ),
28+ }
2129
2230 projection = {
2331 "_id" : 1 ,
@@ -81,6 +89,51 @@ async def delete_builds(self, build_ids):
8189 finally :
8290 await conn .close ()
8391
async def validate_builds(self):
    """Drop build records whose target collection is missing.

    Every document in ``src_build`` is compared against the collection
    names currently present in the target database
    (``btconfig.DATA_TARGET_DATABASE``). A record is considered orphaned
    when its ``target_name`` (or its ``_id`` when ``target_name`` is
    missing or empty) is not among those names; all orphaned records are
    then deleted in a single ``delete_many`` call.

    NOTE(review): a build record whose target collection has not been
    created yet would also match this test -- confirm this job is never
    run while a build is still in progress.

    Returns:
        dict: ``builds_removed`` holds the number of records deleted as
        reported by MongoDB, and ``builds_removed_names`` holds the sorted
        ``_id`` values that were selected for deletion.
    """
    from biothings.utils import mongo

    logging.info("Starting validation of MongoDB builds...")
    conn = mongo.get_hub_db_async_conn()
    try:
        builds = mongo.get_src_build_async(conn)
        targets = conn[btconfig.DATA_TARGET_DATABASE]

        # Snapshot of collection names, taken once so each lookup is O(1).
        live_names = set(await targets.list_collection_names())

        stale_ids = []
        async for record in builds.find({}, {"_id": 1, "target_name": 1}):
            record_id = record["_id"]
            # Fall back to the build id when no target name is stored.
            if (record.get("target_name") or record_id) in live_names:
                continue
            stale_ids.append(record_id)

        removed = 0
        if stale_ids:
            outcome = await builds.delete_many({"_id": {"$in": stale_ids}})
            removed = outcome.deleted_count

        logging.info(
            "Build validation complete: removed %d orphaned build record(s)",
            removed,
            extra={"notify": True},
        )
        return {
            "builds_removed": removed,
            "builds_removed_names": sorted(stale_ids),
        }
    finally:
        # Always release the connection, even when validation fails.
        await conn.close()
136+
84137 def done (self , future ):
85138 try :
86139 result = future .result ()
@@ -93,14 +146,25 @@ def done(self, future):
93146 except Exception as exc :
94147 logging .exception ("Failed to delete MongoDB builds: %s" , exc , extra = {"notify" : True })
95148
def validate_done(self, future):
    """Done-callback for the build validation job.

    ``validate_builds`` already emits the notifying "Build validation
    complete" message when it succeeds, so this callback only records the
    outcome in the regular log on success. Repeating the same message with
    ``notify`` here would send every notification twice. Failures are not
    reported anywhere else, so they are logged with ``notify`` set.

    Args:
        future: the finished job; ``future.result()`` is expected to return
            the dict produced by ``validate_builds`` or to raise the
            exception the job failed with.
    """
    try:
        result = future.result()
        # Plain log entry only: the notification was sent by the job itself.
        logging.info(
            "Build validation job finished (%d orphaned build record(s) removed)",
            result.get("builds_removed", 0),
        )
    except Exception as exc:
        logging.exception("Failed to validate MongoDB builds: %s", exc, extra={"notify": True})
159+
96160
97161class MongoBuildCleanupManager (BaseManager ):
def __init__(self, *args, **kwargs):
    """Initialise the base manager, then attach a ``MongoBuildCleaner``.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser. The cleaner is given this manager's ``job_manager``.
    """
    super().__init__(*args, **kwargs)
    # The cleaner is created after the parent initialiser has run.
    self.cleaner = MongoBuildCleaner(job_manager=self.job_manager)
101165
def list_mongo_builds(self, build_config=None, build_name=None, year=None):
    """Return the cleaner's build listing for the given optional filters.

    Args:
        build_config: forwarded to ``list_builds`` as ``build_config``.
        build_name: forwarded to ``list_builds`` as ``build_name``.
        year: forwarded to ``list_builds`` as ``year``.

    Returns:
        Whatever ``self.cleaner.list_builds`` returns for these filters.
    """
    criteria = {
        "build_config": build_config,
        "build_name": build_name,
        "year": year,
    }
    return self.cleaner.list_builds(**criteria)
104168
105169 def delete_mongo_builds (self , build_ids ):
106170 try :
@@ -110,3 +174,12 @@ def delete_mongo_builds(self, build_ids):
110174 logging .exception ("Error while submitting MongoDB build deletion job: %s" , ex , extra = {"notify" : True })
111175 raise
112176 return job
177+
def validate_mongo_builds(self):
    """Submit the build validation job and return its handle.

    The cleaner's ``validate_builds`` is handed to the job manager and
    ``validate_done`` is registered as the job's done-callback. Any error
    raised while submitting or registering is logged with ``notify`` set
    and then re-raised to the caller.

    Returns:
        The job object returned by ``self.job_manager.submit``.
    """
    try:
        validation_job = self.job_manager.submit(partial(self.cleaner.validate_builds))
        validation_job.add_done_callback(self.cleaner.validate_done)
    except Exception as ex:
        logging.exception("Error while submitting MongoDB build validation job: %s", ex, extra={"notify": True})
        raise
    else:
        return validation_job
0 commit comments