@@ -164,11 +164,16 @@ def _get_co_links_from_record(docdb_record: dict) -> List[str]:
164164 external_links = external_links .get (
165165 ExternalPlatforms .CODEOCEAN .value , []
166166 )
167- else :
167+ elif isinstance (external_links , list ):
168+ if not all (isinstance (r , dict ) for r in external_links ):
169+ raise ValueError (f"Invalid external_links for: { docdb_record } " )
168170 external_links = [
169171 r .get (ExternalPlatforms .CODEOCEAN .value )
170172 for r in external_links
173+ if r .get (ExternalPlatforms .CODEOCEAN .value ) is not None
171174 ]
175+ else :
176+ raise ValueError (f"Invalid external_links for: { docdb_record } " )
172177 return external_links
173178
174179 def _update_external_links_in_docdb (
@@ -213,37 +218,45 @@ def _update_external_links_in_docdb(
213218 for page in pages :
214219 records_to_update = []
215220 for record in page :
216- location = record .get ("location" )
217- external_links = self ._get_co_links_from_record (record )
218- code_ocean_ids = (
219- None
220- if location is None
221- else co_loc_to_id_map .get (location )
222- )
223- docdb_rec_id = record ["_id" ]
224- if code_ocean_ids is not None and code_ocean_ids != set (
225- external_links
226- ):
227- new_external_links = code_ocean_ids
228- elif external_links and not code_ocean_ids :
229- logging .info (
230- f"No code ocean data asset ids found for "
231- f"{ location } . Removing external links from record."
221+ try :
222+ location = record .get ("location" )
223+ external_links = self ._get_co_links_from_record (record )
224+ code_ocean_ids = (
225+ None
226+ if location is None
227+ else co_loc_to_id_map .get (location )
232228 )
233- new_external_links = set ()
234- else :
235- new_external_links = None
236- if new_external_links is not None :
237- record_links = {
238- ExternalPlatforms .CODEOCEAN .value : sorted (
239- list (new_external_links )
229+ docdb_rec_id = record ["_id" ]
230+ if (
231+ code_ocean_ids is not None
232+ and code_ocean_ids != set (external_links )
233+ ):
234+ new_external_links = code_ocean_ids
235+ elif external_links and not code_ocean_ids :
236+ logging .info (
237+ f"No code ocean data asset ids found for "
238+ f"{ location } . Removing external links from "
239+ "record."
240240 )
241- }
242- records_to_update .append (
243- {
244- "_id" : docdb_rec_id ,
245- "external_links" : record_links ,
241+ new_external_links = set ()
242+ else :
243+ new_external_links = None
244+ if new_external_links is not None :
245+ record_links = {
246+ ExternalPlatforms .CODEOCEAN .value : sorted (
247+ list (new_external_links )
248+ )
246249 }
250+ records_to_update .append (
251+ {
252+ "_id" : docdb_rec_id ,
253+ "external_links" : record_links ,
254+ }
255+ )
256+ except Exception as e :
257+ logging .error (
258+ f'Error processing { record .get ("location" )} : '
259+ f"{ repr (e )} "
247260 )
248261 if len (records_to_update ) > 0 :
249262 logging .info (f"Updating { len (records_to_update )} records" )
0 commit comments