Skip to content

Commit 616d393

Browse files
authored
Merge pull request #179 from AllenNeuralDynamics/release-v0.21.1
Release v0.21.1
2 parents 78c4c50 + db3891d commit 616d393

File tree

3 files changed

+78
-30
lines changed

3 files changed

+78
-30
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Package"""
22

3-
__version__ = "0.21.0"
3+
__version__ = "0.21.1"

src/aind_data_asset_indexer/codeocean_bucket_indexer.py

Lines changed: 42 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -164,11 +164,16 @@ def _get_co_links_from_record(docdb_record: dict) -> List[str]:
164164
external_links = external_links.get(
165165
ExternalPlatforms.CODEOCEAN.value, []
166166
)
167-
else:
167+
elif isinstance(external_links, list):
168+
if not all(isinstance(r, dict) for r in external_links):
169+
raise ValueError(f"Invalid external_links for: {docdb_record}")
168170
external_links = [
169171
r.get(ExternalPlatforms.CODEOCEAN.value)
170172
for r in external_links
173+
if r.get(ExternalPlatforms.CODEOCEAN.value) is not None
171174
]
175+
else:
176+
raise ValueError(f"Invalid external_links for: {docdb_record}")
172177
return external_links
173178

174179
def _update_external_links_in_docdb(
@@ -213,37 +218,45 @@ def _update_external_links_in_docdb(
213218
for page in pages:
214219
records_to_update = []
215220
for record in page:
216-
location = record.get("location")
217-
external_links = self._get_co_links_from_record(record)
218-
code_ocean_ids = (
219-
None
220-
if location is None
221-
else co_loc_to_id_map.get(location)
222-
)
223-
docdb_rec_id = record["_id"]
224-
if code_ocean_ids is not None and code_ocean_ids != set(
225-
external_links
226-
):
227-
new_external_links = code_ocean_ids
228-
elif external_links and not code_ocean_ids:
229-
logging.info(
230-
f"No code ocean data asset ids found for "
231-
f"{location}. Removing external links from record."
221+
try:
222+
location = record.get("location")
223+
external_links = self._get_co_links_from_record(record)
224+
code_ocean_ids = (
225+
None
226+
if location is None
227+
else co_loc_to_id_map.get(location)
232228
)
233-
new_external_links = set()
234-
else:
235-
new_external_links = None
236-
if new_external_links is not None:
237-
record_links = {
238-
ExternalPlatforms.CODEOCEAN.value: sorted(
239-
list(new_external_links)
229+
docdb_rec_id = record["_id"]
230+
if (
231+
code_ocean_ids is not None
232+
and code_ocean_ids != set(external_links)
233+
):
234+
new_external_links = code_ocean_ids
235+
elif external_links and not code_ocean_ids:
236+
logging.info(
237+
f"No code ocean data asset ids found for "
238+
f"{location}. Removing external links from "
239+
"record."
240240
)
241-
}
242-
records_to_update.append(
243-
{
244-
"_id": docdb_rec_id,
245-
"external_links": record_links,
241+
new_external_links = set()
242+
else:
243+
new_external_links = None
244+
if new_external_links is not None:
245+
record_links = {
246+
ExternalPlatforms.CODEOCEAN.value: sorted(
247+
list(new_external_links)
248+
)
246249
}
250+
records_to_update.append(
251+
{
252+
"_id": docdb_rec_id,
253+
"external_links": record_links,
254+
}
255+
)
256+
except Exception as e:
257+
logging.error(
258+
f'Error processing {record.get("location")}: '
259+
f"{repr(e)}"
247260
)
248261
if len(records_to_update) > 0:
249262
logging.info(f"Updating {len(records_to_update)} records")

tests/test_codeocean_bucket_indexer.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,32 @@ def test_get_co_links_from_record_legacy(self):
207207
)
208208
self.assertEqual(["abc-123", "def-456"], output)
209209

210+
def test_get_co_links_from_record_invalid_list(self):
211+
"""Tests _get_co_links_from_record with invalid external_links list"""
212+
docdb_record = {
213+
"_id": "12345",
214+
"location": "s3://bucket/prefix",
215+
"external_links": ["abc-123", "def-456"],
216+
}
217+
with self.assertRaises(ValueError) as e:
218+
self.basic_job._get_co_links_from_record(docdb_record=docdb_record)
219+
self.assertEqual(
220+
f"Invalid external_links for: {docdb_record}", str(e.exception)
221+
)
222+
223+
def test_get_co_links_from_record_invalid_other(self):
224+
"""Tests _get_co_links_from_record with invalid external_links type"""
225+
docdb_record = {
226+
"_id": "12345",
227+
"location": "s3://bucket/prefix",
228+
"external_links": "abc-123",
229+
}
230+
with self.assertRaises(ValueError) as e:
231+
self.basic_job._get_co_links_from_record(docdb_record=docdb_record)
232+
self.assertEqual(
233+
f"Invalid external_links for: {docdb_record}", str(e.exception)
234+
)
235+
210236
@patch("aind_data_asset_indexer.codeocean_bucket_indexer.MetadataDbClient")
211237
@patch("codeocean.data_asset.DataAssets.search_data_assets_iterator")
212238
@patch("aind_data_asset_indexer.codeocean_bucket_indexer.paginate_docdb")
@@ -262,6 +288,11 @@ def test_update_external_links_in_docdb(
262288
"location": "s3://bucket2/prefix4",
263289
"external_links": [],
264290
},
291+
{
292+
"_id": "0004",
293+
"location": "s3://bucket3/prefix5",
294+
"external_links": ["def-456"],
295+
},
265296
]
266297
]
267298

@@ -273,6 +304,10 @@ def test_update_external_links_in_docdb(
273304
expected_log_messages = [
274305
"INFO:root:No code ocean data asset ids found for "
275306
"s3://bucket2/prefix3. Removing external links from record.",
307+
"ERROR:root:Error processing s3://bucket3/prefix5: "
308+
"ValueError(\"Invalid external_links for: {'_id': '0004', "
309+
"'location': 's3://bucket3/prefix5', 'external_links': "
310+
"['def-456']}\")",
276311
"INFO:root:Updating 2 records",
277312
f"DEBUG:root:[{bulk_write_response}]",
278313
]

0 commit comments

Comments
 (0)