Skip to content

Commit 6922378

Browse files
ajar312 and helen-m-lin authored
fix: remove naming convention check (#151)
* fix: remove naming convention check, update unit test
* Revert "fix: remove naming convention check, update unit test" — this reverts commit 65d9e2a.
* fix: remove naming convention check, update the docstrings accordingly, update unit tests
* fix: handle empty buckets
* Revert "fix: handle empty buckets" — this reverts commit 42456b8.
* fix: edit a long line
* docs: remove a bullet point on the naming convention check
* fix: rename the docstring and move it up, edit the docstring to clarify what is being tested
* Update tests/test_utils.py

Co-authored-by: Helen Lin <[email protected]>
1 parent 11d42f2 commit 6922378

File tree

6 files changed

+43
-79
lines changed

6 files changed

+43
-79
lines changed

docs/source/UserGuide.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@ The workflow is generally as follows:
6060
4. For each prefix, process by checking if it is a new data asset
6161
and adding it to DocDB if necessary.
6262

63-
- If the prefix is not valid (does not adhere to data asset
64-
naming convention), log a warning.
6563
- If the metadata record exists in S3 but not in DocDB, copy it
6664
to DocDB.
6765
- If the metadata record does not exist in S3, create it and save

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ exclude = '''
5959
| _build
6060
| build
6161
| dist
62+
| .conda
6263
)/
6364
| .gitignore
6465
)
@@ -82,5 +83,5 @@ line_length = 79
8283
profile = "black"
8384

8485
[tool.interrogate]
85-
exclude = ["setup.py", "docs", "build"]
86+
exclude = ["setup.py", "docs", "build", ".conda"]
8687
fail-under = 100

src/aind_data_asset_indexer/aind_bucket_indexer.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
download_json_file_from_s3,
3838
get_dict_of_core_schema_file_info,
3939
get_dict_of_file_info,
40-
is_prefix_valid,
4140
is_record_location_valid,
4241
iterate_through_top_level,
4342
list_metadata_copies,
@@ -545,15 +544,13 @@ def _process_prefix(
545544
):
546545
"""
547546
Processes a prefix in S3
548-
1) If the prefix is not valid (does not adhere to data asset naming
549-
convention), log a warning and do not process it further.
550-
2) If metadata record exists in S3 and DocDB, do nothing.
551-
3) If record is in S3 but not DocDb, then copy it to DocDb if the
547+
1) If metadata record exists in S3 and DocDB, do nothing.
548+
2) If record is in S3 but not DocDb, then copy it to DocDb if the
552549
location in the metadata record matches the actual location and
553550
the record has an _id field. Otherwise, log a warning.
554-
4) If record does not exist in both DocDB and S3, build a new metadata
551+
3) If record does not exist in both DocDB and S3, build a new metadata
555552
file and save it to S3 (assume Lambda function will save to DocDB).
556-
5) In both cases above, we also copy the original core json files to a
553+
4) In both cases above, we also copy the original core json files to a
557554
subfolder and ensure the top level core jsons are in sync with the
558555
metadata.nd.json in S3.
559556
@@ -569,11 +566,6 @@ def _process_prefix(
569566
570567
"""
571568
bucket = self.job_settings.s3_bucket
572-
if not is_prefix_valid(s3_prefix):
573-
logging.warning(
574-
f"Prefix {s3_prefix} not valid in bucket {bucket}! Skipping."
575-
)
576-
return
577569
# Check if metadata record exists
578570
location = get_s3_location(bucket=bucket, prefix=s3_prefix)
579571
if location_to_id_map.get(location) is not None:

src/aind_data_asset_indexer/utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ def is_record_location_valid(
121121
True if there is a location field and the url in the field has a form
122122
like 's3://{expected_bucket}/prefix'
123123
Will return False if there is no s3 scheme, the bucket does not match
124-
the expected bucket, the prefix contains forward slashes, or the prefix
125-
is invalid or does not match the expected prefix.
124+
the expected bucket, the prefix contains forward slashes,
125+
or does not match the expected prefix.
126126
If the record name does not match the prefix, a warning is logged,
127127
but the method will still return True.
128128
@@ -143,7 +143,6 @@ def is_record_location_valid(
143143
if (
144144
stripped_prefix == ""
145145
or len(stripped_prefix.split("/")) > 1
146-
or not is_prefix_valid(stripped_prefix)
147146
or (
148147
expected_prefix is not None
149148
and stripped_prefix != expected_stripped_prefix

tests/test_aind_bucket_indexer.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1144,32 +1144,6 @@ def test_process_records(self, mock_dask_bag_map_parts: MagicMock):
11441144
self.basic_job._process_records(example_records)
11451145
mock_dask_bag_map_parts.assert_called()
11461146

1147-
@patch("aind_data_asset_indexer.aind_bucket_indexer.does_s3_object_exist")
1148-
@patch("aind_data_asset_indexer.aind_bucket_indexer.MetadataDbClient")
1149-
@patch("boto3.client")
1150-
def test_process_prefix_invalid_prefix(
1151-
self,
1152-
mock_s3_client: MagicMock,
1153-
mock_docdb_client: MagicMock,
1154-
mock_does_s3_object_exist: MagicMock,
1155-
):
1156-
"""Tests _process_prefix method when the prefix is invalid."""
1157-
1158-
location_to_id_map = dict()
1159-
with self.assertLogs(level="DEBUG") as captured:
1160-
self.basic_job._process_prefix(
1161-
s3_prefix="ecephys_642478",
1162-
docdb_client=mock_docdb_client,
1163-
s3_client=mock_s3_client,
1164-
location_to_id_map=location_to_id_map,
1165-
)
1166-
expected_log_messages = [
1167-
"WARNING:root:Prefix ecephys_642478 not valid in bucket "
1168-
"aind-ephys-data-dev-u5u0i5! Skipping."
1169-
]
1170-
self.assertEqual(expected_log_messages, captured.output)
1171-
mock_does_s3_object_exist.assert_not_called()
1172-
11731147
@patch(
11741148
"aind_data_asset_indexer.aind_bucket_indexer."
11751149
"upload_metadata_json_str_to_s3"

tests/test_utils.py

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,41 @@ def test_is_record_location_valid_true1(self):
215215
)
216216
)
217217

218+
def test_is_record_location_valid_true2(self):
219+
"""Tests is_record_location_valid returns true when prefix does not
220+
match the naming convention"""
221+
222+
example_record = {
223+
"_id": "abc-123",
224+
"name": "prefix1",
225+
"location": "s3://some_bucket/prefix1",
226+
}
227+
self.assertTrue(
228+
is_record_location_valid(
229+
example_record, expected_bucket="some_bucket"
230+
)
231+
)
232+
233+
def test_is_record_location_valid_true3(self):
234+
"""Tests is_record_location_valid returns true when prefix does
235+
not match name"""
236+
237+
example_record = {
238+
"_id": "abc-123",
239+
"name": "prefix2_2024-01-01_01-01-01",
240+
"location": "s3://some_bucket/prefix1_2024-01-01_01-01-01",
241+
}
242+
with self.assertLogs(level="DEBUG") as captured:
243+
result = is_record_location_valid(
244+
example_record, expected_bucket="some_bucket"
245+
)
246+
expected_log_messages = [
247+
"WARNING:root:Record name prefix2_2024-01-01_01-01-01 does not "
248+
"match prefix prefix1_2024-01-01_01-01-01."
249+
]
250+
self.assertEqual(expected_log_messages, captured.output)
251+
self.assertTrue(result)
252+
218253
def test_is_record_location_valid_false0(self):
219254
"""Tests is_record_location_valid returns false when no location field
220255
is present"""
@@ -275,41 +310,6 @@ def test_is_record_location_valid_false3(self):
275310
)
276311

277312
def test_is_record_location_valid_false4(self):
278-
"""Tests is_record_location_valid returns false when prefix is
279-
invalid"""
280-
281-
example_record = {
282-
"_id": "abc-123",
283-
"name": "prefix1",
284-
"location": "s3://some_bucket/prefix1",
285-
}
286-
self.assertFalse(
287-
is_record_location_valid(
288-
example_record, expected_bucket="some_bucket"
289-
)
290-
)
291-
292-
def test_is_record_location_valid_true2(self):
293-
"""Tests is_record_location_valid returns true when prefix does
294-
not match name"""
295-
296-
example_record = {
297-
"_id": "abc-123",
298-
"name": "prefix2_2024-01-01_01-01-01",
299-
"location": "s3://some_bucket/prefix1_2024-01-01_01-01-01",
300-
}
301-
with self.assertLogs(level="DEBUG") as captured:
302-
result = is_record_location_valid(
303-
example_record, expected_bucket="some_bucket"
304-
)
305-
expected_log_messages = [
306-
"WARNING:root:Record name prefix2_2024-01-01_01-01-01 does not "
307-
"match prefix prefix1_2024-01-01_01-01-01."
308-
]
309-
self.assertEqual(expected_log_messages, captured.output)
310-
self.assertTrue(result)
311-
312-
def test_is_record_location_valid_false6(self):
313313
"""Tests is_record_location_valid returns false when prefixes don't
314314
match"""
315315

0 commit comments

Comments (0)