Skip to content

Commit 71aa8c5

Browse files
authored
Merge pull request #167 from AllenNeuralDynamics/release-v0.20.1
v0.20.1
2 parents 2b3c6b1 + 6ea14bb commit 71aa8c5

File tree

7 files changed

+8
-88
lines changed

7 files changed

+8
-88
lines changed
src/aind_data_asset_indexer/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Package"""
22

3-
__version__ = "0.20.0"
3+
__version__ = "0.20.1"

src/aind_data_asset_indexer/aind_bucket_indexer.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import requests
1515
from aind_data_access_api.document_db import MetadataDbClient
1616
from aind_data_access_api.utils import (
17+
build_docdb_location_to_id_map,
1718
get_s3_bucket_and_prefix,
1819
get_s3_location,
1920
paginate_docdb,
@@ -26,7 +27,6 @@
2627

2728
from aind_data_asset_indexer.models import AindIndexBucketJobSettings
2829
from aind_data_asset_indexer.utils import (
29-
build_docdb_location_to_id_map,
3030
compute_md5_hash,
3131
core_schema_file_names,
3232
create_metadata_object_key,
@@ -83,8 +83,7 @@ def _create_docdb_client(self) -> MetadataDbClient:
8383
session.mount("https://", adapter)
8484
return MetadataDbClient(
8585
host=self.job_settings.doc_db_host,
86-
database=self.job_settings.doc_db_db_name,
87-
collection=self.job_settings.doc_db_collection_name,
86+
version="v1",
8887
session=session,
8988
)
9089

@@ -431,7 +430,6 @@ def _process_docdb_record(
431430
# Pull record from docdb to get new last_modified as well
432431
docdb_response = docdb_client.retrieve_docdb_records(
433432
filter_query={"_id": docdb_record["_id"]},
434-
paginate=False,
435433
)
436434
docdb_record = docdb_response[0]
437435
# Sync docdb record to metadata.nd.json in root folder

src/aind_data_asset_indexer/codeocean_bucket_indexer.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ def _create_docdb_client(self) -> MetadataDbClient:
6464
session.mount("https://", adapter)
6565
return MetadataDbClient(
6666
host=self.job_settings.doc_db_host,
67-
database=self.job_settings.doc_db_db_name,
68-
collection=self.job_settings.doc_db_collection_name,
67+
version="v1",
6968
session=session,
7069
)
7170

src/aind_data_asset_indexer/models.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ class AindIndexBucketJobSettings(IndexJobSettings):
5151
"""Aind Index Bucket Job Settings"""
5252

5353
doc_db_host: str
54-
doc_db_db_name: str
55-
doc_db_collection_name: str
54+
doc_db_db_name: Optional[str]
55+
doc_db_collection_name: Optional[str]
5656
run_docdb_sync: bool = Field(
5757
default=True,
5858
description="If true, then process DocDB records to sync to S3.",
@@ -84,8 +84,8 @@ class CodeOceanIndexBucketJobSettings(IndexJobSettings):
8484
"""Code Ocean Index Bucket Job Settings"""
8585

8686
doc_db_host: str
87-
doc_db_db_name: str
88-
doc_db_collection_name: str
87+
doc_db_db_name: Optional[str]
88+
doc_db_collection_name: Optional[str]
8989
codeocean_domain: str
9090
codeocean_token: SecretStr
9191
run_co_sync: bool = Field(

src/aind_data_asset_indexer/utils.py

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from typing import Dict, Iterator, List, Optional
1010
from urllib.parse import urlparse
1111

12-
from aind_data_access_api.document_db import MetadataDbClient
1312
from aind_data_access_api.utils import get_s3_location
1413
from aind_data_schema.core.data_description import DataLevel, DataRegex
1514
from aind_data_schema.core.metadata import CORE_FILES as CORE_SCHEMAS
@@ -783,40 +782,6 @@ def sync_core_json_files(
783782
)
784783

785784

786-
# TODO: replace with method from aind_data_access_api.utils once available
787-
def build_docdb_location_to_id_map(
788-
docdb_api_client: MetadataDbClient,
789-
bucket: str,
790-
prefixes: List[str],
791-
) -> Dict[str, str]:
792-
"""
793-
For a given s3 bucket and list of prefixes, return a dictionary that looks
794-
like {'s3://bucket/prefix': 'abc-1234'} where the value is the id of the
795-
record in DocDb. If the record does not exist, then there will be no key
796-
in the dictionary.
797-
798-
Parameters
799-
----------
800-
docdb_api_client : MetadataDbClient
801-
bucket : str
802-
prefixes : List[str]
803-
804-
Returns
805-
-------
806-
Dict[str, str]
807-
808-
"""
809-
locations = [get_s3_location(bucket=bucket, prefix=p) for p in prefixes]
810-
# NOTE: use aggregation since filter too large for retrieve_docdb_records
811-
agg_pipeline = [
812-
{"$match": {"location": {"$in": locations}}},
813-
{"$project": {"location": 1, "_id": 1}},
814-
]
815-
results = docdb_api_client.aggregate_docdb_records(pipeline=agg_pipeline)
816-
location_to_id_map = {r["location"]: r["_id"] for r in results}
817-
return location_to_id_map
818-
819-
820785
def get_all_processed_codeocean_asset_records(
821786
co_client: CodeOcean, co_data_asset_bucket: str
822787
) -> Dict[str, dict]:

tests/test_aind_bucket_indexer.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,6 @@ def test_process_docdb_record_valid_metadata_nd_json_file(
921921
)
922922
mock_docdb_client.retrieve_docdb_records.assert_called_once_with(
923923
filter_query={"_id": self.example_md_record.get("_id")},
924-
paginate=False,
925924
)
926925
mock_write_root_file_with_record_info.assert_called_once_with(
927926
s3_client=mock_s3_client,

tests/test_utils.py

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from codeocean.data_asset import DataAsset
1414

1515
from aind_data_asset_indexer.utils import (
16-
build_docdb_location_to_id_map,
1716
build_metadata_record_from_prefix,
1817
compute_md5_hash,
1918
cond_copy_then_sync_core_json_files,
@@ -1226,46 +1225,6 @@ def test_cond_copy_then_sync_core_json_files_mismatch(
12261225
Bucket=bucket, Key=f"{pfx}/rig.json"
12271226
)
12281227

1229-
@patch("aind_data_access_api.utils.MetadataDbClient")
1230-
def test_build_docdb_location_to_id_map(
1231-
self, mock_docdb_client: MagicMock
1232-
):
1233-
"""Tests build_docdb_location_to_id_map"""
1234-
bucket = "aind-ephys-data-dev-u5u0i5"
1235-
mock_docdb_client.aggregate_docdb_records.return_value = [
1236-
{
1237-
"_id": "70bcf356-985f-4a2a-8105-de900e35e788",
1238-
"location": (
1239-
f"s3://{bucket}/ecephys_655019_2000-04-04_04-00-00"
1240-
),
1241-
},
1242-
{
1243-
"_id": "5ca4a951-d374-4f4b-8279-d570a35b2286",
1244-
"location": (
1245-
f"s3://{bucket}/ecephys_567890_2000-01-01_04-00-00"
1246-
),
1247-
},
1248-
]
1249-
1250-
actual_map = build_docdb_location_to_id_map(
1251-
docdb_api_client=mock_docdb_client,
1252-
bucket=bucket,
1253-
prefixes=[
1254-
"ecephys_655019_2000-04-04_04-00-00",
1255-
"ecephys_567890_2000-01-01_04-00-00/",
1256-
"missing_655019_2000-01-01_01-01-02",
1257-
],
1258-
)
1259-
expected_map = {
1260-
f"s3://{bucket}/ecephys_655019_2000-04-04_04-00-00": (
1261-
"70bcf356-985f-4a2a-8105-de900e35e788"
1262-
),
1263-
f"s3://{bucket}/ecephys_567890_2000-01-01_04-00-00": (
1264-
"5ca4a951-d374-4f4b-8279-d570a35b2286"
1265-
),
1266-
}
1267-
self.assertEqual(expected_map, actual_map)
1268-
12691228
@patch("codeocean.data_asset.DataAssets.search_data_assets_iterator")
12701229
def test_get_all_processed_codeocean_asset_records(
12711230
self, mock_search_all_data_assets: MagicMock

0 commit comments

Comments
 (0)