Skip to content

Commit c28c2d1

Browse files
authored
feat: option to disable docdb/s3/co sync (#135)
1 parent e593a52 commit c28c2d1

File tree

6 files changed

+118
-4
lines changed

6 files changed

+118
-4
lines changed

src/aind_data_asset_indexer/aind_bucket_indexer.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -743,8 +743,8 @@ def _process_prefixes(self, prefixes: List[str]):
743743
)
744744
mapped_partitions.compute()
745745

746-
def run_job(self):
747-
"""Main method to run."""
746+
def _run_docdb_sync(self):
747+
"""Sync changes in DocDB to S3"""
748748
with self._create_docdb_client() as iterator_docdb_client:
749749
filter = {
750750
"location": {
@@ -768,6 +768,9 @@ def run_job(self):
768768
if len(page) > 0:
769769
self._process_records(records=page)
770770
logging.info("Finished scanning through DocDb.")
771+
772+
def _run_s3_sync(self):
773+
"""Sync changes in S3 to DocDB"""
771774
logging.info("Starting to scan through S3.")
772775
iterator_s3_client = boto3.client("s3")
773776
prefix_iterator = iterate_through_top_level(
@@ -779,6 +782,13 @@ def run_job(self):
779782
iterator_s3_client.close()
780783
logging.info("Finished scanning through S3.")
781784

785+
def run_job(self):
786+
"""Main method to run."""
787+
if self.job_settings.run_docdb_sync is True:
788+
self._run_docdb_sync()
789+
if self.job_settings.run_s3_sync is True:
790+
self._run_s3_sync()
791+
782792

783793
if __name__ == "__main__":
784794
sys_args = sys.argv[1:]

src/aind_data_asset_indexer/codeocean_bucket_indexer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,8 @@ def _delete_records_from_docdb(self, record_list: List[str]):
401401

402402
def run_job(self):
403403
"""Main method to run."""
404+
if self.job_settings.run_co_sync is not True:
405+
return
404406
logging.info("Starting to scan through CodeOcean.")
405407
retry = Retry(
406408
total=5,

src/aind_data_asset_indexer/models.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ class AindIndexBucketJobSettings(IndexJobSettings):
5353
doc_db_host: str
5454
doc_db_db_name: str
5555
doc_db_collection_name: str
56+
run_docdb_sync: bool = Field(
57+
default=True,
58+
description="If true, then process DocDB records to sync to S3.",
59+
)
60+
run_s3_sync: bool = Field(
61+
default=True,
62+
description="If true, then process S3 prefixes to sync to DocDB.",
63+
)
5664

5765

5866
class PopulateAindBucketsJobSettings(IndexJobSettings):
@@ -73,13 +81,19 @@ class AindIndexBucketsJobSettings(AindIndexBucketJobSettings):
7381

7482

7583
class CodeOceanIndexBucketJobSettings(IndexJobSettings):
76-
"""Aind Index Bucket Job Settings"""
84+
"""Code Ocean Index Bucket Job Settings"""
7785

7886
doc_db_host: str
7987
doc_db_db_name: str
8088
doc_db_collection_name: str
8189
codeocean_domain: str
8290
codeocean_token: SecretStr
91+
run_co_sync: bool = Field(
92+
default=True,
93+
description=(
94+
"If true, then process Code Ocean results and external links."
95+
),
96+
)
8397

8498
@classmethod
8599
def from_param_store(cls, param_store_name: str):

tests/test_aind_bucket_indexer.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1856,6 +1856,52 @@ def test_run_job_lookback_days(
18561856
]
18571857
)
18581858

1859+
@patch(
1860+
"aind_data_asset_indexer.aind_bucket_indexer.AindIndexBucketJob."
1861+
"_run_s3_sync"
1862+
)
1863+
@patch(
1864+
"aind_data_asset_indexer.aind_bucket_indexer.AindIndexBucketJob."
1865+
"_run_docdb_sync"
1866+
)
1867+
def test_run_job_skip_docdb_sync(
1868+
self,
1869+
mock_run_docdb_sync: MagicMock,
1870+
mock_run_s3_sync: MagicMock,
1871+
):
1872+
"""Tests main run_job method when run_docdb_sync is False."""
1873+
1874+
job_configs_json = self.basic_job_configs.model_dump(mode="json")
1875+
job_configs_json["run_docdb_sync"] = False
1876+
job_configs = AindIndexBucketJobSettings(**job_configs_json)
1877+
job = AindIndexBucketJob(job_settings=job_configs)
1878+
job.run_job()
1879+
mock_run_docdb_sync.assert_not_called()
1880+
mock_run_s3_sync.assert_called_once()
1881+
1882+
@patch(
1883+
"aind_data_asset_indexer.aind_bucket_indexer.AindIndexBucketJob."
1884+
"_run_s3_sync"
1885+
)
1886+
@patch(
1887+
"aind_data_asset_indexer.aind_bucket_indexer.AindIndexBucketJob."
1888+
"_run_docdb_sync"
1889+
)
1890+
def test_run_job_skip_s3_sync(
1891+
self,
1892+
mock_run_docdb_sync: MagicMock,
1893+
mock_run_s3_sync: MagicMock,
1894+
):
1895+
"""Tests main run_job method when run_s3_sync is False."""
1896+
1897+
job_configs_json = self.basic_job_configs.model_dump(mode="json")
1898+
job_configs_json["run_s3_sync"] = False
1899+
job_configs = AindIndexBucketJobSettings(**job_configs_json)
1900+
job = AindIndexBucketJob(job_settings=job_configs)
1901+
job.run_job()
1902+
mock_run_docdb_sync.assert_called_once()
1903+
mock_run_s3_sync.assert_not_called()
1904+
18591905

18601906
if __name__ == "__main__":
18611907
unittest.main()

tests/test_codeocean_bucket_indexer.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,41 @@ def test_run_job(
655655
)
656656
mock_docdb_client.return_value.__exit__.assert_called_once()
657657

658+
@patch(
659+
"aind_data_asset_indexer.codeocean_bucket_indexer."
660+
"CodeOceanIndexBucketJob._update_external_links_in_docdb"
661+
)
662+
@patch(
663+
"aind_data_asset_indexer.codeocean_bucket_indexer."
664+
"CodeOceanIndexBucketJob._delete_records_from_docdb"
665+
)
666+
@patch(
667+
"aind_data_asset_indexer.codeocean_bucket_indexer."
668+
"CodeOceanIndexBucketJob._process_codeocean_records"
669+
)
670+
@patch(
671+
"aind_data_asset_indexer.codeocean_bucket_indexer."
672+
"get_all_processed_codeocean_asset_records"
673+
)
674+
def test_run_job_skip(
675+
self,
676+
mock_get_all_co_records: MagicMock,
677+
mock_process_codeocean_records: MagicMock,
678+
mock_delete_records_from_docdb: MagicMock,
679+
mock_update_external_links_in_docdb: MagicMock,
680+
):
681+
"""Tests run_job method. Given the example responses, should ignore
682+
one record, add one record, and delete one record."""
683+
job_configs_json = self.basic_job_configs.model_dump(mode="json")
684+
job_configs_json["run_co_sync"] = False
685+
job_configs = CodeOceanIndexBucketJobSettings(**job_configs_json)
686+
job = CodeOceanIndexBucketJob(job_settings=job_configs)
687+
job.run_job()
688+
mock_get_all_co_records.assert_not_called()
689+
mock_update_external_links_in_docdb.assert_not_called()
690+
mock_process_codeocean_records.assert_not_called()
691+
mock_delete_records_from_docdb.assert_not_called()
692+
658693

659694
if __name__ == "__main__":
660695
unittest.main()

tests/test_models.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ def test_defaults(self):
8686
self.assertEqual(
8787
"some_docdb_collection_name", job_settings.doc_db_collection_name
8888
)
89+
self.assertTrue(job_settings.run_docdb_sync)
90+
self.assertTrue(job_settings.run_s3_sync)
8991

9092
@patch("boto3.client")
9193
def test_from_from_param_store(self, mock_boto3_client):
@@ -98,7 +100,8 @@ def test_from_from_param_store(self, mock_boto3_client):
98100
'{"doc_db_host": "some_docdb_host",'
99101
'"doc_db_db_name":"some_docdb_dbname",'
100102
'"doc_db_collection_name":"some_docdb_collection_name",'
101-
'"s3_bucket":"some_bucket"}'
103+
'"s3_bucket":"some_bucket",'
104+
'"run_s3_sync":false}'
102105
),
103106
"Version": 1,
104107
"LastModifiedDate": datetime(
@@ -129,6 +132,7 @@ def test_from_from_param_store(self, mock_boto3_client):
129132
doc_db_host="some_docdb_host",
130133
doc_db_db_name="some_docdb_dbname",
131134
doc_db_collection_name="some_docdb_collection_name",
135+
run_s3_sync=False,
132136
)
133137
self.assertEqual(expected_job_settings, job_settings)
134138

@@ -167,6 +171,8 @@ def test_class_constructor(self):
167171
self.assertEqual(
168172
"some_docdb_collection_name", job_settings.doc_db_collection_name
169173
)
174+
self.assertTrue(job_settings.run_docdb_sync)
175+
self.assertTrue(job_settings.run_s3_sync)
170176

171177

172178
class TestCodeOceanIndexBucketJobSettings(unittest.TestCase):
@@ -194,6 +200,7 @@ def test_defaults(self):
194200
self.assertEqual(
195201
"some_co_token", job_settings.codeocean_token.get_secret_value()
196202
)
203+
self.assertTrue(job_settings.run_co_sync)
197204

198205
@patch("boto3.client")
199206
def test_from_from_param_store(self, mock_boto3_client):

0 commit comments

Comments
 (0)