Skip to content

Commit 96817ca

Browse files
Refactor 52 log messages (#100)
* refactor: logs and scripts dir --------- Co-authored-by: Helen Lin <[email protected]>
1 parent 073faa6 commit 96817ca

12 files changed

+682
-805
lines changed

Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ FROM python:3.10-slim
33
WORKDIR /app
44

55
ADD src ./src
6+
ADD scripts ./scripts
67
ADD pyproject.toml .
78
ADD setup.py .
89

910
RUN apt-get update
1011
RUN pip install . --no-cache-dir
1112
RUN pip install awscli
1213

13-
RUN chmod +x ./src/aind_data_asset_indexer/run.sh
14-
CMD ["./src/aind_data_asset_indexer/run.sh"]
14+
RUN chmod +x ./scripts/run.sh
15+
CMD ["./scripts/run.sh"]

src/aind_data_asset_indexer/aind_bucket_indexer.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,14 @@ def _resolve_schema_information(
272272
object_key = create_object_key(
273273
prefix=prefix, filename=core_schema_file_name
274274
)
275-
common_kwargs["core_schema_info_in_root"] = (
276-
get_dict_of_file_info(
277-
s3_client=s3_client,
278-
bucket=self.job_settings.s3_bucket,
279-
keys=[object_key],
280-
).get(object_key)
275+
common_kwargs[
276+
"core_schema_info_in_root"
277+
] = get_dict_of_file_info(
278+
s3_client=s3_client,
279+
bucket=self.job_settings.s3_bucket,
280+
keys=[object_key],
281+
).get(
282+
object_key
281283
)
282284
self._copy_file_from_root_to_subdir(**common_kwargs)
283285
# If field is null, a file exists in the root folder, and
@@ -391,7 +393,7 @@ def _process_docdb_record(
391393
response = collection.delete_one(
392394
filter={"_id": docdb_record["_id"]}
393395
)
394-
logging.info(response.raw_result)
396+
logging.debug(response.raw_result)
395397
else: # There is a metadata.nd.json file in S3.
396398
# Schema info in root level directory
397399
s3_core_schema_info = get_dict_of_core_schema_file_info(
@@ -422,9 +424,9 @@ def _process_docdb_record(
422424
)
423425
db = docdb_client[self.job_settings.doc_db_db_name]
424426
collection = db[self.job_settings.doc_db_collection_name]
425-
fields_to_update["last_modified"] = (
426-
datetime.utcnow().isoformat()
427-
)
427+
fields_to_update[
428+
"last_modified"
429+
] = datetime.utcnow().isoformat()
428430
response = collection.update_one(
429431
{"_id": docdb_record["_id"]},
430432
{"$set": fields_to_update},
@@ -580,20 +582,22 @@ def _process_prefix(
580582
]
581583
if "_id" in json_contents:
582584
# TODO: check is_dict_corrupt(json_contents)
585+
logging.info(
586+
f"Adding record to docdb for: {location}"
587+
)
583588
response = collection.update_one(
584589
{"_id": json_contents["_id"]},
585590
{"$set": json_contents},
586591
upsert=True,
587592
)
588-
logging.info(response.raw_result)
593+
logging.debug(response.raw_result)
589594
cond_copy_then_sync_core_json_files(
590595
metadata_json=json.dumps(
591596
json_contents, default=str
592597
),
593598
bucket=bucket,
594599
prefix=s3_prefix,
595600
s3_client=s3_client,
596-
log_flag=True,
597601
copy_original_md_subdir=(
598602
self.job_settings.copy_original_md_subdir
599603
),
@@ -635,7 +639,6 @@ def _process_prefix(
635639
bucket=bucket,
636640
prefix=s3_prefix,
637641
s3_client=s3_client,
638-
log_flag=True,
639642
copy_original_md_subdir=(
640643
self.job_settings.copy_original_md_subdir
641644
),
@@ -648,7 +651,7 @@ def _process_prefix(
648651
prefix=s3_prefix,
649652
s3_client=s3_client,
650653
)
651-
logging.info(s3_response)
654+
logging.debug(s3_response)
652655
# Assume Lambda function will move it to DocDb. If it doesn't,
653656
# then next index job will pick it up.
654657
else:

src/aind_data_asset_indexer/codeocean_bucket_indexer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def _process_codeocean_record(
278278
{"$set": json_contents},
279279
upsert=True,
280280
)
281-
logging.info(x.raw_result)
281+
logging.debug(x.raw_result)
282282
else:
283283
logging.warning(
284284
f"Unable to build metadata record for: {location}!"
@@ -363,10 +363,11 @@ def _dask_task_to_delete_record_list(self, record_list: List[str]):
363363
db = docdb_client[self.job_settings.doc_db_db_name]
364364
collection = db[self.job_settings.doc_db_collection_name]
365365
try:
366+
logging.info(f"Removing {len(record_list)} records")
366367
response = collection.delete_many(
367368
filter={"_id": {"$in": record_list}}
368369
)
369-
logging.info(response.raw_result)
370+
logging.debug(response.raw_result)
370371
except Exception as e:
371372
logging.error(f"Error deleting records: {repr(e)}")
372373
docdb_client.close()

src/aind_data_asset_indexer/populate_s3_with_metadata_files.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ def _process_prefix(self, prefix: str, s3_client: S3Client):
7979
bucket=bucket,
8080
prefix=prefix,
8181
s3_client=s3_client,
82-
log_flag=True,
8382
copy_original_md_subdir=(
8483
self.job_settings.copy_original_md_subdir
8584
),
@@ -92,7 +91,7 @@ def _process_prefix(self, prefix: str, s3_client: S3Client):
9291
prefix=prefix,
9392
s3_client=s3_client,
9493
)
95-
logging.info(response)
94+
logging.debug(response)
9695
else:
9796
logging.warning(
9897
f"Unable to build metadata record for: {location}!"

src/aind_data_asset_indexer/utils.py

Lines changed: 23 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -31,38 +31,6 @@
3131
]
3232

3333

34-
def _log_message(
35-
message: str, log_level: int = logging.INFO, log_flag: bool = True
36-
) -> None:
37-
"""
38-
Log a message using the given log level. If log_flag is False,
39-
then it will not log anything.
40-
41-
Parameters
42-
----------
43-
message : str
44-
log_level : int
45-
Default is logging.INFO
46-
log_flag : bool
47-
Default is True
48-
49-
Returns
50-
-------
51-
None
52-
"""
53-
if not log_flag:
54-
return
55-
if log_level not in [
56-
logging.DEBUG,
57-
logging.INFO,
58-
logging.WARNING,
59-
logging.ERROR,
60-
logging.CRITICAL,
61-
]:
62-
raise ValueError("Invalid log level")
63-
logging.log(log_level, message)
64-
65-
6634
def create_object_key(prefix: str, filename: str) -> str:
6735
"""
6836
For a given s3 prefix and filename, create the expected
@@ -656,7 +624,6 @@ def cond_copy_then_sync_core_json_files(
656624
prefix: str,
657625
s3_client: S3Client,
658626
copy_original_md_subdir: str = "original_metadata",
659-
log_flag: bool = False,
660627
) -> None:
661628
"""
662629
For a given bucket and prefix
@@ -675,8 +642,6 @@ def cond_copy_then_sync_core_json_files(
675642
The prefix for the S3 object keys.
676643
s3_client : S3Client
677644
The S3 client object.
678-
log_flag: bool
679-
Flag indicating whether to log operations. Default is False.
680645
copy_original_md_subdir : str
681646
Subdirectory to copy original core schema json files to.
682647
Default is 'original_metadata'.
@@ -692,27 +657,22 @@ def cond_copy_then_sync_core_json_files(
692657
prefix=prefix,
693658
copy_subdir=copy_original_md_subdir,
694659
):
695-
_log_message(
696-
message=(
697-
"Copy of original metadata already exists at "
698-
f"s3://{bucket}/{prefix}/{copy_original_md_subdir}"
699-
),
700-
log_flag=log_flag,
660+
logging.warning(
661+
"Copy of original metadata already exists at "
662+
f"s3://{bucket}/{prefix}/{copy_original_md_subdir}"
701663
)
702664
else:
703665
copy_core_json_files(
704666
bucket=bucket,
705667
prefix=prefix,
706668
s3_client=s3_client,
707669
copy_original_md_subdir=copy_original_md_subdir,
708-
log_flag=log_flag,
709670
)
710671
sync_core_json_files(
711672
metadata_json=metadata_json,
712673
bucket=bucket,
713674
prefix=prefix,
714675
s3_client=s3_client,
715-
log_flag=log_flag,
716676
)
717677

718678

@@ -721,7 +681,6 @@ def copy_core_json_files(
721681
prefix: str,
722682
s3_client: S3Client,
723683
copy_original_md_subdir: str,
724-
log_flag: bool = False,
725684
) -> None:
726685
"""
727686
For a given bucket and prefix, copy the core schema files to a
@@ -735,8 +694,6 @@ def copy_core_json_files(
735694
The prefix for the S3 object keys.
736695
s3_client : S3Client
737696
The S3 client object.
738-
log_flag: bool
739-
Flag indicating whether to log operations. Default is False.
740697
copy_original_md_subdir : str
741698
Subdirectory to copy original core schema json files to.
742699
For example, 'original_metadata'.
@@ -766,23 +723,17 @@ def copy_core_json_files(
766723
filename=file_name.replace(".json", f".{date_stamp}.json"),
767724
)
768725
# Copy original core json files to /original_metadata
769-
_log_message(
770-
message=f"Copying {source} to {target} in s3://{bucket}",
771-
log_flag=log_flag,
772-
)
726+
logging.info(f"Copying {source} to {target} in s3://{bucket}")
773727
response = s3_client.copy_object(
774728
Bucket=bucket,
775729
CopySource={"Bucket": bucket, "Key": source},
776730
Key=target,
777731
)
778-
_log_message(message=response, log_flag=log_flag)
732+
logging.debug(response)
779733
else:
780-
_log_message(
781-
message=(
782-
f"Source file {source_location} does not exist. "
783-
f"Skipping copy."
784-
),
785-
log_flag=log_flag,
734+
logging.info(
735+
f"Source file {source_location} does not exist. "
736+
f"Skipping copy."
786737
)
787738

788739

@@ -791,7 +742,6 @@ def sync_core_json_files(
791742
bucket: str,
792743
prefix: str,
793744
s3_client: S3Client,
794-
log_flag: bool = False,
795745
) -> None:
796746
"""
797747
Sync the core schema files with the core fields from metadata.nd.json.
@@ -810,8 +760,6 @@ def sync_core_json_files(
810760
The prefix for the S3 object keys.
811761
s3_client : S3Client
812762
The S3 client object.
813-
log_flag: bool
814-
Flag indicating whether to log operations. Default is False.
815763
816764
Returns
817765
-------
@@ -838,66 +786,51 @@ def sync_core_json_files(
838786
# Core schema jsons are created if they don't already exist.
839787
# Otherwise, they are only updated if their contents are outdated.
840788
if core_files_infos[object_key] is None:
841-
_log_message(
842-
message=(f"Uploading new {field_name} to {location}"),
843-
log_flag=log_flag,
844-
)
789+
logging.info(f"Uploading new {field_name} to {location}")
845790
response = upload_json_str_to_s3(
846791
bucket=bucket,
847792
object_key=object_key,
848793
json_str=field_contents_str,
849794
s3_client=s3_client,
850795
)
851-
_log_message(message=response, log_flag=log_flag)
796+
logging.debug(response)
852797
else:
853798
s3_object_hash = core_files_infos[object_key]["e_tag"].strip(
854799
'"'
855800
)
856801
core_field_md5_hash = compute_md5_hash(field_contents_str)
857802
if core_field_md5_hash != s3_object_hash:
858-
_log_message(
859-
message=(
860-
f"Uploading updated {field_name} to {location}"
861-
),
862-
log_flag=log_flag,
803+
logging.info(
804+
f"Uploading updated {field_name} to {location}"
863805
)
864806
response = upload_json_str_to_s3(
865807
bucket=bucket,
866808
object_key=object_key,
867809
json_str=field_contents_str,
868810
s3_client=s3_client,
869811
)
870-
_log_message(message=response, log_flag=log_flag)
812+
logging.debug(response)
871813
else:
872-
_log_message(
873-
message=(
874-
f"{field_name} is up-to-date in {location}. "
875-
f"Skipping."
876-
),
877-
log_flag=log_flag,
814+
logging.info(
815+
f"{field_name} is up-to-date in {location}. "
816+
f"Skipping."
878817
)
879818
else:
880819
# If a core field is None but the core json exists,
881820
# delete the core json.
882821
if core_files_infos[object_key] is not None:
883-
_log_message(
884-
message=(
885-
f"{field_name} not found in metadata.nd.json for "
886-
f"{prefix} but {location} exists! Deleting."
887-
),
888-
log_flag=log_flag,
822+
logging.info(
823+
f"{field_name} not found in metadata.nd.json for "
824+
f"{prefix} but {location} exists! Deleting."
889825
)
890826
response = s3_client.delete_object(
891827
Bucket=bucket, Key=object_key
892828
)
893-
_log_message(message=response, log_flag=log_flag)
829+
logging.debug(response)
894830
else:
895-
_log_message(
896-
message=(
897-
f"{field_name} not found in metadata.nd.json for "
898-
f"{prefix} nor in {location}! Skipping."
899-
),
900-
log_flag=log_flag,
831+
logging.info(
832+
f"{field_name} not found in metadata.nd.json for "
833+
f"{prefix} nor in {location}! Skipping."
901834
)
902835

903836

0 commit comments

Comments (0)