Skip to content

Commit da4bc0e

Browse files
committed
Add error handling
1 parent 2d2a738 commit da4bc0e

File tree

1 file changed

+140
-122
lines changed
  • src/idc_annotation_conversion/pan_cancer_nuclei_seg/__main__.py

1 file changed

+140
-122
lines changed

src/idc_annotation_conversion/pan_cancer_nuclei_seg/__main__.py

Lines changed: 140 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from google.cloud import storage
1616
import highdicom as hd
1717
from oauthlib.oauth2 import BackendApplicationClient
18+
import pandas as pd
1819
from requests_oauthlib import OAuth2Session
1920

2021
from idc_annotation_conversion import cloud_io
@@ -339,6 +340,8 @@ def run(
339340
"Unsuccessful connecting to requested DICOM archive."
340341
) from e
341342

343+
errors = []
344+
342345
# Loop over requested collections
343346
for collection in collections:
344347
prefix = f'cnn-nuclear-segmentations-2019/data-files/{collection}/'
@@ -376,141 +379,156 @@ def run(
376379

377380
logging.info(f"Processing container: {container_id}")
378381

379-
selection_query = f"""
380-
SELECT
381-
gcs_url,
382-
Cast(NumberOfFrames AS int) AS NumberOfFrames
383-
FROM
384-
bigquery-public-data.idc_current.dicom_all
385-
WHERE
386-
ContainerIdentifier='{container_id}'
387-
ORDER BY
388-
NumberOfFrames DESC
389-
"""
390-
selection_result = bq_client.query(selection_query)
391-
selection_df = selection_result.result().to_dataframe()
392-
393-
if len(selection_df) == 0:
394-
# No image found, skip this for now
395-
logging.error(
396-
f"Could not locate image for container {container_id}."
397-
)
398-
continue
399-
400-
source_images = []
401-
for i, url in enumerate(selection_df.gcs_url):
402-
blob_name = "/".join(url.split("/")[3:])
403-
wsi_dcm = cloud_io.read_dataset_from_blob(
404-
bucket=public_bucket,
405-
blob_name=blob_name,
382+
try:
383+
384+
selection_query = f"""
385+
SELECT
386+
gcs_url,
387+
Cast(NumberOfFrames AS int) AS NumberOfFrames
388+
FROM
389+
bigquery-public-data.idc_current.dicom_all
390+
WHERE
391+
ContainerIdentifier='{container_id}'
392+
ORDER BY
393+
NumberOfFrames DESC
394+
"""
395+
selection_result = bq_client.query(selection_query)
396+
selection_df = selection_result.result().to_dataframe()
397+
398+
if len(selection_df) == 0:
399+
# No image found, skip this for now
400+
logging.error(
401+
f"Could not locate image for container {container_id}."
402+
)
403+
continue
404+
405+
source_images = []
406+
for i, url in enumerate(selection_df.gcs_url):
407+
blob_name = "/".join(url.split("/")[3:])
408+
wsi_dcm = cloud_io.read_dataset_from_blob(
409+
bucket=public_bucket,
410+
blob_name=blob_name,
411+
)
412+
source_images.append(wsi_dcm)
413+
414+
# Store to disk
415+
if output_dir is not None and store_wsi_dicom:
416+
wsi_path = (
417+
collection_dir / f"{container_id}_im_{i}.dcm"
418+
)
419+
wsi_dcm.save_as(wsi_path)
420+
421+
# Store to DICOM archive
422+
if dicom_archive is not None:
423+
web_client = get_dicom_web_client(
424+
url=dicom_archive,
425+
token_url=archive_token_url,
426+
client_id=archive_client_id,
427+
client_secret=archive_client_secret,
428+
)
429+
web_client.store_instances([wsi_dcm])
430+
431+
ann_dcm, seg_dcms = convert_annotations(
432+
annotation_csvs=iter_csvs(ann_blob),
433+
source_images=source_images,
434+
include_segmentation=with_segmentation,
435+
segmentation_type=segmentation_type,
436+
annotation_coordinate_type=annotation_coordinate_type,
437+
dimension_organization_type=dimension_organization_type,
438+
create_pyramid=create_pyramid,
439+
graphic_type=graphic_type,
440+
workers=workers,
406441
)
407-
source_images.append(wsi_dcm)
408442

409-
# Store to disk
410-
if output_dir is not None and store_wsi_dicom:
411-
wsi_path = (
412-
collection_dir / f"{container_id}_im_{i}.dcm"
443+
# Store objects to bucket
444+
if store_bucket:
445+
if output_bucket is None:
446+
data_str = (datetime.date.today())
447+
output_bucket = (
448+
"pan_cancer_nuclei_seg_annotation_"
449+
f"conversion_{data_str}"
450+
)
451+
output_bucket_obj = output_client.bucket(output_bucket)
452+
453+
if not output_bucket_obj.exists():
454+
output_bucket_obj.create(
455+
location=cloud_config.GCP_DEFAULT_LOCATION
456+
)
457+
458+
blob_root = (
459+
"" if output_prefix is None else f"{output_prefix}/"
460+
)
461+
ann_blob_name = (
462+
f"{blob_root}{collection}/{container_id}_ann.dcm"
413463
)
414-
wsi_dcm.save_as(wsi_path)
415464

416-
# Store to DICOM archive
465+
logging.info(f"Uploading annotation to {ann_blob_name}.")
466+
cloud_io.write_dataset_to_blob(
467+
ann_dcm,
468+
output_bucket_obj,
469+
ann_blob_name,
470+
)
471+
if with_segmentation:
472+
for s, seg_dcm in enumerate(seg_dcms):
473+
seg_blob_name = (
474+
f"{blob_root}{collection}/{container_id}_seg_{s}.dcm"
475+
)
476+
logging.info(
477+
f"Uploading segmentation to {seg_blob_name}."
478+
)
479+
cloud_io.write_dataset_to_blob(
480+
seg_dcm,
481+
output_bucket_obj,
482+
seg_blob_name,
483+
)
484+
485+
# Store objects to filesystem
486+
if output_dir is not None:
487+
ann_path = collection_dir / f"{container_id}_ann.dcm"
488+
489+
logging.info(f"Writing annotation to {str(ann_path)}.")
490+
ann_dcm.save_as(ann_path)
491+
492+
if with_segmentation:
493+
for s, seg_dcm in enumerate(seg_dcms):
494+
seg_path = collection_dir / f"{container_id}_seg_{s}.dcm"
495+
logging.info(f"Writing segmentation to {str(seg_path)}.")
496+
seg_dcm.save_as(seg_path)
497+
498+
# Store objects to DICOM archive
417499
if dicom_archive is not None:
500+
# Recreate client each time to deal with token expiration
418501
web_client = get_dicom_web_client(
419502
url=dicom_archive,
420503
token_url=archive_token_url,
421504
client_id=archive_client_id,
422505
client_secret=archive_client_secret,
423506
)
424-
web_client.store_instances([wsi_dcm])
425-
426-
ann_dcm, seg_dcms = convert_annotations(
427-
annotation_csvs=iter_csvs(ann_blob),
428-
source_images=source_images,
429-
include_segmentation=with_segmentation,
430-
segmentation_type=segmentation_type,
431-
annotation_coordinate_type=annotation_coordinate_type,
432-
dimension_organization_type=dimension_organization_type,
433-
create_pyramid=create_pyramid,
434-
graphic_type=graphic_type,
435-
workers=workers,
436-
)
437507

438-
# Store objects to bucket
439-
if store_bucket:
440-
if output_bucket is None:
441-
data_str = (datetime.date.today())
442-
output_bucket = (
443-
"pan_cancer_nuclei_seg_annotation_"
444-
f"conversion_{data_str}"
445-
)
446-
output_bucket_obj = output_client.bucket(output_bucket)
447-
448-
if not output_bucket_obj.exists():
449-
output_bucket_obj.create(
450-
location=cloud_config.GCP_DEFAULT_LOCATION
451-
)
452-
453-
blob_root = (
454-
"" if output_prefix is None else f"{output_prefix}/"
455-
)
456-
ann_blob_name = (
457-
f"{blob_root}{collection}/{container_id}_ann.dcm"
458-
)
459-
460-
logging.info(f"Uploading annotation to {ann_blob_name}.")
461-
cloud_io.write_dataset_to_blob(
462-
ann_dcm,
463-
output_bucket_obj,
464-
ann_blob_name,
465-
)
466-
if with_segmentation:
467-
for s, seg_dcm in enumerate(seg_dcms):
468-
seg_blob_name = (
469-
f"{blob_root}{collection}/{container_id}_seg_{s}.dcm"
470-
)
471-
logging.info(
472-
f"Uploading segmentation to {seg_blob_name}."
473-
)
474-
cloud_io.write_dataset_to_blob(
475-
seg_dcm,
476-
output_bucket_obj,
477-
seg_blob_name,
478-
)
479-
480-
# Store objects to filesystem
481-
if output_dir is not None:
482-
ann_path = collection_dir / f"{container_id}_ann.dcm"
483-
484-
logging.info(f"Writing annotation to {str(ann_path)}.")
485-
ann_dcm.save_as(ann_path)
486-
487-
if with_segmentation:
488-
for s, seg_dcm in enumerate(seg_dcms):
489-
seg_path = collection_dir / f"{container_id}_seg_{s}.dcm"
490-
logging.info(f"Writing segmentation to {str(seg_path)}.")
491-
seg_dcm.save_as(seg_path)
492-
493-
# Store objects to DICOM archive
494-
if dicom_archive is not None:
495-
# Recreate client each time to deal with token expiration
496-
web_client = get_dicom_web_client(
497-
url=dicom_archive,
498-
token_url=archive_token_url,
499-
client_id=archive_client_id,
500-
client_secret=archive_client_secret,
508+
logging.info(f"Writing annotation to {dicom_archive}.")
509+
web_client.store_instances([ann_dcm])
510+
511+
if with_segmentation:
512+
logging.info(f"Writing segmentation(s) to {dicom_archive}.")
513+
for seg_dcm in seg_dcms:
514+
web_client.store_instances([seg_dcm])
515+
516+
image_stop_time = time()
517+
time_for_image = image_stop_time - image_start_time
518+
logging.info(f"Processed {container_id} in {time_for_image:.2f}s")
519+
520+
except Exception as e:
521+
logging.error(f"Error {str(e)}")
522+
errors.append(
523+
{
524+
"collection": collection,
525+
"container_id": container_id,
526+
"error_message": str(e),
527+
"datetime": str(datetime.datetime.now()),
528+
}
501529
)
502-
503-
logging.info(f"Writing annotation to {dicom_archive}.")
504-
web_client.store_instances([ann_dcm])
505-
506-
if with_segmentation:
507-
logging.info(f"Writing segmentation(s) to {dicom_archive}.")
508-
for seg_dcm in seg_dcms:
509-
web_client.store_instances([seg_dcm])
510-
511-
image_stop_time = time()
512-
time_for_image = image_stop_time - image_start_time
513-
logging.info(f"Processed {container_id} in {time_for_image:.2f}s")
530+
errors_df = pd.DataFrame(errors)
531+
errors_df.to_csv("error_log.csv")
514532

515533

516534
if __name__ == "__main__":

0 commit comments

Comments (0)