88from git import Repo
99import os
1010from utils .download_kits23 import download_dataset
11+ from utils .info2dict import info2dict
1112from natsort import natsorted
1213import tarfile
14+ from tqdm import tqdm
1315
1416
1517def setup_word_dataset (dataset_dir ):
@@ -338,13 +340,107 @@ def setup_lits_dataset(dataset_dir):
338340 print ("Finished setting up LiTS dataset." )
339341
340342
def setup_acdc_dataset(dataset_dir):
    """Download, unpack and preprocess the ACDC dataset for ScribbleBench.

    Everything happens below ``<dataset_dir>/ScribbleBench``:

    1. The ACDC archive is streamed to ``archive/ACDC.zip``.
    2. The archive is extracted into ``raw``.
    3. For every patient folder in ``raw/ACDC/database/training`` the ED and ES
       frames named in the patient's ``Info.cfg`` are moved, together with
       their ``_gt`` label files, into ``ACDC/images{Tr,Ts}`` and
       ``ACDC/labels{Tr,Ts}``.
    4. ``dataset.json`` is downloaded into ``ACDC``.
    5. The ``archive`` and ``raw`` directories are deleted.

    Args:
        dataset_dir: Base directory (string or path-like) in which the
            ``ScribbleBench`` folder is created.

    Raises:
        requests.exceptions.RequestException: If a download cannot be
            started, stalls longer than the timeout, or returns an HTTP
            error status.
    """
    dataset_dir = Path(dataset_dir) / "ScribbleBench"
    archive_dir = dataset_dir / "archive"
    raw_dir = dataset_dir / "raw"
    acdc_preprocessed_dir = dataset_dir / "ACDC"
    archive_dir.mkdir(parents=True, exist_ok=True)

    # Cases that make up the test split. A frozenset gives O(1) membership
    # checks inside the per-patient loop below.
    test_set = frozenset([
        'patient072_ED', 'patient041_ED', 'patient078_ED', 'patient024_ED', 'patient060_ES', 'patient078_ES', 'patient073_ED',
        'patient082_ED', 'patient010_ED', 'patient005_ED', 'patient077_ED', 'patient080_ED', 'patient024_ES', 'patient077_ES',
        'patient030_ES', 'patient039_ES', 'patient041_ES', 'patient082_ES', 'patient060_ED', 'patient010_ES', 'patient030_ED',
        'patient005_ES', 'patient036_ES', 'patient073_ES', 'patient064_ES', 'patient039_ED', 'patient080_ES', 'patient064_ED',
        'patient036_ED', 'patient072_ES',
    ])

    # (connect, read) timeouts in seconds. The read timeout bounds the wait
    # for the next chunk, not the whole transfer, so the large archive can
    # still take as long as it needs while a dead connection fails fast.
    timeout = (10, 60)

    ####################################################################################################################
    #### Download ACDC dataset
    ####################################################################################################################

    print("Downloading ACDC dataset...")

    url = "https://humanheart-project.creatis.insa-lyon.fr/database/api/v1/collection/637218c173e9f0047faa00fb/download"
    acdc_archive_file = archive_dir / "ACDC.zip"
    # Stream the download with a progress bar
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        # Prefer the size announced by the server; fall back to the known
        # approximate archive size when no usable Content-Length is sent.
        approx_total_size = 2452590457
        announced_size = response.headers.get("Content-Length", "")
        total_size = int(announced_size) if announced_size.isdigit() else approx_total_size
        chunk_size = 8192

        with open(acdc_archive_file, "wb") as f, tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=acdc_archive_file.name,
        ) as progress:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
                    progress.update(len(chunk))

    ####################################################################################################################
    #### Unpack ACDC archive
    ####################################################################################################################

    print("Unpacking ACDC archive...")
    with zipfile.ZipFile(acdc_archive_file, 'r') as zip_ref:
        zip_ref.extractall(raw_dir)

    ####################################################################################################################
    #### Preprocess ACDC dataset
    ####################################################################################################################

    print("Preprocessing ACDC dataset...")

    for subdir in ("imagesTr", "imagesTs", "labelsTr", "labelsTs"):
        (acdc_preprocessed_dir / subdir).mkdir(parents=True, exist_ok=True)

    acdc_train_raw_dir = raw_dir / "ACDC" / "database" / "training"
    names = natsorted(p.name for p in acdc_train_raw_dir.iterdir() if p.is_dir())

    for name in names:
        patient_dir = acdc_train_raw_dir / name
        info = info2dict(patient_dir / "Info.cfg")
        # Both phases of a patient go to the same split; the patient's ED
        # entry in test_set decides which one.
        postfix = "Ts" if f"{name}_ED" in test_set else "Tr"
        images_dir = acdc_preprocessed_dir / f"images{postfix}"
        labels_dir = acdc_preprocessed_dir / f"labels{postfix}"
        for phase in ("ED", "ES"):
            # Info.cfg stores the frame index of each phase; the raw files
            # use it zero-padded to two digits.
            frame_name = f"{name}_frame{str(info[phase]).zfill(2)}"
            shutil.move(patient_dir / f"{frame_name}.nii.gz", images_dir / f"{name}_{phase}_0000.nii.gz")
            shutil.move(patient_dir / f"{frame_name}_gt.nii.gz", labels_dir / f"{name}_{phase}.nii.gz")

    dataset_json_url = "https://syncandshare.desy.de/index.php/s/KCDbLyeMwwZpFH5/download/dataset.json"
    response = requests.get(dataset_json_url, timeout=timeout)
    response.raise_for_status()  # Raise an error on bad status
    with open(acdc_preprocessed_dir / "dataset.json", "wb") as f:
        f.write(response.content)

    ####################################################################################################################
    #### Delete raw dataset files
    ####################################################################################################################

    print("Deleting archive and raw dataset files...")
    shutil.rmtree(archive_dir, ignore_errors=True)
    shutil.rmtree(raw_dir, ignore_errors=True)

    print("Finished setting up ACDC dataset.")
434+
435+
341436if __name__ == '__main__' :
342437 parser = argparse .ArgumentParser ()
343438 parser .add_argument ('-d' , "--dataset_dir" , required = True , type = str , help = "Path to the dir used for setting up ScribbleBench." )
344439 parser .add_argument ('--word' , required = False , default = False , action = "store_true" , help = "Download and preprocess the WORD dataset for ScribbleBench." )
345440 parser .add_argument ('--mscmr' , required = False , default = False , action = "store_true" , help = "Download and preprocess the MSCMR dataset for ScribbleBench." )
346441 parser .add_argument ('--kits' , required = False , default = False , action = "store_true" , help = "Download and preprocess the KiTS2023 dataset for ScribbleBench." )
347442 parser .add_argument ('--lits' , required = False , default = False , action = "store_true" , help = "Download and preprocess the LiTS dataset for ScribbleBench." )
443+ parser .add_argument ('--acdc' , required = False , default = False , action = "store_true" , help = "Download and preprocess the ACDC dataset for ScribbleBench." )
348444 args = parser .parse_args ()
349445
350446 if args .word :
@@ -355,3 +451,6 @@ def setup_lits_dataset(dataset_dir):
355451 setup_kits_dataset (args .dataset_dir )
356452 if args .lits :
357453 setup_lits_dataset (args .dataset_dir )
# Set up ACDC only when the --acdc flag was passed.
if args.acdc:
    setup_acdc_dataset(args.dataset_dir)
456+
0 commit comments