Skip to content

Commit 43393e8

Browse files
committed
feat: Added benchmark setup script (WIP)
1 parent 60de9ac commit 43393e8

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

scribblebench/setup_benchmark.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
from git import Repo
99
import os
1010
from utils.download_kits23 import download_dataset
11+
from utils.info2dict import info2dict
1112
from natsort import natsorted
1213
import tarfile
14+
from tqdm import tqdm
1315

1416

1517
def setup_word_dataset(dataset_dir):
@@ -338,13 +340,107 @@ def setup_lits_dataset(dataset_dir):
338340
print("Finished setting up LiTS dataset.")
339341

340342

343+
def setup_acdc_dataset(dataset_dir):
    """Download, unpack and reorganise the ACDC dataset for ScribbleBench.

    Everything happens below ``<dataset_dir>/ScribbleBench``:

    1. The ACDC archive is downloaded to ``archive/ACDC.zip``.
    2. The archive is extracted into ``raw/``.
    3. For every case directory in ``raw/ACDC/database/training`` the ED and
       ES frames (indices read from the case's ``Info.cfg``) and their
       ``_gt`` label files are moved into ``ACDC/images{Tr,Ts}`` and
       ``ACDC/labels{Tr,Ts}``. Cases in the hard-coded test split go to the
       ``Ts`` folders, all other cases to the ``Tr`` folders.
    4. A ``dataset.json`` is downloaded into ``ACDC/``.
    5. The ``archive`` and ``raw`` directories are deleted.

    Args:
        dataset_dir: Path to the directory used for setting up ScribbleBench.

    Raises:
        requests.HTTPError: If one of the downloads returns an error status.
    """
    dataset_dir = Path(dataset_dir) / "ScribbleBench"
    archive_dir = dataset_dir / "archive"
    raw_dir = dataset_dir / "raw"
    acdc_preprocessed_dir = dataset_dir / "ACDC"
    archive_dir.mkdir(parents=True, exist_ok=True)
    acdc_preprocessed_dir.mkdir(parents=True, exist_ok=True)

    # Only used for membership tests, hence a frozenset instead of a list.
    test_set = frozenset([
        'patient072_ED', 'patient041_ED', 'patient078_ED', 'patient024_ED', 'patient060_ES', 'patient078_ES', 'patient073_ED',
        'patient082_ED', 'patient010_ED', 'patient005_ED', 'patient077_ED', 'patient080_ED', 'patient024_ES', 'patient077_ES',
        'patient030_ES', 'patient039_ES', 'patient041_ES', 'patient082_ES', 'patient060_ED', 'patient010_ES', 'patient030_ED',
        'patient005_ES', 'patient036_ES', 'patient073_ES', 'patient064_ES', 'patient039_ED', 'patient080_ES', 'patient064_ED',
        'patient036_ED', 'patient072_ES',
    ])

    # Per-socket-operation timeout (seconds) so a stalled server cannot hang
    # the script forever; it does not limit the total download duration.
    request_timeout = 60

    ####################################################################################################################
    #### Download ACDC dataset
    ####################################################################################################################

    print("Downloading ACDC dataset...")

    url = "https://humanheart-project.creatis.insa-lyon.fr/database/api/v1/collection/637218c173e9f0047faa00fb/download"
    acdc_archive_file = archive_dir / "ACDC.zip"
    # Stream the download with a progress bar
    with requests.get(url, stream=True, timeout=request_timeout) as response:
        response.raise_for_status()
        # Hard-coded approximate archive size, used only as the progress bar total.
        approx_total_size = 2452590457
        chunk_size = 8192

        with open(acdc_archive_file, "wb") as f, tqdm(
            total=approx_total_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=acdc_archive_file.name,
        ) as progress:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
                    progress.update(len(chunk))

    ####################################################################################################################
    #### Unpack ACDC archive
    ####################################################################################################################

    print("Unpacking ACDC archive...")
    with zipfile.ZipFile(acdc_archive_file, 'r') as zip_ref:
        zip_ref.extractall(raw_dir)

    ####################################################################################################################
    #### Preprocess ACDC dataset
    ####################################################################################################################

    print("Preprocessing ACDC dataset...")

    for subdir in ("imagesTr", "imagesTs", "labelsTr", "labelsTs"):
        (acdc_preprocessed_dir / subdir).mkdir(parents=True, exist_ok=True)

    acdc_train_raw_dir = raw_dir / "ACDC" / "database" / "training"
    names = natsorted(p.name for p in acdc_train_raw_dir.iterdir() if p.is_dir())

    for name in names:
        case_dir = acdc_train_raw_dir / name
        info = info2dict(case_dir / "Info.cfg")
        # The split is decided per case via its ED entry; both frames of a
        # case therefore always end up in the same split.
        postfix = "Ts" if f"{name}_ED" in test_set else "Tr"
        images_dir = acdc_preprocessed_dir / f"images{postfix}"
        labels_dir = acdc_preprocessed_dir / f"labels{postfix}"
        for phase in ("ED", "ES"):
            # Single quotes / plain indexing inside the f-string keep this
            # valid on Python versions older than 3.12.
            frame_name = f"{name}_frame{str(info[phase]).zfill(2)}"
            shutil.move(case_dir / f"{frame_name}.nii.gz", images_dir / f"{name}_{phase}_0000.nii.gz")
            shutil.move(case_dir / f"{frame_name}_gt.nii.gz", labels_dir / f"{name}_{phase}.nii.gz")

    dataset_json_url = "https://syncandshare.desy.de/index.php/s/KCDbLyeMwwZpFH5/download/dataset.json"
    response = requests.get(dataset_json_url, timeout=request_timeout)
    response.raise_for_status()  # Raise an error on bad status
    with open(acdc_preprocessed_dir / "dataset.json", "wb") as f:
        f.write(response.content)

    ####################################################################################################################
    #### Delete raw dataset files
    ####################################################################################################################

    print("Deleting archive and raw dataset files...")
    shutil.rmtree(archive_dir, ignore_errors=True)
    shutil.rmtree(raw_dir, ignore_errors=True)

    print("Finished setting up ACDC dataset.")
434+
435+
341436
if __name__ == '__main__':
342437
parser = argparse.ArgumentParser()
343438
parser.add_argument('-d', "--dataset_dir", required=True, type=str, help="Path to the dir used for setting up ScribbleBench.")
344439
parser.add_argument('--word', required=False, default=False, action="store_true", help="Download and preprocess the WORD dataset for ScribbleBench.")
345440
parser.add_argument('--mscmr', required=False, default=False, action="store_true", help="Download and preprocess the MSCMR dataset for ScribbleBench.")
346441
parser.add_argument('--kits', required=False, default=False, action="store_true", help="Download and preprocess the KiTS2023 dataset for ScribbleBench.")
347442
parser.add_argument('--lits', required=False, default=False, action="store_true", help="Download and preprocess the LiTS dataset for ScribbleBench.")
443+
parser.add_argument('--acdc', required=False, default=False, action="store_true", help="Download and preprocess the ACDC dataset for ScribbleBench.")
348444
args = parser.parse_args()
349445

350446
if args.word:
@@ -355,3 +451,6 @@ def setup_lits_dataset(dataset_dir):
355451
setup_kits_dataset(args.dataset_dir)
356452
if args.lits:
357453
setup_lits_dataset(args.dataset_dir)
454+
if args.acdc:  # the ACDC setup must be gated on --acdc, not --lits
    setup_acdc_dataset(args.dataset_dir)
456+

scribblebench/utils/info2dict.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
def info2dict(filepath):
    """Parse the ``Info.cfg`` next to *filepath* into a dictionary.

    The file that is read is always ``Info.cfg`` in the parent directory of
    *filepath*; the final path component of *filepath* itself is ignored.
    Each ``key: value`` line becomes one entry. Lines that are empty or
    contain no ``:`` separator are skipped.

    Args:
        filepath: ``str`` or path-like pointing at a file inside the directory
            that contains ``Info.cfg`` (typically the ``Info.cfg`` itself).

    Returns:
        dict mapping the stripped keys to their values. Values made up only
        of digits become ``int``, digits with a single ``.`` become
        ``float``, everything else stays a stripped ``str``.

    Raises:
        OSError: If ``Info.cfg`` cannot be opened.
    """
    from pathlib import Path

    # Accept plain strings as well as Path objects.
    cfg_path = Path(filepath).parent / "Info.cfg"
    with open(cfg_path, "r", encoding="utf-8") as f:
        info = f.read()

    data = {}
    for line in info.strip().splitlines():
        # Split on the first ":" only, so values may themselves contain ":".
        key, sep, value = line.partition(":")
        if not sep:
            # Blank line or line without a separator: nothing to parse.
            continue
        key = key.strip()
        value = value.strip()

        # Convert value to int or float if possible
        if value.replace('.', '', 1).isdigit():
            value = float(value) if '.' in value else int(value)

        data[key] = value
    return data

0 commit comments

Comments
 (0)