Skip to content

Commit 0d2a2a4

Browse files
committed
feat: Added benchmark setup script (WIP)
1 parent 3fda282 commit 0d2a2a4

File tree

2 files changed

+209
-0
lines changed

2 files changed

+209
-0
lines changed

scribblebench/setup_benchmark.py

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
import gdown
2+
import subprocess
3+
from pathlib import Path
4+
import requests
5+
import zipfile
6+
import shutil
7+
import argparse
8+
from git import Repo
9+
import os
10+
11+
12+
def setup_word_dataset(dataset_dir):
    """Download, unpack and preprocess the WORD dataset for ScribbleBench.

    All data is placed below ``<dataset_dir>/ScribbleBench``. The preprocessed
    dataset ends up in ``<dataset_dir>/ScribbleBench/WORD`` with the folders
    ``imagesTr``, ``imagesTs``, ``labelsTr`` and ``labelsTs`` plus
    ``dataset.json``. The original validation split (``imagesVal`` /
    ``labelsVal``) is merged into the training folders. Image files get a
    ``_0000`` suffix appended to their name, label files keep their name.

    The temporary ``archives`` and ``raw`` folders below
    ``<dataset_dir>/ScribbleBench`` are deleted at the end.

    The ``7z`` executable must be available on PATH because the image archive
    is extracted with a password.

    Args:
        dataset_dir: Directory (str or path-like) in which the
            ``ScribbleBench`` folder is created.

    Raises:
        FileNotFoundError: If the image archive is missing after the download.
        subprocess.CalledProcessError: If ``7z`` exits with a non-zero status.
        requests.RequestException: If downloading the test labels fails
            (bad HTTP status, timeout, connection error).
    """

    def _download(url, target):
        # Stream to disk in chunks so the whole file is never held in memory.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()  # Raise an error on bad status
            with open(target, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

    def _move_cases(src_dir, dst_dir, suffix=""):
        # Materialize the listing first: files are moved out of src_dir while
        # iterating. The found path itself is moved, so files located in
        # sub-folders of src_dir are handled as well.
        for src_path in list(src_dir.rglob("*.nii.gz")):
            case_name = src_path.name[:-len(".nii.gz")]
            shutil.move(str(src_path), str(dst_dir / f"{case_name}{suffix}.nii.gz"))

    dataset_dir = Path(dataset_dir) / "ScribbleBench"
    archive_dir = dataset_dir / "archives"
    raw_dir = dataset_dir / "raw"
    word_extract_dir = raw_dir / "WORD"
    word_raw_dir = word_extract_dir / "WORD-V0.1.0"
    word_preprocessed_dir = dataset_dir / "WORD"
    archive_dir.mkdir(parents=True, exist_ok=True)
    raw_dir.mkdir(parents=True, exist_ok=True)

    ####################################################################################################################
    #### Download WORD dataset (no GT labels)
    ####################################################################################################################

    print("Downloading WORD dataset (no GT labels)...")
    word_archive = archive_dir / "WORD-V0.1.0.zip"
    url = 'https://drive.google.com/file/d/19OWCXZGrimafREhXm8O8w2HBHZTfxEgU/view'
    gdown.download(url, str(word_archive), fuzzy=True)
    if not word_archive.is_file():
        # Fail here with a clear message instead of a less obvious 7z error below.
        raise FileNotFoundError(f"Download of the WORD dataset failed, {word_archive} does not exist.")

    ####################################################################################################################
    #### Unpack WORD dataset archive
    ####################################################################################################################

    print("Unpacking WORD dataset archive...")
    # The output folder is computed outside the f-string: reusing the same
    # quote character inside an f-string expression is a syntax error before
    # Python 3.12.
    subprocess.run([
        "7z", "x", str(word_archive),
        "-pword@uestc",
        f"-o{word_extract_dir}"
    ], check=True)

    ####################################################################################################################
    #### Download WORD GT labels
    ####################################################################################################################

    print("Downloading WORD GT labels...")
    labels_archive = archive_dir / "WORD_V0.1.0_labelsTs.zip"
    _download("https://github.com/HiLab-git/WORD/raw/main/WORD_V0.1.0_labelsTs.zip", labels_archive)

    ####################################################################################################################
    #### Unpack WORD labels archive
    ####################################################################################################################

    print("Unpacking WORD labels archive...")
    with zipfile.ZipFile(labels_archive, 'r') as zip_ref:
        zip_ref.extractall(word_raw_dir)

    ####################################################################################################################
    #### Preprocess WORD dataset
    ####################################################################################################################

    print("Preprocessing WORD dataset...")
    for folder in ("imagesTr", "imagesTs", "labelsTr", "labelsTs"):
        (word_preprocessed_dir / folder).mkdir(parents=True, exist_ok=True)

    # Images: training and validation images both go to imagesTr.
    _move_cases(word_raw_dir / "imagesTr", word_preprocessed_dir / "imagesTr", suffix="_0000")
    _move_cases(word_raw_dir / "imagesVal", word_preprocessed_dir / "imagesTr", suffix="_0000")
    _move_cases(word_raw_dir / "imagesTs", word_preprocessed_dir / "imagesTs", suffix="_0000")

    # Labels: training and validation labels both go to labelsTr, names are kept.
    _move_cases(word_raw_dir / "labelsTr", word_preprocessed_dir / "labelsTr")
    _move_cases(word_raw_dir / "labelsVal", word_preprocessed_dir / "labelsTr")
    _move_cases(word_raw_dir / "labelsTs", word_preprocessed_dir / "labelsTs")

    shutil.move(str(word_raw_dir / "dataset.json"), str(word_preprocessed_dir / "dataset.json"))

    ####################################################################################################################
    #### Delete archive and raw dataset files
    ####################################################################################################################

    print("Deleting archive and raw dataset files...")
    shutil.rmtree(archive_dir, ignore_errors=True)
    shutil.rmtree(raw_dir, ignore_errors=True)

    print("Finished setting up WORD dataset.")
111+
112+
113+
def setup_mscmr_dataset(dataset_dir):
    """Download and preprocess the MSCMR dataset for ScribbleBench.

    All data is placed below ``<dataset_dir>/ScribbleBench``. The images are
    taken from the ``MSCMR_dataset`` folder of a fresh clone of the CycleMix
    repository, the training labels archive and ``dataset.json`` are
    downloaded separately. The preprocessed dataset ends up in
    ``<dataset_dir>/ScribbleBench/MSCMR`` with the folders ``imagesTr``,
    ``imagesTs``, ``labelsTr`` and ``labelsTs`` plus ``dataset.json``. The
    ``train`` and ``val`` images are merged into ``imagesTr``. Image files get
    a ``_0000`` suffix appended to their name, label files keep their name.

    The temporary ``archive`` and ``raw`` folders below
    ``<dataset_dir>/ScribbleBench`` are deleted at the end.

    Args:
        dataset_dir: Directory (str or path-like) in which the
            ``ScribbleBench`` folder is created.

    Raises:
        requests.RequestException: If one of the downloads fails
            (bad HTTP status, timeout, connection error).
        FileNotFoundError: If one of the two training images that are removed
            after preprocessing does not exist.
    """

    def _download(url, target):
        # Stream to disk in chunks so the whole file is never held in memory.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()  # Raise an error on bad status
            with open(target, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

    def _move_cases(src_dir, dst_dir, suffix=""):
        # Materialize the listing first: files are moved out of src_dir while
        # iterating. The found path itself is moved, so files located in
        # sub-folders of src_dir are handled as well.
        for src_path in list(src_dir.rglob("*.nii.gz")):
            case_name = src_path.name[:-len(".nii.gz")]
            shutil.move(str(src_path), str(dst_dir / f"{case_name}{suffix}.nii.gz"))

    dataset_dir = Path(dataset_dir) / "ScribbleBench"
    archive_dir = dataset_dir / "archive"
    raw_dir = dataset_dir / "raw"
    mscmr_preprocessed_dir = dataset_dir / "MSCMR"
    archive_dir.mkdir(parents=True, exist_ok=True)
    raw_dir.mkdir(parents=True, exist_ok=True)
    mscmr_preprocessed_dir.mkdir(parents=True, exist_ok=True)

    ####################################################################################################################
    #### Download MSCMR dataset
    ####################################################################################################################

    print("Downloading MSCMR dataset...")
    repo_url = "https://github.com/BWGZK/CycleMix.git"
    repo_dir = raw_dir / "CycleMix"

    # Cloning into an existing, non-empty folder fails, so drop any clone left
    # behind by a previously aborted run.
    if repo_dir.exists():
        shutil.rmtree(repo_dir)
    Repo.clone_from(repo_url, repo_dir)

    train_labels_archive = archive_dir / "labelsTr.zip"
    _download("https://syncandshare.desy.de/index.php/s/j2t8g8P8LHb9Xfk/download/labelsTr.zip", train_labels_archive)

    ####################################################################################################################
    #### Unpack MSCMR labels archive
    ####################################################################################################################

    print("Unpacking MSCMR labels archive...")
    # NOTE(review): presumably the archive contains a top-level "labelsTr"
    # folder so that the labels land in MSCMR/labelsTr - confirm.
    with zipfile.ZipFile(train_labels_archive, 'r') as zip_ref:
        zip_ref.extractall(mscmr_preprocessed_dir)

    ####################################################################################################################
    #### Preprocess MSCMR dataset
    ####################################################################################################################

    print("Preprocessing MSCMR dataset...")
    mscmr_raw_dir = repo_dir / "MSCMR_dataset"

    for folder in ("imagesTr", "imagesTs", "labelsTr", "labelsTs"):
        (mscmr_preprocessed_dir / folder).mkdir(parents=True, exist_ok=True)

    # Images: training and validation images both go to imagesTr.
    _move_cases(mscmr_raw_dir / "train" / "images", mscmr_preprocessed_dir / "imagesTr", suffix="_0000")
    _move_cases(mscmr_raw_dir / "val" / "images", mscmr_preprocessed_dir / "imagesTr", suffix="_0000")
    _move_cases(mscmr_raw_dir / "TestSet" / "images", mscmr_preprocessed_dir / "imagesTs", suffix="_0000")

    # Test labels keep their name. The "_0000" suffix is only appended to
    # images, matching the label handling in setup_word_dataset.
    # NOTE(review): confirm that the upstream label file names match the image
    # case names.
    _move_cases(mscmr_raw_dir / "TestSet" / "labels", mscmr_preprocessed_dir / "labelsTs")

    # These two images have no dense GT so it is not possible to generate scribbles for them
    os.remove(mscmr_preprocessed_dir / "imagesTr" / "subject2_DE_0000.nii.gz")
    os.remove(mscmr_preprocessed_dir / "imagesTr" / "subject4_DE_0000.nii.gz")

    _download("https://syncandshare.desy.de/index.php/s/9gdZ33WL2nPXpGC/download/dataset.json", mscmr_preprocessed_dir / "dataset.json")

    ####################################################################################################################
    #### Delete archive and raw dataset files
    ####################################################################################################################

    print("Deleting archive and raw dataset files...")
    shutil.rmtree(archive_dir, ignore_errors=True)
    shutil.rmtree(raw_dir, ignore_errors=True)

    print("Finished setting up MSCMR dataset.")
195+
196+
197+
if __name__ == '__main__':
    # Command line entry point: set up the datasets selected via --word / --mscmr
    # below the directory given with -d / --dataset_dir.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', "--dataset_dir", required=True, type=str, help="Path to the dir used for setting up ScribbleBench.")
    parser.add_argument('--word', required=False, default=False, action="store_true", help="Download and preprocess the WORD dataset for ScribbleBench.")
    parser.add_argument('--mscmr', required=False, default=False, action="store_true", help="Download and preprocess the MSCMR dataset for ScribbleBench.")
    args = parser.parse_args()

    if args.word:
        setup_word_dataset(args.dataset_dir)
    # Each dataset is controlled by its own flag. The MSCMR setup used to be
    # tied to --word, so --mscmr alone did nothing and --word ran both.
    if args.mscmr:
        setup_mscmr_dataset(args.dataset_dir)
    if not (args.word or args.mscmr):
        print("No dataset selected. Pass --word and/or --mscmr.")

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ install_requires =
3434
shapely
3535
pandas
3636
medvol
37+
gdown
38+
GitPython
3739
python_requires = >=3.8
3840
include_package_data = True
3941
; package_dir =

0 commit comments

Comments
 (0)